From d15ee30f59bc8f16f5e4702ba2e35a76e8ead10c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jul 2020 08:26:50 +0200 Subject: [PATCH 001/668] Combine BWA and BW kit tasks --- bwa.wdl | 85 +++++---------------------------------------------------- 1 file changed, 7 insertions(+), 78 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 58e1dc80..cdaed83a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -21,78 +21,6 @@ version 1.0 # SOFTWARE. task Mem { - input { - File read1 - File? read2 - BwaIndex bwaIndex - String outputPath - String? readgroup - - Int threads = 4 - Int? sortThreads - Int sortMemoryPerThreadGb = 2 - Int compressionLevel = 1 - Int? memoryGb - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 200 / threads) - # This container contains: samtools (1.10), bwa (0.7.17-r1188) - String dockerImage = "quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:eabfac3657eda5818bae4090db989e3d41b01542-0" - } - - # Samtools sort may block the pipe while it is writing data to disk. - # This can lead to cpu underutilization. - # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. - Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) - Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) - # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
- Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads - - command { - set -e -o pipefail - mkdir -p "$(dirname ~{outputPath})" - bwa mem \ - ~{"-t " + threads} \ - ~{"-R '" + readgroup}~{true="'" false="" defined(readgroup)} \ - ~{bwaIndex.fastaFile} \ - ~{read1} \ - ~{read2} \ - | samtools sort \ - ~{"-@ " + totalSortThreads} \ - -m ~{sortMemoryPerThreadGb}G \ - -l ~{compressionLevel} \ - - \ - -o ~{outputPath} - } - - output { - File outputBam = outputPath - } - - runtime { - cpu: threads - memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" - time_minutes: timeMinutes - docker: dockerImage - } - - parameter_meta { - read1: {description: "The first or single end fastq file.", category: "required"} - read2: {description: "The second end fastq file.", category: "common"} - bwaIndex: {description: "The BWA index files.", category: "required"} - outputPath: {description: "The location the output BAM file should be written to.", category: "required"} - readgroup: {description: "The readgroup to be assigned to the reads. See BWA mem's `-R` option.", category: "common"} - - threads: {description: "The number of threads to use.", category: "advanced"} - memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} - sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} - sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} - compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} - -task Kit { input { File read1 File? 
read2 @@ -100,7 +28,7 @@ task Kit { String outputPrefix String? readgroup Boolean sixtyFour = false - + Boolean usePostalt = false Int threads = 4 Int? sortThreads Int sortMemoryPerThreadGb = 2 @@ -118,6 +46,8 @@ task Kit { Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads + String bwaKitCommand = "bwa-postalt.js -p ~{outputPrefix}.hla ~{bwaIndex.fastaFile}" + (if sixtyFour then ".64.alt" else ".alt") + " | " + String kitCommandString = if usePostalt then bwaKitCommand else "" command { set -e @@ -129,10 +59,8 @@ task Kit { ~{read1} \ ~{read2} \ 2> ~{outputPrefix}.log.bwamem | \ - bwa-postalt.js \ - -p ~{outputPrefix}.hla \ - ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ - samtools sort \ + ~{kitCommandString} \ + samtools sort \ ~{"-@ " + totalSortThreads} \ -m ~{sortMemoryPerThreadGb}G \ -l ~{compressionLevel} \ @@ -157,7 +85,8 @@ task Kit { # inputs read1: {description: "The first-end fastq file.", category: "required"} read2: {description: "The second-end fastq file.", category: "common"} - bwaIndex: {description: "The BWA index, including a .alt file.", category: "required"} + bwaIndex: {description: "The BWA index, including (optionally) a .alt file.", category: "required"} + usePostalt: {description: "Whether to use the postalt script from bwa kit."} outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"} readgroup: {description: "A readgroup identifier.", category: "common"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} From 830eb51555889da4c3733ad5c7bbea4528a57887 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jul 2020 09:26:14 +0200 Subject: [PATCH 002/668] Enable/disable 
postalt with comment --- bwa.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index cdaed83a..f2c731f2 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -46,9 +46,8 @@ task Mem { Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads - String bwaKitCommand = "bwa-postalt.js -p ~{outputPrefix}.hla ~{bwaIndex.fastaFile}" + (if sixtyFour then ".64.alt" else ".alt") + " | " - String kitCommandString = if usePostalt then bwaKitCommand else "" + # The bwa postalt script is out commented as soon as usePostalt = false. It is a hack but it should work. command { set -e mkdir -p "$(dirname ~{outputPrefix})" @@ -59,7 +58,7 @@ task Mem { ~{read1} \ ~{read2} \ 2> ~{outputPrefix}.log.bwamem | \ - ~{kitCommandString} \ + ~{true="" false="#" usePostalt} bwa-postalt.js -p ~{outputPrefix}.hla ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ samtools sort \ ~{"-@ " + totalSortThreads} \ -m ~{sortMemoryPerThreadGb}G \ @@ -70,6 +69,7 @@ task Mem { output { File outputBam = outputPrefix + ".aln.bam" + File? outputHla = outputPrefix + ".hla" } runtime { From 2b073f668e17643ac393d012986b9bcc3096978f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jul 2020 09:30:09 +0200 Subject: [PATCH 003/668] Add comments on comments --- bwa.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index f2c731f2..fdeb870f 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -47,7 +47,8 @@ task Mem { # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads - # The bwa postalt script is out commented as soon as usePostalt = false. 
It is a hack but it should work. + # The bwa postalt script is out commented as soon as usePostalt = false. + # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. command { set -e mkdir -p "$(dirname ~{outputPrefix})" From 997b7765a0403778ad842ae2a8e1c50f38bfd05b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jul 2020 09:55:49 +0200 Subject: [PATCH 004/668] Add bwa-mem2 task --- bwa-mem2.wdl | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 bwa-mem2.wdl diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl new file mode 100644 index 00000000..df3801b4 --- /dev/null +++ b/bwa-mem2.wdl @@ -0,0 +1,112 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Mem { + # NOTE: THIS IS A COPY OF THE BWA TASK WITH ONLY bwa CHANGED TO bwa-mem2 AND A DIFFERENT DOCKER IMAGE. 
+ input { + File read1 + File? read2 + BwaIndex bwaIndex + String outputPrefix + String? readgroup + Boolean sixtyFour = false + Boolean usePostalt = false + Int threads = 4 + Int? sortThreads + Int sortMemoryPerThreadGb = 2 + Int compressionLevel = 1 + Int? memoryGb + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) + # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10 + String dockerImage = "biowdl/bwamem2-kit:2.0-dev" + } + + # Samtools sort may block the pipe while it is writing data to disk. + # This can lead to cpu underutilization. + # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. + Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) + Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) + # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. + Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads + + # The bwa postalt script is out commented as soon as usePostalt = false. + # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + bwa-mem2 mem \ + -t ~{threads} \ + ~{"-R '" + readgroup}~{true="'" false="" defined(readgroup)} \ + ~{bwaIndex.fastaFile} \ + ~{read1} \ + ~{read2} \ + 2> ~{outputPrefix}.log.bwamem | \ + ~{true="" false="#" usePostalt} bwa-postalt.js -p ~{outputPrefix}.hla ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ + samtools sort \ + ~{"-@ " + totalSortThreads} \ + -m ~{sortMemoryPerThreadGb}G \ + -l ~{compressionLevel} \ + - \ + -o ~{outputPrefix}.aln.bam + } + + output { + File outputBam = outputPrefix + ".aln.bam" + File? outputHla = outputPrefix + ".hla" + } + + runtime { + # One extra thread for bwa-postalt + samtools is not needed. 
+ # These only use 5-10% of compute power and not always simultaneously. + cpu: threads + memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + read1: {description: "The first-end fastq file.", category: "required"} + read2: {description: "The second-end fastq file.", category: "common"} + bwaIndex: {description: "The BWA index, including (optionally) a .alt file.", category: "required"} + usePostalt: {description: "Whether to use the postalt script from bwa kit."} + outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"} + readgroup: {description: "A readgroup identifier.", category: "common"} + sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} + threads: {description: "The number of threads to use for alignment.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + + # outputs + outputBam: "The produced BAM file." 
+ } +} + +struct BwaIndex { + File fastaFile + Array[File] indexFiles +} From e968433fdc7d7f26986ddd1ba264f80dd7579d37 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jul 2020 15:34:30 +0200 Subject: [PATCH 005/668] Update image --- bwa-mem2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index df3801b4..5ac6958e 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -37,7 +37,7 @@ task Mem { Int? memoryGb Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10 - String dockerImage = "biowdl/bwamem2-kit:2.0-dev" + String dockerImage = "biowdl/bwamem2-kit:2.0-dev2" # TODO: Update to biocontainer. } # Samtools sort may block the pipe while it is writing data to disk. From d27eea90b9aa3b3683de5522f8f5cb541ec86211 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 27 Jul 2020 10:21:55 +0200 Subject: [PATCH 006/668] update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 55fb1e8a..d3d719f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ A bwa-mem2 task was created with the same interface (including usePostalt) + as the bwa mem task. ++ bwa mem and bwa kit are now one task. The usePostalt boolean can be used to + switch the postalt script on and off. + bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The number of threads is now related to the number of threads on the aligner. 
Using more threads reduces the chance of the samtools sort pipe getting From 18fb322e8b24dee1292f56b6245dc26325eb5ffd Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 27 Jul 2020 15:59:21 +0200 Subject: [PATCH 007/668] use mulled biocontainer --- bwa-mem2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index 5ac6958e..d3290d0b 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -37,7 +37,7 @@ task Mem { Int? memoryGb Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10 - String dockerImage = "biowdl/bwamem2-kit:2.0-dev2" # TODO: Update to biocontainer. + String dockerImage = "quay.io/biocontainers/mulled-v2-6a15c99309c82b345497d24489bee67bbb76c2f6:1c9c3227b9bf825a8dc9726a25701aa23c0b1f12-0" } # Samtools sort may block the pipe while it is writing data to disk. From 9a9bf3f5b1bcef7669b05fcc132caadf411e1140 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 28 Jul 2020 15:51:43 +0200 Subject: [PATCH 008/668] add gridss task --- gridss.wdl | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 gridss.wdl diff --git a/gridss.wdl b/gridss.wdl new file mode 100644 index 00000000..37ac83b7 --- /dev/null +++ b/gridss.wdl @@ -0,0 +1,65 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import "bwa.wdl" as bwa + +task GRIDSS { + input { + File tumorBam + File tumorBai + String tumorLabel + File? normalBam + File? normalBai + String? normalLabel + BwaIndex reference + String outputPrefix = "gridss" + + Int threads = 1 + String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" + } + + command { + gridss \ + --reference ~{reference.fastaFile} \ + --output ~{outputPrefix}.vcf.gz \ + --assembly ~{outputPrefix}_assembly.bam \ + ~{"-t " + threads} \ + --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ + ~{normalBam} \ + ~{tumorBam} + tabix -p vcf ~{outputPrefix}.vcf.gz + samtools index ~{outputPrefix}_assembly.bam ~{outputPrefix}_assembly.bai + } + + output { + File vcf = outputPrefix + ".vcf.gz" + File vcfIndex = outputPrefix + ".vcf.gz.tbi" + File assembly = outputPrefix + "_assembly.bam" + File assemblyIndex = outputPrefix + "_assembly.bai" + } + + runtime { + cpu: threads + memory: "32G" + docker: dockerImage + } +} \ No newline at end of file From 20068636fe79595050396d2bdc665fde8126bf33 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jul 2020 10:17:58 +0200 Subject: [PATCH 009/668] slightly tune memory requirements --- bwa-mem2.wdl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index d3290d0b..6ea4578d 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -21,7 +21,6 @@ version 1.0 # SOFTWARE. 
task Mem { - # NOTE: THIS IS A COPY OF THE BWA TASK WITH ONLY bwa CHANGED TO bwa-mem2 AND A DIFFERENT DOCKER IMAGE. input { File read1 File? read2 @@ -45,8 +44,13 @@ task Mem { # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) - # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads + # BWA-mem2's index files contain 2 BWT indexes of which only one is used. .2bit64 is used by default and + # .8bit32 is used for avx2. + # The larger one of these is the 8bit32 index. Since we do not know beforehand which one is used we need to accomodate for that. + # Using only the 8bit32 index uses 57,5% of the index files. Since bwa-mem2 uses slightly more memory than the index + # We put it at 62% as a safety factor. That means the memory usage for bwa-mem will be 53G for a human genome. Resulting in 60G total + # on 8 cores with samtools with 3 sort threads. + Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 0.62) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. From 94128f4fe9fee9bfc88b7c96c07768141c197fa7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 29 Jul 2020 13:40:05 +0200 Subject: [PATCH 010/668] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 255c2186..67ea94a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. 
version 4.0.0-develop --------------------------- ++ Added a task for GRIDSS. + Picard MergeVcf now uses compression level 1 by default. + bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The number of threads is now related to the number of threads on the aligner. From 247561f6bfe261744902980621ad133f2ba8d971 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 29 Jul 2020 14:30:09 +0200 Subject: [PATCH 011/668] add parameter_meta to gridss --- gridss.wdl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 37ac83b7..14bc441c 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -38,6 +38,8 @@ task GRIDSS { } command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" gridss \ --reference ~{reference.fastaFile} \ --output ~{outputPrefix}.vcf.gz \ @@ -62,4 +64,18 @@ task GRIDSS { memory: "32G" docker: dockerImage } + + parameter_meta { + tumorBam: {description: "The input BAM file. This should be the tumor/case sample in case of a paired analysis.", category: "required"} + tumorBai: {description: "The index for tumorBam.", category: "required"} + tumorLabel: {description: "The name of the (tumor) sample.", category: "required"} + normalBam: {description: "The BAM file for the normal/control sample.", category: "advanced"} + normalBai: {description: "The index for normalBam.", category: "advanced"} + normalLabel: {description: "The name of the normal sample.", category: "advanced"} + BwaIndex reference: {description: "A BWA index, this should also include the fasta index file (.fai).", category: "required"} + outputPrefix: {description: "The prefix for the output files. This may include parent directories.", category: "common"} + + threads: {description: "The number of the threads to use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } } \ No newline at end of file From 82f0cc79f1d2d49a5d34c27ea743f1be7655d7f5 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 29 Jul 2020 14:38:13 +0200 Subject: [PATCH 012/668] fix parameter_meta --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 14bc441c..3d4b7d73 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -72,7 +72,7 @@ task GRIDSS { normalBam: {description: "The BAM file for the normal/control sample.", category: "advanced"} normalBai: {description: "The index for normalBam.", category: "advanced"} normalLabel: {description: "The name of the normal sample.", category: "advanced"} - BwaIndex reference: {description: "A BWA index, this should also include the fasta index file (.fai).", category: "required"} + reference: {description: "A BWA index, this should also include the fasta index file (.fai).", category: "required"} outputPrefix: {description: "The prefix for the output files. 
This may include parent directories.", category: "common"} threads: {description: "The number of the threads to use.", category: "advanced"} From 5d5a335ae7791d360af366db3ce461bc6c07ca7e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 5 Aug 2020 10:40:12 +0200 Subject: [PATCH 013/668] setting next version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 944880fa..ee74734a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.2.0 +4.1.0 From d1922724faf06dac8e835c395fc37e5d5e64f515 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Thu, 6 Aug 2020 13:21:07 +0200 Subject: [PATCH 014/668] add missing category for outputType in bcftools view --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 2677899b..8875903b 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -58,7 +58,7 @@ task View { parameter_meta { inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf"} + outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 8591feb0815fd44472761359244c4ee6c6d45752 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 7 Aug 2020 13:42:20 +0200 Subject: [PATCH 015/668] Add pacbio bam2fastx tool. 
--- CHANGELOG.md | 4 ++ bam2fastx.wdl | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 bam2fastx.wdl diff --git a/CHANGELOG.md b/CHANGELOG.md index 96f4559c..272499c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> +version 5.0.0-dev +--------------------------- ++ Add wdl file for pacbio's bam2fastx tool. + version 4.0.0 --------------------------- + Picard MergeVcf now uses compression level 1 by default. diff --git a/bam2fastx.wdl b/bam2fastx.wdl new file mode 100644 index 00000000..09c56897 --- /dev/null +++ b/bam2fastx.wdl @@ -0,0 +1,129 @@ +version 1.0 + +# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task Bam2Fasta { + input { + File inputFile + String outputPrefix + Int compressionLevel = 1 + Boolean uncompressedOutput = false + Boolean splitByBarcode = false + + String? seqIdPrefix + + String memory = "2G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.0--he1c1bb9_8" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + bam2fasta \ + --output ~{outputPrefix} \ + -c ~{compressionLevel} + ~{true="-u" false="" uncompressedOutput} \ + ~{true="--split-barcodes" false="" splitByBarcode} \ + ~{"--seqid-prefix " + seqIdPrefix} \ + ~{inputFile} + } + + output { + File fastaFile = outputPrefix + ".fasta.gz" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input pacbio bam file.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} + uncompressedOutput: {description: "Do not compress. In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} + splitByBarcode: {description: "Split output into multiple fasta files, by barcode pairs.", category: "advanced"} + seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + fastaFile: {description: "The fasta output file."} + } +} + +task Bam2Fastq { + input { + File inputFile + String outputPrefix + Int compressionLevel = 1 + Boolean uncompressedOutput = false + Boolean splitByBarcode = false + + String? seqIdPrefix + + String memory = "2G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.0--he1c1bb9_8" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + bam2fastq \ + --output ~{outputPrefix} \ + -c ~{compressionLevel} + ~{true="-u" false="" uncompressedOutput} \ + ~{true="--split-barcodes" false="" splitByBarcode} \ + ~{"--seqid-prefix " + seqIdPrefix} \ + ~{inputFile} + } + + output { + File fastaFile = outputPrefix + ".fastq.gz" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input pacbio bam file.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} + uncompressedOutput: {description: "Do not compress. In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} + splitByBarcode: {description: "Split output into multiple fastq files, by barcode pairs.", category: "advanced"} + seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + fastaFile: {description: "The fastq output file."} + } +} From 9ad9425766843e2706ff440457d6ec1d8b21916b Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 7 Aug 2020 13:44:02 +0200 Subject: [PATCH 016/668] Correct output naming. --- bam2fastx.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 09c56897..21f1c604 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -102,7 +102,7 @@ task Bam2Fastq { } output { - File fastaFile = outputPrefix + ".fastq.gz" + File fastqFile = outputPrefix + ".fastq.gz" } runtime { @@ -124,6 +124,6 @@ task Bam2Fastq { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - fastaFile: {description: "The fastq output file."} + fastqFile: {description: "The fastq output file."} } } From aea639c83bc4b306df01986f55f4e774208e8a8e Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 10 Aug 2020 11:27:23 +0200 Subject: [PATCH 017/668] Add index input to the tasks. 
--- bam2fastx.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 21f1c604..27ed15cc 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -23,6 +23,7 @@ version 1.0 task Bam2Fasta { input { File inputFile + File bamIndex String outputPrefix Int compressionLevel = 1 Boolean uncompressedOutput = false @@ -60,6 +61,7 @@ task Bam2Fasta { parameter_meta { # inputs inputFile: {description: "The input pacbio bam file.", category: "required"} + bamIndex: {description: "The .pbi index for the input file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} uncompressedOutput: {description: "Do not compress. In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} @@ -77,6 +79,7 @@ task Bam2Fasta { task Bam2Fastq { input { File inputFile + File bamIndex String outputPrefix Int compressionLevel = 1 Boolean uncompressedOutput = false @@ -114,6 +117,7 @@ task Bam2Fastq { parameter_meta { # inputs inputFile: {description: "The input pacbio bam file.", category: "required"} + bamIndex: {description: "The .pbi index for the input file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} uncompressedOutput: {description: "Do not compress. In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} From 628b9169e7791eaad69b3c58f3f0b324a529be12 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 10 Aug 2020 13:25:23 +0200 Subject: [PATCH 018/668] Add missing ". 
--- bam2fastx.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 27ed15cc..ccea6edb 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -39,9 +39,10 @@ task Bam2Fasta { command { set -e mkdir -p "$(dirname ~{outputPrefix})" + chmod 755 ~{inputFile} bam2fasta \ --output ~{outputPrefix} \ - -c ~{compressionLevel} + -c ~{compressionLevel} \ ~{true="-u" false="" uncompressedOutput} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ @@ -95,9 +96,10 @@ task Bam2Fastq { command { set -e mkdir -p "$(dirname ~{outputPrefix})" + chmod 755 ~{inputFile} bam2fastq \ --output ~{outputPrefix} \ - -c ~{compressionLevel} + -c ~{compressionLevel} \ ~{true="-u" false="" uncompressedOutput} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ From 4da76be86e4cfe93a63ab0700468c1be9f572683 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 10 Aug 2020 14:05:35 +0200 Subject: [PATCH 019/668] Remove left-over chmod. --- bam2fastx.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index ccea6edb..f9699d3b 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -39,7 +39,6 @@ task Bam2Fasta { command { set -e mkdir -p "$(dirname ~{outputPrefix})" - chmod 755 ~{inputFile} bam2fasta \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ @@ -96,7 +95,6 @@ task Bam2Fastq { command { set -e mkdir -p "$(dirname ~{outputPrefix})" - chmod 755 ~{inputFile} bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ From adac77e53089d7875c83ad16bb7271621c30abcb Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 11 Aug 2020 10:30:53 +0200 Subject: [PATCH 020/668] Remove uncompressed output options. 
--- bam2fastx.wdl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index f9699d3b..5e5fb50a 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -26,7 +26,6 @@ task Bam2Fasta { File bamIndex String outputPrefix Int compressionLevel = 1 - Boolean uncompressedOutput = false Boolean splitByBarcode = false String? seqIdPrefix @@ -42,7 +41,6 @@ task Bam2Fasta { bam2fasta \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ - ~{true="-u" false="" uncompressedOutput} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ ~{inputFile} @@ -64,7 +62,6 @@ task Bam2Fasta { bamIndex: {description: "The .pbi index for the input file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} - uncompressedOutput: {description: "Do not compress. In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} splitByBarcode: {description: "Split output into multiple fasta files, by barcode pairs.", category: "advanced"} seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} @@ -82,7 +79,6 @@ task Bam2Fastq { File bamIndex String outputPrefix Int compressionLevel = 1 - Boolean uncompressedOutput = false Boolean splitByBarcode = false String? 
seqIdPrefix @@ -98,7 +94,6 @@ task Bam2Fastq { bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ - ~{true="-u" false="" uncompressedOutput} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ ~{inputFile} @@ -120,7 +115,6 @@ task Bam2Fastq { bamIndex: {description: "The .pbi index for the input file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} - uncompressedOutput: {description: "Do not compress. In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} splitByBarcode: {description: "Split output into multiple fastq files, by barcode pairs.", category: "advanced"} seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} From 10236b504fe1e272690e7976f4c281d0cfa13027 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 11 Aug 2020 13:23:33 +0200 Subject: [PATCH 021/668] update CHANGELOG --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a0964883..8a882f09 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,13 +9,13 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 5.0.0-dev +version 4.1.0-dev --------------------------- ++ Added a task for GRIDSS. + Add wdl file for pacbio's bam2fastx tool. version 4.0.0 --------------------------- -+ Added a task for GRIDSS. + Picard MergeVcf now uses compression level 1 by default. + bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The number of threads is now related to the number of threads on the aligner. 
From 710cb79676d11663c8d951373265c9c80325aee3 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 11 Aug 2020 14:14:55 +0200 Subject: [PATCH 022/668] Fix index localization. --- CHANGELOG.md | 2 ++ bam2fastx.wdl | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 272499c4..af613151 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Add copy command to bam2fastx tasks to make sure bam file and its index are + always in the same directory. + Add wdl file for pacbio's bam2fastx tool. version 4.0.0 diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 5e5fb50a..6a09202f 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -24,6 +24,7 @@ task Bam2Fasta { input { File inputFile File bamIndex + String basenameInputFile = basename(inputFile) String outputPrefix Int compressionLevel = 1 Boolean splitByBarcode = false @@ -38,12 +39,16 @@ task Bam2Fasta { command { set -e mkdir -p "$(dirname ~{outputPrefix})" + # The bam file and its index need to be in the same directory. + # Cromwell will put them in separate iputs folders. + cp ~{inputFile} ./ + cp ~{bamIndex} ./ bam2fasta \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{inputFile} + ./~{basenameInputFile} } output { @@ -76,6 +81,7 @@ task Bam2Fasta { task Bam2Fastq { input { File inputFile + String basenameInputFile = basename(inputFile) File bamIndex String outputPrefix Int compressionLevel = 1 @@ -91,12 +97,16 @@ task Bam2Fastq { command { set -e mkdir -p "$(dirname ~{outputPrefix})" + # The bam file and its index need to be in the same directory. + # Cromwell will put them in separate iputs folders. 
+ cp ~{inputFile} ./ + cp ~{bamIndex} ./ bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{inputFile} + ./~{basenameInputFile} } output { From 48351b26c1ab7caad71432b84a0fcfa51c9f4388 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Aug 2020 14:25:28 +0200 Subject: [PATCH 023/668] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4446daa8..ab8d8867 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 4.1.0-dev +version 5.0.0-dev --------------------------- + A bwa-mem2 task was created with the same interface (including usePostalt) as the bwa mem task. From 01df29baef579cce73a32cd109d5405d45c197bd Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 11 Aug 2020 15:18:17 +0200 Subject: [PATCH 024/668] Change lima outputs. --- CHANGELOG.md | 3 +-- bam2fastx.wdl | 14 ++------------ lima.wdl | 9 ++++++--- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b839b35b..57bbfecb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,7 @@ that users understand how the changes affect the new version. version 4.1.0-dev --------------------------- -+ Add copy command to bam2fastx tasks to make sure bam file and its index are - always in the same directory. ++ Remove globs from lima outputs. + Added a task for GRIDSS. + Add wdl file for pacbio's bam2fastx tool. 
diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 6a09202f..5e5fb50a 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -24,7 +24,6 @@ task Bam2Fasta { input { File inputFile File bamIndex - String basenameInputFile = basename(inputFile) String outputPrefix Int compressionLevel = 1 Boolean splitByBarcode = false @@ -39,16 +38,12 @@ task Bam2Fasta { command { set -e mkdir -p "$(dirname ~{outputPrefix})" - # The bam file and its index need to be in the same directory. - # Cromwell will put them in separate iputs folders. - cp ~{inputFile} ./ - cp ~{bamIndex} ./ bam2fasta \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ./~{basenameInputFile} + ~{inputFile} } output { @@ -81,7 +76,6 @@ task Bam2Fasta { task Bam2Fastq { input { File inputFile - String basenameInputFile = basename(inputFile) File bamIndex String outputPrefix Int compressionLevel = 1 @@ -97,16 +91,12 @@ task Bam2Fastq { command { set -e mkdir -p "$(dirname ~{outputPrefix})" - # The bam file and its index need to be in the same directory. - # Cromwell will put them in separate iputs folders. - cp ~{inputFile} ./ - cp ~{bamIndex} ./ bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ./~{basenameInputFile} + ~{inputFile} } output { diff --git a/lima.wdl b/lima.wdl index 2e8a7085..1a40b1c8 100644 --- a/lima.wdl +++ b/lima.wdl @@ -95,12 +95,15 @@ task Lima { cp "~{basename(outputPrefix)}.fl.lima.counts" "~{outputPrefix}.fl.lima.counts" cp "~{basename(outputPrefix)}.fl.lima.report" "~{outputPrefix}.fl.lima.report" cp "~{basename(outputPrefix)}.fl.lima.summary" "~{outputPrefix}.fl.lima.summary" + find . -path "*.bam" > bamFiles.txt + find . -path "*.bam.pbi" > bamIndexes.txt + find . 
-path "*.subreadset.xml" > subreadsets.txt } output { - Array[File] limaBam = glob("*.bam") - Array[File] limaBamIndex = glob("*.bam.pbi") - Array[File] limaXml = glob("*.subreadset.xml") + Array[File] limaBam = read_lines("bamFiles.txt") + Array[File] limaBamIndex = read_lines("bamIndexes.txt") + Array[File] limaXml = read_lines("subreadsets.txt") File limaStderr = outputPrefix + ".fl.stderr.log" File limaJson = outputPrefix + ".fl.json" File limaCounts = outputPrefix + ".fl.lima.counts" From af73c53935206dd54b37079e1c8d6a5b053c4a46 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 11 Aug 2020 17:50:15 +0200 Subject: [PATCH 025/668] Change inputs to arrays. --- CHANGELOG.md | 3 ++- bam2fastx.wdl | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 57bbfecb..eb2ef271 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,8 @@ that users understand how the changes affect the new version. version 4.1.0-dev --------------------------- -+ Remove globs from lima outputs. ++ Bam2fastx: Input bam and index are now arrays. ++ Lima: Remove globs from outputs. + Added a task for GRIDSS. + Add wdl file for pacbio's bam2fastx tool. 
diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 5e5fb50a..a8f1342c 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -22,8 +22,8 @@ version 1.0 task Bam2Fasta { input { - File inputFile - File bamIndex + Array[File]+ inputFile + Array[File]+ bamIndex String outputPrefix Int compressionLevel = 1 Boolean splitByBarcode = false @@ -43,7 +43,7 @@ task Bam2Fasta { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{inputFile} + ~{sep=" " inputFile} } output { @@ -58,8 +58,8 @@ task Bam2Fasta { parameter_meta { # inputs - inputFile: {description: "The input pacbio bam file.", category: "required"} - bamIndex: {description: "The .pbi index for the input file.", category: "required"} + inputFile: {description: "The input pacbio bam file(s).", category: "required"} + bamIndex: {description: "The .pbi index for the input file(s).", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} splitByBarcode: {description: "Split output into multiple fasta files, by barcode pairs.", category: "advanced"} @@ -75,8 +75,8 @@ task Bam2Fasta { task Bam2Fastq { input { - File inputFile - File bamIndex + Array[File]+ inputFile + Array[File]+ bamIndex String outputPrefix Int compressionLevel = 1 Boolean splitByBarcode = false @@ -96,7 +96,7 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{inputFile} + ~{sep=" " inputFile} } output { @@ -111,8 +111,8 @@ task Bam2Fastq { parameter_meta { # inputs - inputFile: {description: "The input pacbio bam file.", category: "required"} - bamIndex: {description: "The .pbi index for the input file.", category: "required"} + inputFile: {description: "The input pacbio bam file(s).", category: "required"} + bamIndex: {description: "The .pbi index for the 
input file(s).", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} splitByBarcode: {description: "Split output into multiple fastq files, by barcode pairs.", category: "advanced"} From a4af699b14f325e1729e307a21058cb25da0d251 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 13 Aug 2020 15:06:02 +0200 Subject: [PATCH 026/668] update gridss: add --jvmheap parameter --- gridss.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 3d4b7d73..4ba4bc17 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -33,6 +33,7 @@ task GRIDSS { BwaIndex reference String outputPrefix = "gridss" + String jvmheapsize = "25G" Int threads = 1 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } @@ -45,6 +46,7 @@ task GRIDSS { --output ~{outputPrefix}.vcf.gz \ --assembly ~{outputPrefix}_assembly.bam \ ~{"-t " + threads} \ + ~{"--jvmheap " + jvmheapsize} \ --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ ~{normalBam} \ ~{tumorBam} From 9bfe4ebf231bd307dc546dff34c8b96823058718 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 13 Aug 2020 15:09:40 +0200 Subject: [PATCH 027/668] update task/CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a882f09..2360a877 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 4.1.0-dev --------------------------- ++ Updated task gridss.wdl: add --jvmheap parameter + Added a task for GRIDSS. + Add wdl file for pacbio's bam2fastx tool. From ac3ee59598026cb22cf40325dbf32b0bc5e988fb Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 13 Aug 2020 15:17:53 +0200 Subject: [PATCH 028/668] Rename input files. 
--- bam2fastx.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index a8f1342c..42240cd4 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -22,7 +22,7 @@ version 1.0 task Bam2Fasta { input { - Array[File]+ inputFile + Array[File]+ bam Array[File]+ bamIndex String outputPrefix Int compressionLevel = 1 @@ -43,7 +43,7 @@ task Bam2Fasta { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{sep=" " inputFile} + ~{sep=" " bam} } output { @@ -58,7 +58,7 @@ task Bam2Fasta { parameter_meta { # inputs - inputFile: {description: "The input pacbio bam file(s).", category: "required"} + bam: {description: "The input pacbio bam file(s).", category: "required"} bamIndex: {description: "The .pbi index for the input file(s).", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} @@ -75,7 +75,7 @@ task Bam2Fasta { task Bam2Fastq { input { - Array[File]+ inputFile + Array[File]+ bam Array[File]+ bamIndex String outputPrefix Int compressionLevel = 1 @@ -96,7 +96,7 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{sep=" " inputFile} + ~{sep=" " bam} } output { @@ -111,7 +111,7 @@ task Bam2Fastq { parameter_meta { # inputs - inputFile: {description: "The input pacbio bam file(s).", category: "required"} + bam: {description: "The input pacbio bam file(s).", category: "required"} bamIndex: {description: "The .pbi index for the input file(s).", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} From 8b378196020a0a4151dbb06d2452e2e05a3c12e5 Mon Sep 17 00:00:00 2001 From: 
cagaser Date: Thu, 13 Aug 2020 16:01:16 +0200 Subject: [PATCH 029/668] update gridss.wdl --- gridss.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 4ba4bc17..04ea2e82 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -33,7 +33,7 @@ task GRIDSS { BwaIndex reference String outputPrefix = "gridss" - String jvmheapsize = "25G" + Int jvmHeapSizeGb = 1 Int threads = 1 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } @@ -46,7 +46,7 @@ task GRIDSS { --output ~{outputPrefix}.vcf.gz \ --assembly ~{outputPrefix}_assembly.bam \ ~{"-t " + threads} \ - ~{"--jvmheap " + jvmheapsize} \ + ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ ~{normalBam} \ ~{tumorBam} @@ -63,7 +63,7 @@ task GRIDSS { runtime { cpu: threads - memory: "32G" + memory: "~{jvmHeapSizeGb}G" docker: dockerImage } From b654fee3d284e55e1f73f21621ee01e18fa731a8 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 13 Aug 2020 16:21:30 +0200 Subject: [PATCH 030/668] change default jvmHeapSizeGb from 1G to 30G --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 04ea2e82..3b7859b6 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -33,7 +33,7 @@ task GRIDSS { BwaIndex reference String outputPrefix = "gridss" - Int jvmHeapSizeGb = 1 + Int jvmHeapSizeGb = 30 Int threads = 1 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } @@ -63,7 +63,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb}G" + memory: "~{jvmHeapSizeGb + 1}G" docker: dockerImage } From e941b853a9ff8e194c8b1af2dc28dffddb58d8be Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 08:56:34 +0200 Subject: [PATCH 031/668] add parameter_meta for SVcalling.gridss.jvmHeapSizeGb --- gridss.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/gridss.wdl b/gridss.wdl index 3b7859b6..3649cb1b 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -78,6 
+78,7 @@ task GRIDSS { outputPrefix: {description: "The prefix for the output files. This may include parent directories.", category: "common"} threads: {description: "The number of the threads to use.", category: "advanced"} + javaXmxMb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } \ No newline at end of file From 1e8155c26a770e2aab4b46fcf74f5c98b4f7945d Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 09:04:25 +0200 Subject: [PATCH 032/668] small fix --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 3649cb1b..44b9e9f1 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -78,7 +78,7 @@ task GRIDSS { outputPrefix: {description: "The prefix for the output files. This may include parent directories.", category: "common"} threads: {description: "The number of the threads to use.", category: "advanced"} - javaXmxMb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} + jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } \ No newline at end of file From 31b21e1e197b5c646b2ad202cd4fa56cc54816e8 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 16:10:47 +0200 Subject: [PATCH 033/668] add bcftools annotate --- bcftools.wdl | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 8875903b..33685c33 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -22,6 +22,106 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +task Annotate { + input { + File? annsFile + String? collapse + Array[String] columns = [] + String? exclude + Boolean force = false + File? headerLines + String? newId + String? include + Boolean keepSites = false + String? markSites + Boolean noVersion = false + String outputType = "z" + String? regions + File? regionsFile + File? renameChrs + Array[String] samples = [] + File? 
samplesFile + Boolean singleOverlaps = false + Array[String] removeAnns = [] + File inputFile + String outputPath = "output.vcf.gz" + + Int threads = 0 + String memory = "256M" + Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + bcftools annotate \ + -o ~{outputPath} \ + -O ~{outputType} \ + ~{"--annotations " + annsFile} \ + ~{"--collapse " + collapse} \ + ~{true="--columns" false="" length(columns) > 0} ~{sep="," columns} \ + ~{"--exclude " + exclude} \ + ~{true="--force" false="" force} \ + ~{"--header-lines " + headerLines} \ + ~{"--set-id " + newId} \ + ~{"--include " + include} \ + ~{true="--keep-sites" false="" keepSites} \ + ~{"--mark-sites " + markSites} \ + ~{true="--no-version" false="" noVersion} \ + ~{"--regions " + regions} \ + ~{"--regions-file " + regionsFile} \ + ~{"--rename-chrs " + renameChrs} \ + ~{true="--samples" false="" length(samples) > 0} ~{sep="," samples} \ + ~{"--samples-file " + samplesFile} \ + ~{true="--single-overlaps" false="" singleOverlaps} \ + ~{true="--remove" false="" length(removeAnns) > 0} ~{sep="," removeAnns} \ + ~{inputFile} + bcftools index --tbi ~{outputPath} + + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} + annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "advanced"} + collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} + columns: {description: "Comma-separated list of columns or tags to carry 
over from the annotation file (see man page for details).", category: "advanced"} + exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} + force: {description: "Continue even when parsing errors, such as undefined tags, are encountered.", category: "advanced"} + headerLines: {description: "Lines to append to the VCF header (see man page for details).", category: "advanced"} + newId: {description: "Assign ID on the fly (e.g. --set-id +'%CHROM\_%POS').", category: "advanced"} + include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} + keepSites: {description: "Keep sites which do not pass -i and -e expressions instead of discarding them.", category: "advanced"} + markSites: {description: "Annotate sites which are present ('+') or absent ('-') in the -a file with a new INFO/TAG flag.", category: "advanced"} + noVersion: {description: "Do not append version and command line information to the output VCF header.", category: "advanced"} + regions: {description: "Restrict to comma-separated list of regions.", category: "advanced"} + regionsFile: {description: "Restrict to regions listed in a file.", category: "advanced"} + renameChrs: {description: "rename chromosomes according to the map in file (see man page for details).", category: "advanced"} + samples: {description: "List of samples for sample stats, \"-\" to include all samples.", category: "advanced"} + samplesFile: {description: "File of samples to include.", category: "advanced"} + singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} + removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} + inputFile: {description: "A vcf or bcf file.", category: "required"} + + threads: {description: "Number of extra decompression threads [0].", category: "advanced"} + 
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + } +} + task View { input { File inputFile From df6fe2df5e1276a39eaf6981f86b93d49cbbddda Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 16:12:38 +0200 Subject: [PATCH 034/668] small fix: change vcf to bcf --- delly.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/delly.wdl b/delly.wdl index efa1bf60..f708f494 100644 --- a/delly.wdl +++ b/delly.wdl @@ -28,7 +28,7 @@ task CallSV { File bamIndex File referenceFasta File referenceFastaFai - String outputPath = "./delly/delly.vcf" + String outputPath = "./delly/delly.bcf" String memory = "15G" Int timeMinutes = 300 From 1241b96fce92f28fa747cde02081e00edb0aa506 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 17:12:04 +0200 Subject: [PATCH 035/668] add bcftools sort --- bcftools.wdl | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 33685c33..d72efde3 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -122,6 +122,44 @@ task Annotate { } } +task Sort { + input { + File inputFile + String outputPath = "output.vcf.gz" + String memory = "256M" + Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + String outputType = "z" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + bcftools sort \ + -o ~{outputPath} \ + -O ~{outputType} \ + ~{inputFile} + bcftools index --tbi ~{outputPath} + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputFile: {description: "A vcf or bcf file.", category: "required"} 
+ outputPath: {description: "The location the output VCF file should be written.", category: "common"} + outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } + + +} + task View { input { File inputFile From f07a59aede3deb6e2001e0907ce3073079a20d63 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 17:16:17 +0200 Subject: [PATCH 036/668] add output {} --- bcftools.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index d72efde3..520bcf15 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -142,6 +142,11 @@ task Sort { bcftools index --tbi ~{outputPath} } + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + runtime { memory: memory time_minutes: timeMinutes From 4664f90c91fd801a7cb6322cf69333fd44dfcd92 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 17:45:12 +0200 Subject: [PATCH 037/668] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1733c93f..85beb2eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ bcftools: add sorting and annotation + Bam2fastx: Input bam and index are now arrays. + Lima: Remove globs from outputs. + Updated task gridss.wdl: add --jvmheap parameter From 3327f388f3ac184c1c0bc37dd2e920dc2e8e71fb Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 17 Aug 2020 11:29:20 +0200 Subject: [PATCH 038/668] Update submodules. 
--- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index c0b48b0a..0cca0f40 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit c0b48b0a916913d1e6751d7744d1cec37559a81f +Subproject commit 0cca0f40a8e9121e8dcc9e76838f85835a0d8e94 From e554f35a07e4f6427e1d8ad1cb7ddcaf3fc50ce0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 20 Aug 2020 15:38:25 +0200 Subject: [PATCH 039/668] add sage task --- sage.wdl | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 sage.wdl diff --git a/sage.wdl b/sage.wdl new file mode 100644 index 00000000..dbc101dc --- /dev/null +++ b/sage.wdl @@ -0,0 +1,92 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Sage { + input { + String tumorName + File tumorBam + File tumorBai + String? normalName + File? normalBam + File? 
normalBai + String assembly + File referenceFasta + File hotspotVcf + File panelBed + File highConfidenceBed + + Int timeMinutes = 60 #FIXME I've no idea how long this takes... + Int threads = 2 + String javaXmx = "32G" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" + } + + command { + SAGE \ + -Xmx~{javaXmx} \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + ~{"-reference " + normalName} \ + ~{"-reference_bam " + normalBam} \ + -assembly ~{assembly} \ + -ref_genome ~{referenceFasta} \ + -hotspots ~{hotspotVcf} \ + -panel_bed ~{panelBed} \ + -high_confidence_bed ~{highConfidenceBed} \ + -threads ~{threads} \ + + -out ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + time_minutes: timeMinutes + cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} + tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} + normalName: {description: "The name of the normal/reference sample.", category: "common"} + normalBam: {description: "The BAM file for the normal sample.", category: "common"} + normalBam: {description: "The BAM file for the normal sample.", category: "common"} + assembly: {description: "The assembly of the reference genomes, either hg19 or hg38.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + hotspotVcf: {description: "A VCF file with hotspot variant sites.", category: "required"} + panelBed: {description: "A bed file describing a panel of cancer related genes.", category: "required"} + highConfidenceBed: {description: "A bed file describing high confidence regions.", category: "required"} + + threads: {description: "The number of threads to be used.", category: "advanced"} + memory: {description: "The amount of memory 
this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} \ No newline at end of file From a6c3487834da5ce6b6a40ce2e966e9d899abb240 Mon Sep 17 00:00:00 2001 From: Redmar Date: Mon, 24 Aug 2020 09:41:30 +0200 Subject: [PATCH 040/668] Add option to ignore masked reference --- vt.wdl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/vt.wdl b/vt.wdl index d4c134b9..8a9f9de8 100644 --- a/vt.wdl +++ b/vt.wdl @@ -26,6 +26,7 @@ task Normalize { File inputVCFIndex File referenceFasta File referenceFastaFai + Boolean ignoreMaskedRef = false String outputPath = "./vt/normalized_decomposed.vcf" String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" String memory = "4G" @@ -33,9 +34,12 @@ task Normalize { } command { - set -e + set -eo pipefail mkdir -p "$(dirname ~{outputPath})" - vt normalize ~{inputVCF} -r ~{referenceFasta} | vt decompose -s - -o ~{outputPath} + vt normalize ~{inputVCF} \ + -r ~{referenceFasta} \ + ~{true="-m " false="" ignoreMaskedRef} \ + | vt decompose -s - -o ~{outputPath} } output { @@ -55,6 +59,7 @@ task Normalize { outputPath: {description: "The location the output VCF file should be written.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs", category: "advanced" memory: 
{description: "The memory required to run the programs", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 47651a09cf7d3cd0fb45bdc20d5ef0227a3bbcd3 Mon Sep 17 00:00:00 2001 From: Redmar Date: Mon, 24 Aug 2020 09:42:47 +0200 Subject: [PATCH 041/668] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 85beb2eb..0d1805ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ vt: Add option to ignore masked reference. + bcftools: add sorting and annotation + Bam2fastx: Input bam and index are now arrays. + Lima: Remove globs from outputs. From fbad1676097484b301fed9e55b36d39dcd7a7524 Mon Sep 17 00:00:00 2001 From: Redmar Date: Mon, 24 Aug 2020 09:55:52 +0200 Subject: [PATCH 042/668] Add closing bracket --- vt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vt.wdl b/vt.wdl index 8a9f9de8..99cc1318 100644 --- a/vt.wdl +++ b/vt.wdl @@ -59,7 +59,7 @@ task Normalize { outputPath: {description: "The location the output VCF file should be written.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs", category: "advanced" + ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs", category: "advanced"} memory: {description: "The memory required to run 
the programs", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From f335ac9b5d0d061fce172ebd843d76e46e3e1ed1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 26 Aug 2020 12:56:51 +0200 Subject: [PATCH 043/668] adjust sage --- sage.wdl | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/sage.wdl b/sage.wdl index dbc101dc..ba0a6137 100644 --- a/sage.wdl +++ b/sage.wdl @@ -28,32 +28,27 @@ task Sage { String? normalName File? normalBam File? normalBai - String assembly File referenceFasta - File hotspotVcf - File panelBed - File highConfidenceBed + File referenceFastaDict + File referenceFastaFai + File knownHotspots + File codingRegsions Int timeMinutes = 60 #FIXME I've no idea how long this takes... 
- Int threads = 2 String javaXmx = "32G" + String memory = "33G" String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" } command { - SAGE \ - -Xmx~{javaXmx} \ + SAGE -Xmx~{javaXmx} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ ~{"-reference " + normalName} \ ~{"-reference_bam " + normalBam} \ - -assembly ~{assembly} \ -ref_genome ~{referenceFasta} \ - -hotspots ~{hotspotVcf} \ - -panel_bed ~{panelBed} \ - -high_confidence_bed ~{highConfidenceBed} \ - -threads ~{threads} \ - + -known_hotspots ~{knownHotspots} \ + -coding_regions ~{codingRegsions} \ -out ~{outputPath} } @@ -74,12 +69,13 @@ task Sage { tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} normalName: {description: "The name of the normal/reference sample.", category: "common"} normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBam: {description: "The BAM file for the normal sample.", category: "common"} - assembly: {description: "The assembly of the reference genomes, either hg19 or hg38.", category: "required"} + normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} referenceFasta: {description: "The reference fasta file.", category: "required"} - hotspotVcf: {description: "A VCF file with hotspot variant sites.", category: "required"} - panelBed: {description: "A bed file describing a panel of cancer related genes.", category: "required"} - highConfidenceBed: {description: "A bed file describing high confidence regions.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + knownHotspots: {description: "A TSV file with hotspot variant sites.", category: "required"} + codingRegsions: {description: "A bed file describing coding regions to search for inframe 
indels.", category: "required"} threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 2b8e422685de9ea6f63831d8780231a058c1b0cb Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 1 Sep 2020 15:08:25 +0200 Subject: [PATCH 044/668] add sagev2 --- sage.wdl | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 2 deletions(-) diff --git a/sage.wdl b/sage.wdl index ba0a6137..ed3d0866 100644 --- a/sage.wdl +++ b/sage.wdl @@ -20,7 +20,7 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -task Sage { +task SageHotspot { input { String tumorName File tumorBam @@ -33,6 +33,7 @@ task Sage { File referenceFastaFai File knownHotspots File codingRegsions + String outputPath = "./sage_hotspot.vcf.gz" Int timeMinutes = 60 #FIXME I've no idea how long this takes... String javaXmx = "32G" @@ -58,7 +59,6 @@ task Sage { runtime { time_minutes: timeMinutes - cpu: threads docker: dockerImage memory: memory } @@ -77,6 +77,82 @@ task Sage { knownHotspots: {description: "A TSV file with hotspot variant sites.", category: "required"} codingRegsions: {description: "A bed file describing coding regions to search for inframe indels.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task Sage { + input { + String tumorName + File tumorBam + String? normalName + File? 
normalBam + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File hotspots + File panelBed + File highConfidenceBed + String assembly = "hg38" + String outputPath = "./sage.vcf.gz" + + Int timeMinutes = 60 #FIXME I've no idea how long this takes... + String javaXmx = "32G" + String memory = "33G" + Int threads = 2 + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" + } + + command { + java -Xmx~{javaXmx} \ + -cp /usr/local/share/hmftools-sage-2.2-0/sage.jar \ + com.hartwig.hmftools.sage.SageApplication \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + ~{"-reference " + normalName} \ + ~{"-reference_bam " + normalBam} \ + -ref_genome ~{referenceFasta} \ + -hotspots ~{hotspots} \ + -panel_bed ~{panelBed} \ + -high_confidence_bed ~{highConfidenceBed} \ + -assembly ~{assembly} \ + -threads ~{threads} \ + -out ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + time_minutes: timeMinutes + cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} + tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} + normalName: {description: "The name of the normal/reference sample.", category: "common"} + normalBam: {description: "The BAM file for the normal sample.", category: "common"} + normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + hotspots: {description: "A VCF file containg hotspot variant sites.", category: "required"} + 
panelBed: {description: "A bed file containing a panel of genes of intrest.", category: "required"} + highConfidenceBed: {description: "A bed file containing high confidence regions.", category: "required"} + assembly: {description: "The genome assembly used, either \"hg19\" or \"hg38\".", category: "common"} + outputPath: {description: "The path to write the output VCF to.", category: "common"} + threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", From 4b249fde4a8e5558039553e4c2e7fa78a5251e6d Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 4 Sep 2020 15:37:32 +0200 Subject: [PATCH 045/668] replace binary digits to boolean --- survivor.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/survivor.wdl b/survivor.wdl index e5ac7b5b..b9583009 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -27,9 +27,9 @@ task Merge { Array[File] filePaths Int breakpointDistance = 1000 Int suppVecs = 2 - Int svType = 1 - Int strandType = 1 - Int distanceBySvSize = 0 + Boolean svType = true + Boolean strandType = true + Boolean distanceBySvSize = false Int minSize = 30 String outputPath = "./survivor/merged.vcf" String memory = "24G" @@ -45,9 +45,9 @@ task Merge { fileList \ ~{breakpointDistance} \ ~{suppVecs} \ - ~{svType} \ - ~{strandType} \ - ~{distanceBySvSize} \ + ~{true=1 false=0 svType} \ + ~{true=1 false=0 strandType} \ + ~{true=1 false=0 distanceBySvSize} \ ~{minSize} \ ~{outputPath} } From f12093281cb37c0521098e8377fc7ef83bc2c618 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 4 Sep 2020 15:41:04 +0200 Subject: [PATCH 046/668] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d1805ed..121c8768 100644 --- a/CHANGELOG.md +++ 
b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ survivor: replace integer boolean type to logical true or false value. + vt: Add option to ignore masked reference. + bcftools: add sorting and annotation + Bam2fastx: Input bam and index are now arrays. From 7bc3c58d309fcb20d9769180f471d79432d2e350 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 7 Sep 2020 17:26:42 +0200 Subject: [PATCH 047/668] make bcftools indexing optional --- bcftools.wdl | 102 +++++++++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 48 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 520bcf15..5d5a1ea6 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -52,6 +52,8 @@ task Annotate { String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } + Boolean indexing = if outputType == "z" then true else false + command { set -e mkdir -p "$(dirname ~{outputPath})" @@ -77,13 +79,14 @@ task Annotate { ~{true="--single-overlaps" false="" singleOverlaps} \ ~{true="--remove" false="" length(removeAnns) > 0} ~{sep="," removeAnns} \ ~{inputFile} - bcftools index --tbi ~{outputPath} + + ~{if indexing then 'bcftools index --tbi ~{outputPath}' else ''} } output { File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" + File? outputVcfIndex = outputPath + ".tbi" } runtime { @@ -132,6 +135,8 @@ task Sort { String outputType = "z" } + Boolean indexing = if outputType == "z" then true else false + command { set -e mkdir -p "$(dirname ~{outputPath})" @@ -139,12 +144,13 @@ task Sort { -o ~{outputPath} \ -O ~{outputType} \ ~{inputFile} - bcftools index --tbi ~{outputPath} + + ~{if indexing then 'bcftools index --tbi ~{outputPath}' else ''} } output { File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" + File? 
outputVcfIndex = outputPath + ".tbi" } runtime { @@ -165,50 +171,6 @@ task Sort { } -task View { - input { - File inputFile - String outputPath = "output.vcf.gz" - String memory = "256M" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) - String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" - String outputType = "z" - Int compressionLevel = 1 - } - - command { - set -e - mkdir -p "$(dirname ~{outputPath})" - bcftools view \ - -o ~{outputPath} \ - -O ~{outputType} \ - -l ~{compressionLevel} \ - ~{inputFile} - bcftools index --tbi ~{outputPath} - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - } - - runtime { - memory: memory - time_minutes: timeMinutes - docker: dockerImage - } - - parameter_meta { - inputFile: {description: "A vcf or bcf file.", category: "required"} - outputPath: {description: "The location the output VCF file should be written.", category: "common"} - outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - } -} - task Stats { input { File inputVcf @@ -313,3 +275,47 @@ task Stats { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} } } + +task View { + input { + File inputFile + String outputPath = "output.vcf" + Int compressionLevel = 0 + String memory = "256M" + Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + } + + String outputType = if compressionLevel > 0 then "z" else "v" + Boolean indexing = if compressionLevel > 0 then true else false + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + bcftools view \ + -o ~{outputPath} \ + -l ~{compressionLevel} \ + -O ~{outputType} \ + ~{inputFile} + + ~{if indexing then 'bcftools index --tbi ~{outputPath}' else ''} + } + output { + File outputVcf = outputPath + File? outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputFile: {description: "A vcf or bcf file.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} From 5781179d1b806467b8ffc8d5a39e41d6e7c58a5c Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 7 Sep 2020 17:54:35 +0200 Subject: [PATCH 048/668] made output extension depends on compression level --- bcftools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/bcftools.wdl b/bcftools.wdl index 5d5a1ea6..10db8b98 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -288,6 +288,7 @@ task View { String outputType = if compressionLevel > 0 then "z" else "v" Boolean indexing = if compressionLevel > 0 then true else false + String outputFilePath = if compressionLevel > 0 then outputPath + ".gz" else outputPath command { set -e From 33cdf52e284dd503054f2668b178662e2f7ff152 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 9 Sep 2020 13:34:35 +0200 Subject: [PATCH 049/668] update collect-columns to 1.0.0 --- collect-columns.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/collect-columns.wdl b/collect-columns.wdl index e4e3a948..fe41c5e8 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -29,13 +29,14 @@ task CollectColumns { Int? separator Array[String]? sampleNames Boolean header = false + Boolean sumOnDuplicateId = false Array[String]? additionalAttributes File? referenceGtf String? 
featureAttribute Int memoryGb = 4 + ceil(0.5 * length(inputTables)) Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/collect-columns:0.2.0--py_1" + String dockerImage = "quay.io/biocontainers/collect-columns:1.0.0--py_0" } command { @@ -49,6 +50,7 @@ task CollectColumns { ~{"-s " + separator} \ ~{true="-n" false="" defined(sampleNames)} ~{sep=" " sampleNames} \ ~{true="-H" false="" header} \ + ~{true="-S" false="" sumOnDuplicateId} \ ~{true="-a" false="" defined(additionalAttributes)} ~{sep=" " additionalAttributes} \ ~{"-g " + referenceGtf} \ ~{"-F " + featureAttribute} @@ -72,6 +74,7 @@ task CollectColumns { separator: {description: "Equivalent to the -s option of collect-columns.", category: "advanced"} sampleNames: {description: "Equivalent to the -n option of collect-columns.", category: "advanced"} header: {description: "Equivalent to the -H flag of collect-columns.", category: "advanced"} + sumOnDuplicateId: {description: "Equivalent to the -S flag of collect-columns.", category: "advanced"} additionalAttributes: {description: "Equivalent to the -a option of collect-columns.", category: "advanced"} referenceGtf: {description: "Equivalent to the -g option of collect-columns.", category: "advanced"} featureAttribute: {description: "Equivalent to the -F option of collect-columns.", category: "advanced"} From 452b5810a358eeb915e6c5ba98525e210262811d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 9 Sep 2020 13:40:51 +0200 Subject: [PATCH 050/668] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 121c8768..a9329bf5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ collect-columns: updated docker image to version 1.0.0 and added the + `sumOnDuplicateId` input (defaults to false). + survivor: replace integer boolean type to logical true or false value. 
+ vt: Add option to ignore masked reference. + bcftools: add sorting and annotation From a651adc575a7ca8707447958a84950d9378b5ee4 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 10 Sep 2020 11:53:38 +0200 Subject: [PATCH 051/668] add paramter meta compressionLevel --- bcftools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/bcftools.wdl b/bcftools.wdl index 10db8b98..affa805a 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -314,6 +314,7 @@ task View { parameter_meta { inputFile: {description: "A vcf or bcf file.", category: "required"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 1643ff2c165b27ca8cacf66899c30ccad5e0f3b3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Sep 2020 16:29:07 +0200 Subject: [PATCH 052/668] update sage --- sage.wdl | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/sage.wdl b/sage.wdl index ba0a6137..cdce4680 100644 --- a/sage.wdl +++ b/sage.wdl @@ -31,33 +31,45 @@ task Sage { File referenceFasta File referenceFastaDict File referenceFastaFai - File knownHotspots - File codingRegsions + File hotspots + File panelBed + File highConfidenceBed + Boolean hg38 = false + String outputPath - Int timeMinutes = 60 #FIXME I've no idea how long this takes... 
+ Int threads = 2 String javaXmx = "32G" String memory = "33G" - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--2" } command { - SAGE -Xmx~{javaXmx} \ + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-sage-2.2-2/sage.jar \ + com.hartwig.hmftools.sage.SageApplication \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ ~{"-reference " + normalName} \ ~{"-reference_bam " + normalBam} \ -ref_genome ~{referenceFasta} \ - -known_hotspots ~{knownHotspots} \ - -coding_regions ~{codingRegsions} \ + -hotspots ~{hotspots} \ + -panel_bed ~{panelBed} \ + -high_confidence_bed ~{highConfidenceBed} \ + -assembly ~{true="hg38" false="hg19" hg38} \ + -threads ~{threads} \ -out ~{outputPath} } output { File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + # There is some plots as well, but in the current container the labels in the plots are just series of `□`s. + # This seems to be a systemic issue with R generated plots in biocontainers... 
} runtime { - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey cpu: threads docker: dockerImage memory: memory @@ -74,8 +86,9 @@ task Sage { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - knownHotspots: {description: "A TSV file with hotspot variant sites.", category: "required"} - codingRegsions: {description: "A bed file describing coding regions to search for inframe indels.", category: "required"} + hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + panelBed: {description: "A bed file describing coding regions to search for in frame indels.", category: "required"} + highConfidenceBed: {description: "A bed files describing high confidence mapping regions.", category: "required"} threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 5f61dd78277dd0d9b408ce866c9e9548b6f152a3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Sep 2020 16:33:31 +0200 Subject: [PATCH 053/668] fix sage... --- sage.wdl | 89 +------------------------------------------------------- 1 file changed, 1 insertion(+), 88 deletions(-) diff --git a/sage.wdl b/sage.wdl index 251630ce..f6e8588b 100644 --- a/sage.wdl +++ b/sage.wdl @@ -20,7 +20,7 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-task SageHotspot { +task Sage { input { String tumorName File tumorBam @@ -31,17 +31,11 @@ task SageHotspot { File referenceFasta File referenceFastaDict File referenceFastaFai -<<<<<<< HEAD File hotspots File panelBed File highConfidenceBed Boolean hg38 = false - String outputPath -======= - File knownHotspots - File codingRegsions String outputPath = "./sage_hotspot.vcf.gz" ->>>>>>> 2b8e422685de9ea6f63831d8780231a058c1b0cb Int threads = 2 String javaXmx = "32G" @@ -75,12 +69,8 @@ task SageHotspot { } runtime { -<<<<<<< HEAD time_minutes: timeMinutes # !UnknownRuntimeKey cpu: threads -======= - time_minutes: timeMinutes ->>>>>>> 2b8e422685de9ea6f63831d8780231a058c1b0cb docker: dockerImage memory: memory } @@ -108,80 +98,3 @@ task SageHotspot { category: "advanced"} } } - -task Sage { - input { - String tumorName - File tumorBam - String? normalName - File? normalBam - File referenceFasta - File referenceFastaFai - File referenceFastaDict - File hotspots - File panelBed - File highConfidenceBed - String assembly = "hg38" - String outputPath = "./sage.vcf.gz" - - Int timeMinutes = 60 #FIXME I've no idea how long this takes... 
- String javaXmx = "32G" - String memory = "33G" - Int threads = 2 - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" - } - - command { - java -Xmx~{javaXmx} \ - -cp /usr/local/share/hmftools-sage-2.2-0/sage.jar \ - com.hartwig.hmftools.sage.SageApplication \ - -tumor ~{tumorName} \ - -tumor_bam ~{tumorBam} \ - ~{"-reference " + normalName} \ - ~{"-reference_bam " + normalBam} \ - -ref_genome ~{referenceFasta} \ - -hotspots ~{hotspots} \ - -panel_bed ~{panelBed} \ - -high_confidence_bed ~{highConfidenceBed} \ - -assembly ~{assembly} \ - -threads ~{threads} \ - -out ~{outputPath} - } - - output { - File outputVcf = outputPath - } - - runtime { - time_minutes: timeMinutes - cpu: threads - docker: dockerImage - memory: memory - } - - parameter_meta { - tumorName: {description: "The name of the tumor sample.", category: "required"} - tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} - tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} - normalName: {description: "The name of the normal/reference sample.", category: "common"} - normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - hotspots: {description: "A VCF file containg hotspot variant sites.", category: "required"} - panelBed: {description: "A bed file containing a panel of genes of intrest.", category: "required"} - highConfidenceBed: {description: "A bed file containing high confidence regions.", category: "required"} - assembly: {description: "The genome assembly used, either 
\"hg19\" or \"hg38\".", category: "common"} - outputPath: {description: "The path to write the output VCF to.", category: "common"} - - threads: {description: "The number of threads to be used.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} \ No newline at end of file From bea730a027a6a3c27675af6e4c85bf72a9aad841 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 11 Sep 2020 10:49:46 +0200 Subject: [PATCH 054/668] change default outputPath of sage --- sage.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sage.wdl b/sage.wdl index f6e8588b..71378bc7 100644 --- a/sage.wdl +++ b/sage.wdl @@ -35,7 +35,7 @@ task Sage { File panelBed File highConfidenceBed Boolean hg38 = false - String outputPath = "./sage_hotspot.vcf.gz" + String outputPath = "./sage.vcf.gz" Int threads = 2 String javaXmx = "32G" From c6d2c3ccc41031e7759655fa274ad0323362b418 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 11 Sep 2020 12:02:12 +0200 Subject: [PATCH 055/668] change bai to bamIndex in sage --- sage.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sage.wdl b/sage.wdl index 71378bc7..79458cc1 100644 --- a/sage.wdl +++ b/sage.wdl @@ -24,10 +24,10 @@ task Sage { input { String tumorName File tumorBam - File tumorBai + File tumorBamIndex String? normalName File? normalBam - File? normalBai + File? 
normalBamIndex File referenceFasta File referenceFastaDict File referenceFastaFai From fb14c451e290628e6666181844c47c8716510565 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 11 Sep 2020 12:14:32 +0200 Subject: [PATCH 056/668] fix paramter_meta --- sage.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sage.wdl b/sage.wdl index 79458cc1..7c04aa99 100644 --- a/sage.wdl +++ b/sage.wdl @@ -78,7 +78,7 @@ task Sage { parameter_meta { tumorName: {description: "The name of the tumor sample.", category: "required"} tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} - tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} + tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} normalName: {description: "The name of the normal/reference sample.", category: "common"} normalBam: {description: "The BAM file for the normal sample.", category: "common"} normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} From ddf76915f2fdb19774c782a957c5403f307933a7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 11 Sep 2020 13:42:00 +0200 Subject: [PATCH 057/668] fix paramter_meta --- sage.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sage.wdl b/sage.wdl index 7c04aa99..ab42bee8 100644 --- a/sage.wdl +++ b/sage.wdl @@ -81,7 +81,7 @@ task Sage { tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} normalName: {description: "The name of the normal/reference sample.", category: "common"} normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} + normalBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} referenceFasta: {description: "The reference fasta file.", 
category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} From ece83524abd6676c9666cf8027d27cdca77a7279 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Sep 2020 09:57:18 +0200 Subject: [PATCH 058/668] Add timeMinutes to Classify. --- CHANGELOG.md | 2 ++ centrifuge.wdl | 70 +++----------------------------------------------- 2 files changed, 5 insertions(+), 67 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9329bf5..4c22ef8b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Centrifuge: Add `timeMinutes` to `Classify` task and remove broken & + unnecessary downloading tasks. + collect-columns: updated docker image to version 1.0.0 and added the `sumOnDuplicateId` input (defaults to false). + survivor: replace integer boolean type to logical true or false value. diff --git a/centrifuge.wdl b/centrifuge.wdl index ee305325..bc2ea462 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -110,6 +110,7 @@ task Classify { Int threads = 4 String memory = "16G" + Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -150,6 +151,7 @@ task Classify { runtime { cpu: threads memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -169,6 +171,7 @@ task Classify { excludeTaxIDs: {description: "A comma-separated list of taxonomic IDs that will be excluded in classification procedure.", category: "common"} threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -233,73 +236,6 @@ task Inspect { } } -task Download { - input { - String libraryPath - Array[String]? domain - String executable = "centrifuge-download" - String? preCommand - String? seqTaxMapPath - String database = "refseq" - String? assemblyLevel - String? refseqCategory - Array[String]? taxIds - Boolean filterUnplaced = false - Boolean maskLowComplexRegions = false - Boolean downloadRnaSeqs = false - Boolean modifyHeader = false - Boolean downloadGiMap = false - } - - # This will use centrifuge-download to download. - # The bash statement at the beginning is to make sure - # the directory for the SeqTaxMapPath exists. - command { - set -e -o pipefail - ~{preCommand} - ~{"mkdir -p $(dirname " + seqTaxMapPath + ")"} - ~{executable} \ - -o ~{libraryPath} \ - ~{true='-d ' false='' defined(domain)}~{sep=',' domain} \ - ~{'-a "' + assemblyLevel + '"'} \ - ~{"-c " + refseqCategory} \ - ~{true='-t' false='' defined(taxIds)} '~{sep=',' taxIds}' \ - ~{true='-r' false='' downloadRnaSeqs} \ - ~{true='-u' false='' filterUnplaced} \ - ~{true='-m' false='' maskLowComplexRegions} \ - ~{true='-l' false='' modifyHeader} \ - ~{true='-g' false='' downloadGiMap} \ - ~{database} ~{">> " + seqTaxMapPath} - } - - output { - File seqTaxMap = "~{seqTaxMapPath}" - File library = libraryPath - Array[File] fastaFiles = glob(libraryPath + "/*/*.fna") - } - } - -task DownloadTaxonomy { - input { - String taxonomyDir - String executable = "centrifuge-download" - String? 
preCommand - } - - command { - set -e -o pipefail - ~{preCommand} - ~{executable} \ - -o ~{taxonomyDir} \ - taxonomy - } - - output { - File taxonomyTree = taxonomyDir + "/nodes.dmp" - File nameTable = taxonomyDir + "/names.dmp" - } - } - task KReport { input { File classification From 70747bdf89e05b3ab05cfebd75f5d13dff75741b Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Sep 2020 10:03:12 +0200 Subject: [PATCH 059/668] Update CHANGELOG. --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c22ef8b..933081d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ Centrifuge: Add `timeMinutes` to `Classify` task and remove broken & - unnecessary downloading tasks. ++ Centrifuge: Add `timeMinutes` to `Classify` task and remove unnecessary + downloading tasks (alternative is refseqtools). + collect-columns: updated docker image to version 1.0.0 and added the `sumOnDuplicateId` input (defaults to false). + survivor: replace integer boolean type to logical true or false value. From 68120ed6530bf60cc114cffdeeed143d8b132c8e Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Sep 2020 15:19:44 +0200 Subject: [PATCH 060/668] Add NanoQC and NanoPlot. 
--- nanopack.wdl | 175 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 nanopack.wdl diff --git a/nanopack.wdl b/nanopack.wdl new file mode 100644 index 00000000..59193f96 --- /dev/null +++ b/nanopack.wdl @@ -0,0 +1,175 @@ +version 1.0 + +# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task NanoPlot { + input { + File inputFile + String inputFileType + String outputDir + String outputPrefix + String outputPath = outputDir + outputPrefix + Boolean outputTsvStats = true + Boolean dropOutliers = false + Boolean logLengths = false + String format = "png" + Boolean showN50 = true + String title = basename(outputPrefix) + + Int? maxLength + Int? minLength + Int? minQual + String? 
readType + + Int threads = 2 + String memory = "2G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biocontainers/nanoplot:1.32.0--py_0" + } + + Map[String, String] fileTypeOptions = {"fastq": "--fastq ", "fasta": "--fasta ", "fastq_rich": "--fastq_rich ", "fastq_minimal": "--fastq_minimal ", "summary": "--summary ", "bam": "--bam ", "ubam": "--ubam ", "cram": "--cram ", "pickle": "--pickle ", "feather": "--feather "} + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + NanoPlot \ + --threads ~{threads} \ + --outdir ~{outputDir} \ + --prefix ~{outputPrefix} \ + ~{true="--tsv_stats" false="" outputTsvStats} \ + ~{true="--drop_outliers" false="" dropOutliers} \ + ~{true="--loglength" false="" logLengths} \ + --format ~{format} \ + ~{true="--N50" false="--no-N50" showN50} \ + ~{fileTypeOptions[inputFileType] + inputFile} \ + ~{"--maxlength " + maxLength} \ + ~{"--minlength " + minLength} \ + ~{"--minqual " + minQual} \ + ~{"--readtype " + readType} + } + + output { + File dynamicHistogram = outputDir + outputPrefix + "Dynamic_Histogram_Read_length.html" + File readLengthHistogram = outputDir + outputPrefix + "HistogramReadlength.png" + File lengthVsQualityScatterPlotDot = outputDir + outputPrefix + "LengthvsQualityScatterPlot_dot.png" + File lengthVsQualityScatterPlotKde = outputDir + outputPrefix + "LengthvsQualityScatterPlot_kde.png" + File logScaleReadLengthHistogram = outputDir + outputPrefix + "LogTransformed_HistogramReadlength.png" + File report = outputDir + outputPrefix + "NanoPlot-report.html" + File weightedHistogram = outputDir + outputPrefix + "Weighted_HistogramReadlength.png" + File weightedLogScaleHistogram = outputDir + outputPrefix + "Weighted_LogTransformed_HistogramReadlength.png" + File yieldByLength = outputDir + outputPrefix + "Yield_By_Length.png" + File? 
stats = outputDir + outputPrefix + "NanoStats.txt" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input file.", category: "required"} + inputFileType: {description: "The format of the read file.", category: "required"} + outputDir: {description: "Output directory path.", category: "required"} + outputPrefix: {description: "Output file prefix.", category: "required"} + outputTsvStats: {description: "Output the stats file as a properly formatted TSV.", category: "common"} + dropOutliers: {description: "Drop outlier reads with extreme long length.", category: "advanced"} + logLengths: {description: "Additionally show logarithmic scaling of lengths in plots.", category: "advanced"} + format: {description: "Specify the output format of the plots.", category: "required"} + showN50: {description: "Show the N50 mark in the read length histogram.", category: "common"} + title: {description: "Add a title to all plots, requires quoting if using spaces.", category: "common"} + maxLength: {description: "Hide reads longer than length specified.", category: "advanced"} + minLength: {description: "Hide reads shorter than length specified.", category: "advanced"} + minQual: {description: "Drop reads with an average quality lower than specified.", category: "advanced"} + readType: {description: "Which read type to extract information about from summary. Options are 1D, 2D, 1D2", category: "advanced"} + threads: {description: "The number of threads to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + dynamicHistogram: {description: ""} + readLengthHistogram: {description: ""} + lengthVsQualityScatterPlotDot: {description: ""} + lengthVsQualityScatterPlotKde: {description: ""} + logScaleReadLengthHistogram: {description: ""} + report: {description: ""} + weightedHistogram: {description: ""} + weightedLogScaleHistogram: {description: ""} + yieldByLength: {description: ""} + stats: {description: ""} + } +} + +task NanoQc { + input { + File inputFile + String outputDir + Boolean directRna = false + + Int? minLength + + Int threads = 2 + String memory = "2G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biocontainers/nanoqc:0.9.4--py_0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputDir})" + nanoQC \ + --outdir ~{outputDir} \ + ~{true="--rna" false="" directRna} \ + ~{"--minlen " + minLength} \ + ~{inputFile} + } + + output { + File report = outputDir + "nanoQC.html" + File log = outputDir + "NanoQC.log" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input file.", category: "required"} + outputDir: {description: "Output directory path.", category: "required"} + directRna: {description: "Fastq is from direct RNA-seq and contains U nucleotides.", category: "common"} + minLength: {description: "Filters the reads on a minimal length of the given range. Also plots the given length/2 of the begin and end of the reads.", category: "advanced"} + threads: {description: "The number of threads to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + report: {description: ""} + log: {description: ""} + } +} From 00b947f945b5da4f44812d9ea6a41347b1dc2ba7 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Sep 2020 16:56:58 +0200 Subject: [PATCH 061/668] Update changelog. --- CHANGELOG.md | 1 + nanopack.wdl | 7 ++----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 85beb2eb..7b4079cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Add NanoPlot and NanoQC tasks. + bcftools: add sorting and annotation + Bam2fastx: Input bam and index are now arrays. + Lima: Remove globs from outputs. diff --git a/nanopack.wdl b/nanopack.wdl index 59193f96..661f99de 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -59,11 +59,11 @@ task NanoPlot { ~{true="--loglength" false="" logLengths} \ --format ~{format} \ ~{true="--N50" false="--no-N50" showN50} \ - ~{fileTypeOptions[inputFileType] + inputFile} \ ~{"--maxlength " + maxLength} \ ~{"--minlength " + minLength} \ ~{"--minqual " + minQual} \ - ~{"--readtype " + readType} + ~{"--readtype " + readType} \ + ~{fileTypeOptions[inputFileType] + inputFile} } output { @@ -129,7 +129,6 @@ task NanoQc { Int? minLength - Int threads = 2 String memory = "2G" Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/nanoqc:0.9.4--py_0" @@ -151,7 +150,6 @@ task NanoQc { } runtime { - cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -163,7 +161,6 @@ task NanoQc { outputDir: {description: "Output directory path.", category: "required"} directRna: {description: "Fastq is from direct RNA-seq and contains U nucleotides.", category: "common"} minLength: {description: "Filters the reads on a minimal length of the given range. 
Also plots the given length/2 of the begin and end of the reads.", category: "advanced"} - threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 39aa53feeacf6a3d9b96c5adc2eec9c85eb92bba Mon Sep 17 00:00:00 2001 From: Jasper Date: Mon, 14 Sep 2020 16:59:12 +0200 Subject: [PATCH 062/668] Update CHANGELOG.md --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2bd7cbf1..cf85eb0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,10 +16,10 @@ version 5.0.0-dev `sumOnDuplicateId` input (defaults to false). + survivor: replace integer boolean type to logical true or false value. + vt: Add option to ignore masked reference. -+ bcftools: add sorting and annotation ++ bcftools: add sorting and annotation. + Bam2fastx: Input bam and index are now arrays. + Lima: Remove globs from outputs. -+ Updated task gridss.wdl: add --jvmheap parameter ++ Updated task gridss.wdl: add --jvmheap parameter. + A bwa-mem2 task was created with the same interface (including usePostalt) as the bwa mem task. + bwa mem and bwa kit are now one task. The usePostalt boolean can be used to From 5b46df4bd5c4ecbd130de52e081b3e9258627188 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 16 Sep 2020 09:58:48 +0200 Subject: [PATCH 063/668] Complete parameter_meta. 
--- nanopack.wdl | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/nanopack.wdl b/nanopack.wdl index 661f99de..ba68af1b 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -108,16 +108,16 @@ task NanoPlot { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - dynamicHistogram: {description: ""} - readLengthHistogram: {description: ""} - lengthVsQualityScatterPlotDot: {description: ""} - lengthVsQualityScatterPlotKde: {description: ""} - logScaleReadLengthHistogram: {description: ""} - report: {description: ""} - weightedHistogram: {description: ""} - weightedLogScaleHistogram: {description: ""} - yieldByLength: {description: ""} - stats: {description: ""} + dynamicHistogram: {description: "Dynamic histogram of read length."} + readLengthHistogram: {description: "Histogram of read length."} + lengthVsQualityScatterPlotDot: {description: "Read lengths vs average read quality plot."} + lengthVsQualityScatterPlotKde: {description: "Read lengths vs average read quality plot."} + logScaleReadLengthHistogram: {description: "Histogram of read lengths after log transformation."} + report: {description: "Html summary report."} + weightedHistogram: {description: "Weighted histogram of read lengths."} + weightedLogScaleHistogram: {description: "Weighted histogram of read lengths after log transformation."} + yieldByLength: {description: "Cumulative yield plot."} + stats: {description: "NanoStats report."} } } @@ -166,7 +166,7 @@ task NanoQc { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - report: {description: ""} - log: {description: ""} + report: {description: "Html summary report."} + log: {description: "Progress report."} } } From af550dd024ff6fe5df365ebec58808f8517b2516 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 16 Sep 2020 12:11:06 +0200 Subject: [PATCH 064/668] Make some outputs optional. --- nanopack.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nanopack.wdl b/nanopack.wdl index ba68af1b..6860cf13 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -69,13 +69,13 @@ task NanoPlot { output { File dynamicHistogram = outputDir + outputPrefix + "Dynamic_Histogram_Read_length.html" File readLengthHistogram = outputDir + outputPrefix + "HistogramReadlength.png" - File lengthVsQualityScatterPlotDot = outputDir + outputPrefix + "LengthvsQualityScatterPlot_dot.png" - File lengthVsQualityScatterPlotKde = outputDir + outputPrefix + "LengthvsQualityScatterPlot_kde.png" File logScaleReadLengthHistogram = outputDir + outputPrefix + "LogTransformed_HistogramReadlength.png" File report = outputDir + outputPrefix + "NanoPlot-report.html" File weightedHistogram = outputDir + outputPrefix + "Weighted_HistogramReadlength.png" File weightedLogScaleHistogram = outputDir + outputPrefix + "Weighted_LogTransformed_HistogramReadlength.png" File yieldByLength = outputDir + outputPrefix + "Yield_By_Length.png" + File? lengthVsQualityScatterPlotDot = outputDir + outputPrefix + "LengthvsQualityScatterPlot_dot.png" + File? lengthVsQualityScatterPlotKde = outputDir + outputPrefix + "LengthvsQualityScatterPlot_kde.png" File? 
stats = outputDir + outputPrefix + "NanoStats.txt" } @@ -110,13 +110,13 @@ task NanoPlot { # outputs dynamicHistogram: {description: "Dynamic histogram of read length."} readLengthHistogram: {description: "Histogram of read length."} - lengthVsQualityScatterPlotDot: {description: "Read lengths vs average read quality plot."} - lengthVsQualityScatterPlotKde: {description: "Read lengths vs average read quality plot."} logScaleReadLengthHistogram: {description: "Histogram of read lengths after log transformation."} report: {description: "Html summary report."} weightedHistogram: {description: "Weighted histogram of read lengths."} weightedLogScaleHistogram: {description: "Weighted histogram of read lengths after log transformation."} yieldByLength: {description: "Cumulative yield plot."} + lengthVsQualityScatterPlotDot: {description: "Read lengths vs average read quality plot."} + lengthVsQualityScatterPlotKde: {description: "Read lengths vs average read quality plot."} stats: {description: "NanoStats report."} } } From 041721c1f49d981e18477ad208ecad3580fb9dbd Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 21 Sep 2020 16:10:57 +0200 Subject: [PATCH 065/668] Remove metrics file. --- CHANGELOG.md | 2 ++ centrifuge.wdl | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce42941e..b11e4223 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Centrifuge: Remove metrics file from classification (which causes the + summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 + Add NanoPlot and NanoQC tasks. + Centrifuge: Add `timeMinutes` to `Classify` task and remove unnecessary downloading tasks (alternative is refseqtools). 
diff --git a/centrifuge.wdl b/centrifuge.wdl index bc2ea462..1e7a0b45 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -128,7 +128,6 @@ task Classify { ~{inputFormatOptions[inputFormat]} \ ~{true="--phred64" false="--phred33" phred64} \ --min-hitlen ~{minHitLength} \ - ~{"--met-file " + outputPrefix + "_alignment_metrics.tsv"} \ --threads ~{threads} \ ~{"--trim5 " + trim5} \ ~{"--trim3 " + trim3} \ @@ -143,7 +142,6 @@ task Classify { >>> output { - File metrics = outputPrefix + "_alignment_metrics.tsv" File classification = outputPrefix + "_classification.tsv" File report = outputPrefix + "_output_report.tsv" } @@ -175,7 +173,6 @@ task Classify { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - metrics: {description: "File with centrifuge metrics."} classification: {description: "File with the classification results."} report: {description: "File with a classification summary."} } From fbbfc5bec27636e709de907c871efaab24d8f1c1 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 21 Sep 2020 16:13:57 +0200 Subject: [PATCH 066/668] Change indexing. --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b11e4223..142622e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- + Centrifuge: Remove metrics file from classification (which causes the - summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 + summary report to be empty). + https://github.com/DaehwanKimLab/centrifuge/issues/83 + Add NanoPlot and NanoQC tasks. + Centrifuge: Add `timeMinutes` to `Classify` task and remove unnecessary downloading tasks (alternative is refseqtools). 
From 66852ef0a1f5a08259a0f8eafc01d7a5d2bf1732 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 29 Sep 2020 16:39:10 +0200 Subject: [PATCH 067/668] add snpeff task --- snpeff.wdl | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 snpeff.wdl diff --git a/snpeff.wdl b/snpeff.wdl new file mode 100644 index 00000000..e1c0184f --- /dev/null +++ b/snpeff.wdl @@ -0,0 +1,73 @@ +version 1.0 + +task snpEff { + input { + File vcf + File vcfIndex + String genomeVersion + File datadirZip + String outputPath = "./snpeff.vcf" + Boolean hgvs = true + Boolean lof = true + Boolean noDownstream = false + Boolean noIntergenic = false + Boolean noShiftHgvs = false + Int? upDownStreamLen + + String memory = "50G" + String javaXmx = "49G" + Int timeMinutes = 60 #FIXME + String dockerImage = "quay.io/biocontainers/snpeff:5.0--0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + unzip ~{datadirZip} + snpEff -Xmx~{javaXmx}G -XX:ParallelGCThreads=1 \ + -v \ + ~{genomeVersion} \ + -noDownload \ + -dataDir $PWD/data \ + ~{vcf} \ + ~{true="-hgvs" false="-noHgvs" hgvs} \ + ~{true="-lof" false="-noLof" lof} \ + ~{true="-no-downstream" false="" noDownstream} \ + ~{true="-no-intergenic" false="" noIntergenic} \ + ~{true="-noShiftHgvs" false="" noShiftHgvs} \ + ~{"-upDownStreamLen " + upDownStreamLen} \ + > ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + vcf: {description: "A VCF file to analyse.", category: "required"} + vcfIndex: {description: "The index for the VCF file.", category: "required"} + genomeVersion: {description: "The version of the genome to be used. The database for this genome must be present in the datadirZip.", category: "required"} + datadirZip: {description: "A zip file containing the directory of databases. 
This zip file must contain a directory called `data`, with the database mentioned in the genomeVersion input as subdirectory.", + category: "required"} + outputPath: {description: "The path to write the output to.", category: "common"} + hgvs: {description: "Equivalent to `-hgvs` if true or `-noHgvs` if false.", category: "advanced"} + lof: {description: "Equivalent to `-lof` if true or `-noLof` if false.", category: "advanced"} + noDownstream: {description: "Equivalent to the `-no-downstream` flag.", category: "advanced"} + noIntergenic: {description: "Equivalent to the `-no-intergenic` flag.", category: "advanced"} + noShiftHgvs: {description: "Equivalent to the `-noShiftHgvs` flag.", category: "advanced"} + upDownStreamLen: {descriptoin: "Equivalent to the `-upDownStreamLen` option.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From 3ee13418733a762df9883266a73d14426bd26118 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 1 Oct 2020 09:47:52 +0200 Subject: [PATCH 068/668] typo --- snpeff.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snpeff.wdl b/snpeff.wdl index e1c0184f..95383b94 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -1,6 +1,6 @@ version 1.0 -task snpEff { +task SnpEff { input { File vcf File vcfIndex From 35bc2ba3fe927ed842464444506f191f4c268c84 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Fri, 2 Oct 2020 13:17:12 +0200 Subject: [PATCH 069/668] Add parameter meta for threads --- samtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index c155f026..dd771415 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -362,6 +362,7 @@ task Merge { # inputs bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} + threads: {description: "Number of threads to use.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From 81095b1bb400c28b3ad01cfb6ddef7b6a74907ed Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Fri, 2 Oct 2020 13:19:18 +0200 Subject: [PATCH 070/668] Update Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 142622e2..e2f266e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. 
version 5.0.0-dev --------------------------- ++ Samtools: Add parameter meta for Merge task + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 From 38333745daff01234eb36e178fb97ffb76c87d84 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Oct 2020 14:20:32 +0200 Subject: [PATCH 071/668] fix bcftools filter --- bcftools.wdl | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index affa805a..b1d6e5f0 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -125,6 +125,53 @@ task Annotate { } } +task Filter { + input { + File vcf + File vcfIndex + Array[String] include = [] + String outputPath = "./filtered.vcf.gz" + + String memory = "256M" + Int timeMinutes = 1 + ceil(size(vcf, "G")) + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + bcftools \ + filter \ + ~{true="-i" false="" length(include) > 0} ~{sep=" -i " include} \ + ~{vcf} \ + -O z \ + -o ~{outputPath} + bctools index --tbi ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + vcf: {description: "The VCF file to operate on.", category: "required"} + vcfIndex: {description: "The index for the VCF file.", category: "required"} + include: {description: "Equivalent to the `-i` option.", category: "common"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + } +} + task Sort { input { File inputFile From 66399ba333105934575da4ff97e43f6e35ef06d4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 5 Oct 2020 13:07:13 +0200 Subject: [PATCH 072/668] fix whitespace --- bcftools.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index b1d6e5f0..619c1733 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -141,11 +141,11 @@ task Filter { set -e mkdir -p "$(dirname ~{outputPath})" bcftools \ - filter \ + filter \ ~{true="-i" false="" length(include) > 0} ~{sep=" -i " include} \ - ~{vcf} \ - -O z \ - -o ~{outputPath} + ~{vcf} \ + -O z \ + -o ~{outputPath} bctools index --tbi ~{outputPath} } From 28bd67e696bfb2302920cc76245f3a6a86161948 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 6 Oct 2020 13:40:28 +0200 Subject: [PATCH 073/668] Update CHANGELOG.md Co-authored-by: Davy Cats --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2f266e9..7668cd2b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ Samtools: Add parameter meta for Merge task ++ Samtools: Add `threads` to parameter meta for Merge task + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). 
https://github.com/DaehwanKimLab/centrifuge/issues/83 From afe600065e0d94a80ba68bba5f23bed8a9f52293 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 6 Oct 2020 13:40:44 +0200 Subject: [PATCH 074/668] Update samtools.wdl Co-authored-by: Davy Cats --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index dd771415..24d95aa4 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -362,7 +362,7 @@ task Merge { # inputs bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} - threads: {description: "Number of threads to use.", category: "common"} + threads: {description: "Number of threads to use.", category: "advanced"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From 09372028e140528ccc255b73c87b48ad45a93a77 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 6 Oct 2020 16:20:29 +0200 Subject: [PATCH 075/668] fix bcftools filter --- bcftools.wdl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 619c1733..0be3be93 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -129,7 +129,9 @@ task Filter { input { File vcf File vcfIndex - Array[String] include = [] + String? include + String? exclude + String? 
softFilter String outputPath = "./filtered.vcf.gz" String memory = "256M" @@ -142,7 +144,9 @@ task Filter { mkdir -p "$(dirname ~{outputPath})" bcftools \ filter \ - ~{true="-i" false="" length(include) > 0} ~{sep=" -i " include} \ + ~{"-i " + include} \ + ~{"-e " + exclude} \ + ~{"-s " + softFilter} ~{vcf} \ -O z \ -o ~{outputPath} From 2fdabcca7e4bba7e1ba2a30d6e47dfb478e58e11 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 7 Oct 2020 13:54:41 +0200 Subject: [PATCH 076/668] Update parameter_meta. --- CHANGELOG.md | 1 + nanopack.wdl | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 142622e2..803c221b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ NanoPack: Add parameter_meta to NanoPlot task. + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 diff --git a/nanopack.wdl b/nanopack.wdl index 6860cf13..e4d15135 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -92,6 +92,7 @@ task NanoPlot { inputFileType: {description: "The format of the read file.", category: "required"} outputDir: {description: "Output directory path.", category: "required"} outputPrefix: {description: "Output file prefix.", category: "required"} + outputPath: {description: "Combination of the outputDir & outputPrefix strings.", category: "advanced"} outputTsvStats: {description: "Output the stats file as a properly formatted TSV.", category: "common"} dropOutliers: {description: "Drop outlier reads with extreme long length.", category: "advanced"} logLengths: {description: "Additionally show logarithmic scaling of lengths in plots.", category: "advanced"} From 6eaf21442d9352266f0ac3e108cf1dc084c1c9f4 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:07:39 +0200 Subject: [PATCH 077/668] Ensure that the index 
and bamfiles are in the same folder --- bam2fastx.wdl | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 42240cd4..18434755 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -91,12 +91,25 @@ task Bam2Fastq { command { set -e mkdir -p "$(dirname ~{outputPrefix})" + + # Localise the bam and pbi files so they are next to each other in the + # current folder + bamfiles="" + for bamfile in ~{sep=" " bam};do + ln $bamfile . + bamfiles=$bamfiles" $(basename $bamfile)" + done + + for bamindex in ~{sep=" " bamIndex}; do + ln $bamindex . + done + bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{sep=" " bam} + $bamfiles } output { From 0dd0afd61c43b625146adce4b4507ec85803381a Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:08:02 +0200 Subject: [PATCH 078/668] Add bam index file as required input for isoseq --- isoseq3.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/isoseq3.wdl b/isoseq3.wdl index 604a71d5..7894b382 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -26,6 +26,7 @@ task Refine { Boolean requirePolyA = false String logLevel = "WARN" File inputBamFile + File inputBamIndex File primerFile String outputDir String outputNamePrefix From 571544cbcbeeda14eadce3b7d633626fcb4f518e Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:08:23 +0200 Subject: [PATCH 079/668] Simplify lima output structure --- lima.wdl | 32 ++++++++++---------------------- samtools.wdl | 1 + 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/lima.wdl b/lima.wdl index 1a40b1c8..38cf2d6e 100644 --- a/lima.wdl +++ b/lima.wdl @@ -58,7 +58,6 @@ task Lima { command { set -e - mkdir -p "$(dirname ~{outputPrefix})" lima \ ~{libraryDesignOptions[libraryDesign]} \ ~{true="--score-full-pass" false="" scoreFullPass} \ @@ -83,32 +82,21 @@ task Lima { 
~{true="--peek-guess" false="" peekGuess} \ --log-level ~{logLevel} \ --num-threads ~{cores} \ - ~{"--log-file " + outputPrefix + ".fl.stderr.log"} \ + ~{"--log-file " + outputPrefix + ".stderr.log"} \ ~{inputBamFile} \ ~{barcodeFile} \ - ~{basename(outputPrefix) + ".fl.bam"} - - # copy commands below are needed because glob command does not find - # multiple bam/bam.pbi/subreadset.xml files when not located in working - # directory. - cp "~{basename(outputPrefix)}.fl.json" "~{outputPrefix}.fl.json" - cp "~{basename(outputPrefix)}.fl.lima.counts" "~{outputPrefix}.fl.lima.counts" - cp "~{basename(outputPrefix)}.fl.lima.report" "~{outputPrefix}.fl.lima.report" - cp "~{basename(outputPrefix)}.fl.lima.summary" "~{outputPrefix}.fl.lima.summary" - find . -path "*.bam" > bamFiles.txt - find . -path "*.bam.pbi" > bamIndexes.txt - find . -path "*.subreadset.xml" > subreadsets.txt + ~{outputPrefix + ".bam"} } output { - Array[File] limaBam = read_lines("bamFiles.txt") - Array[File] limaBamIndex = read_lines("bamIndexes.txt") - Array[File] limaXml = read_lines("subreadsets.txt") - File limaStderr = outputPrefix + ".fl.stderr.log" - File limaJson = outputPrefix + ".fl.json" - File limaCounts = outputPrefix + ".fl.lima.counts" - File limaReport = outputPrefix + ".fl.lima.report" - File limaSummary = outputPrefix + ".fl.lima.summary" + Array[File] limaBam = glob("*.bam") + Array[File] limaBamIndex = glob("*.bam.pbi") + Array[File] limaXml = glob("*.subreadset.xml") + File limaStderr = outputPrefix + ".stderr.log" + File limaJson = outputPrefix + ".json" + File limaCounts = outputPrefix + ".lima.counts" + File limaReport = outputPrefix + ".lima.report" + File limaSummary = outputPrefix + ".lima.summary" } runtime { diff --git a/samtools.wdl b/samtools.wdl index c155f026..dd771415 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -362,6 +362,7 @@ task Merge { # inputs bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location 
the merged BAM file should be written to.", category: "common"} + threads: {description: "Number of threads to use.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From 5ca9c5e22734456a7735ce383d695877e6cb9c08 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:09:11 +0200 Subject: [PATCH 080/668] Add task for indexing PacBio bam files --- pbbam.wdl | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 pbbam.wdl diff --git a/pbbam.wdl b/pbbam.wdl new file mode 100644 index 00000000..368ff4ed --- /dev/null +++ b/pbbam.wdl @@ -0,0 +1,69 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + +task Index { + input { + File bamFile + String? outputBamPath + String memory = "2G" + Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) + String dockerImage = "quay.io/biocontainers/pbbam:1.6.0--h5b7e6e0_0" + } + + # Select_first is needed, otherwise womtool validate fails. + String outputPath = select_first([outputBamPath, basename(bamFile)]) + String bamIndexPath = outputPath + ".pbi" + + command { + bash -c ' + set -e + # Make sure outputBamPath does not exist. + if [ ! -f ~{outputPath} ] + then + mkdir -p "$(dirname ~{outputPath})" + ln ~{bamFile} ~{outputPath} + fi + pbindex ~{outputPath} ~{bamIndexPath} + ' + } + + output { + File indexedBam = outputPath + File index = bamIndexPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + bamFile: {description: "The BAM file for which an index should be made.", category: "required"} + outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", + category: "common"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From d8848dc95d73402eb92483456a35eaac9040a83e Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:10:05 +0200 Subject: [PATCH 081/668] Make intervals optional for gatk GenotypeGVCFs --- gatk.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index e0209a0c..12416dda 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -820,7 +820,7 @@ task GenotypeGVCFs { input { File gvcfFile File gvcfFileIndex - Array[File]+ intervals + Array[File]? intervals String outputPath File referenceFasta File referenceFastaDict @@ -846,9 +846,9 @@ task GenotypeGVCFs { ~{"-D " + dbsnpVCF} \ ~{"--pedigree " + pedigree} \ ~{true="-G" false="" length(annotationGroups) > 0} ~{sep=" -G " annotationGroups} \ - --only-output-calls-starting-in-intervals \ -V ~{gvcfFile} \ - -L ~{sep=' -L ' intervals} + ~{true="--only-output-calls-starting-in-intervals" false="" defined(intervals)} \ + ~{true="-L" false="" defined(intervals)} ~{sep=' -L ' intervals} } output { @@ -866,7 +866,7 @@ task GenotypeGVCFs { parameter_meta { gvcfFile: {description: "The GVCF file to be genotyped.", category: "required"} gvcfFileIndex: {description: "The index of the input GVCF file.", category: "required"} - intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "required"} + intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "optional"} outputPath: {description: "The location to write the output VCF file to.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} From d786fcec2cf3b7ecbe0cdbccbe412cef382fac71 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:10:42 +0200 Subject: [PATCH 082/668] Increase runtime and add sample name for pbmm2 --- 
pbmm2.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index 84fbd2d0..31d4c667 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -30,7 +30,7 @@ task Mapping { Int cores = 4 String memory = "30G" - Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) + Int timeMinutes = 1 + ceil(size(queryFile, "G") * 2000 / cores) String dockerImage = "quay.io/biocontainers/pbmm2:1.3.0--h56fc30b_1" } @@ -41,6 +41,7 @@ task Mapping { -j ~{cores} \ ~{referenceMMI} \ ~{queryFile} \ + --sample ~{sample} \ ~{sample}.align.bam } From a2ae010f8efa3f9d03ea99b61038419956be98b3 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:17:10 +0200 Subject: [PATCH 083/668] Add HsMetrics and VariantcallingMetrics to picard --- picard.wdl | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/picard.wdl b/picard.wdl index 1afa5ea7..9603db8c 100644 --- a/picard.wdl +++ b/picard.wdl @@ -66,6 +66,70 @@ task BedToIntervalList { } } +task CollectHsMetrics { + input { + File inputBam + File inputBamIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai + File targets + File? baits + String basename + + + # Use the targets file as baits as a fallback, since often the baits + # for a certain capture kit are not available. + File baitsFile = select_first([baits, targets]) + File targetsFile = targets + + Int memoryMb = javaXmxMb + 512 + Int javaXmxMb = 3072 + # Additional * 2 because picard multiple metrics reads the reference fasta twice. 
+ Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + } + + command { + set -e + mkdir -p "$(dirname ~{basename})" + picard -Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1 \ + CollectHsMetrics \ + I=~{inputBam} \ + R=~{referenceFasta} \ + BAIT_INTERVALS=~{baitsFile} \ + TARGET_INTERVALS=~{targetsFile} \ + O="~{basename}.hs_metrics.txt" + } + + output { + File HsMetrics = basename + ".hs_metrics.txt" + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: "~{memoryMb}M" + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} + inputBamIndex: {description: "The index of the input BAM file.", category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} + memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task CollectMultipleMetrics { input { File inputBam @@ -315,6 +379,53 @@ task CollectTargetedPcrMetrics { } } +task CollectVariantCallingMetrics { + input { + File dbsnp + File dbsnpIndex + File inputVCF + File inputVCFIndex + String basename + + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 1440 + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + } + + command { + set -e + mkdir -p "$(dirname ~{basename})" + picard -Xmx~{javaXmx} \ + CollectVariantCallingMetrics -XX:ParallelGCThreads=1 \ + DBSNP=~{dbsnp} \ + INPUT=~{inputVCF} \ + OUTPUT=~{basename} + } + + output { + File details = basename + ".variant_calling_detail_metrics" + File summary = basename + ".variant_calling_summary_metrics" + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + # inputs + basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task CreateSequenceDictionary { input { File inputFile From e2fbf4a0275a9ae27de653513cd9c6f1b6340915 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:17:25 +0200 Subject: [PATCH 084/668] Add deepvariant tasks --- deepvariant.wdl | 91 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 deepvariant.wdl diff --git a/deepvariant.wdl b/deepvariant.wdl new file mode 100644 index 00000000..88bdb352 --- /dev/null +++ b/deepvariant.wdl @@ -0,0 +1,91 @@ +version 1.0 + +# Copyright (c) 2018 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task RunDeepVariant { + input { + File referenceFasta + File referenceFastaIndex + File inputBam + File inputBamIndex + String modelType + String outputVcf + File? customizedModel + Int? numShards + String? 
outputGVcf + File? regions + String? sampleName + Boolean? VCFStatsReport = true + + String memory = "3G" + Int timeMinutes = 5000 + String dockerImage = "google/deepvariant:1.0.0" + } + + command { + set -e + + /opt/deepvariant/bin/run_deepvariant \ + --ref ~{referenceFasta} \ + --reads ~{inputBam} \ + --model_type ~{modelType} \ + --output_vcf ~{outputVcf} \ + ~{"--output_gvcf " + outputGVcf} \ + ~{"--customized_model " + customizedModel} \ + ~{"--num_shards " + numShards} \ + ~{"--regions} " + regions} \ + ~{"--sample_name " + sampleName} \ + ~{true="--vcf_stats_report" false="--novcf_stats_report" VCFStatsReport} + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + output { + File outputVCF = outputVcf + File outputVCFIndex = outputVCF + ".tbi" + File? outputGVCF = outputGVcf + File? outputGVCFIndex = outputGVcf + ".tbi" + Array[File] outputVCFStatsReport = glob("*.visual_report.html") + } + + parameter_meta { + referenceFasta: {description: "Genome reference to use", category: "required"} + referenceFastaIndex: {description: "Index for the genome reference file.", category: "required"} + inputBam: {description: "Aligned, sorted, indexed BAM file containing the reads we want to call.", category: "required"} + inputBamIndex: {description: "Index for the input bam file.", category: "required"} + modelType: {description: ". Type of model to use for variant calling. Each model_type has an associated default model, which can be overridden by the --customized_model flag", category: "required"} + outputVcf: {description: "Path where we should write VCF file.", category: "required"} + customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. 
If not set, the default for each --model_type will be used", category: "advanced"} + numShards: {description: "Number of shards for make_examples step.", category: "common"} + outputGVcf: {description: "Path where we should write gVCF file.", category: "common"} + regions: {description: "List of regions we want to process, in BED/BEDPE format.", category: "advanced"} + sampleName: {description: "Sample name to use instead of the sample name from the input reads BAM (SM tag in the header).", category: "common"} + VCFStatsReport: {description: "Output a visual report (HTML) of statistics about the output VCF.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From 41024c35d01b0a954a0eaf6f4f69ab93ec02833b Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:17:33 +0200 Subject: [PATCH 085/668] Add whatshap tasks --- whatshap.wdl | 275 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 275 insertions(+) create mode 100644 whatshap.wdl diff --git a/whatshap.wdl b/whatshap.wdl new file mode 100644 index 00000000..2506aa10 --- /dev/null +++ b/whatshap.wdl @@ -0,0 +1,275 @@ +version 1.0 + +# Copyright (c) 2018 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: 
+# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +task Phase { + input { + String outputVCF + File? reference + File? referenceIndex + Boolean? no_reference + String? tag + File? output_read_list + String? algorithm + Boolean? merge_reads + String? internal_downsampling + String? mapping_quality + Boolean? indels + Boolean? ignore_read_groups + String? sample + String? chromosome + String? error_rate + String? maximum_error_rate + String? threshold + String? negative_threshold + Boolean? full_genotyping + Boolean? distrust_genotypes + Boolean? include_homozygous + String? default_gq + String? gl_regularize_r + File? changed_genotype_list + String? ped + File? recombination_list + String? recomb_rate + File? gen_map + Boolean? no_genetic_haplo_typing + Boolean? 
use_ped_samples + File vcf + File vcfIndex + File phaseInput + File phaseInputIndex + + String memory = "4G" + Int timeMinutes = 120 + # Whatshap 1.0, tabix 0.2.5 + String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" + } + + command { + whatshap phase \ + ~{vcf} \ + ~{phaseInput} \ + ~{if defined(outputVCF) then ("--output " + '"' + outputVCF + '"') else ""} \ + ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ + ~{true="--no-reference" false="" no_reference} \ + ~{if defined(tag) then ("--tag " + '"' + tag + '"') else ""} \ + ~{if defined(output_read_list) then ("--output-read-list " + '"' + output_read_list + '"') else ""} \ + ~{if defined(algorithm) then ("--algorithm " + '"' + algorithm + '"') else ""} \ + ~{true="--merge-reads" false="" merge_reads} \ + ~{if defined(internal_downsampling) then ("--internal-downsampling " + '"' + internal_downsampling + '"') else ""} \ + ~{if defined(mapping_quality) then ("--mapping-quality " + '"' + mapping_quality + '"') else ""} \ + ~{true="--indels" false="" indels} \ + ~{true="--ignore-read-groups" false="" ignore_read_groups} \ + ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ + ~{if defined(chromosome) then ("--chromosome " + '"' + chromosome + '"') else ""} \ + ~{if defined(error_rate) then ("--error-rate " + '"' + error_rate + '"') else ""} \ + ~{if defined(maximum_error_rate) then ("--maximum-error-rate " + '"' + maximum_error_rate + '"') else ""} \ + ~{if defined(threshold) then ("--threshold " + '"' + threshold + '"') else ""} \ + ~{if defined(negative_threshold) then ("--negative-threshold " + '"' + negative_threshold + '"') else ""} \ + ~{true="--full-genotyping" false="" full_genotyping} \ + ~{true="--distrust-genotypes" false="" distrust_genotypes} \ + ~{true="--include-homozygous" false="" include_homozygous} \ + ~{if defined(default_gq) then ("--default-gq " + '"' + 
default_gq + '"') else ""} \ + ~{if defined(gl_regularize_r) then ("--gl-regularizer " + '"' + gl_regularize_r + '"') else ""} \ + ~{if defined(changed_genotype_list) then ("--changed-genotype-list " + '"' + changed_genotype_list + '"') else ""} \ + ~{if defined(ped) then ("--ped " + '"' + ped + '"') else ""} \ + ~{if defined(recombination_list) then ("--recombination-list " + '"' + recombination_list + '"') else ""} \ + ~{if defined(recomb_rate) then ("--recombrate " + '"' + recomb_rate + '"') else ""} \ + ~{if defined(gen_map) then ("--genmap " + '"' + gen_map + '"') else ""} \ + ~{true="--no-genetic-haplotyping" false="" no_genetic_haplo_typing} \ + ~{true="--use-ped-samples" false="" use_ped_samples} && \ + tabix -p vcf ~{outputVCF} + } + + output { + File phasedVCF = outputVCF + File phasedVCFIndex = outputVCF + ".tbi" + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + outputVCF: {description: "Output VCF file. Add .gz to the file name to get compressed output. If omitted, use standard output.", category: "common"} + reference: {description: "Reference file. Provide this to detect alleles through re-alignment. 
If no index (.fai) exists, it will be created", category: "common"} + no_reference: {description: "Detect alleles without requiring a reference, at the expense of phasing quality (in particular for long reads)", category: "common"} + tag: {description: "Store phasing information with PS tag (standardized) or HP tag (used by GATK ReadBackedPhasing) (default: {description: PS)", category: "common"} + output_read_list: {description: "Write reads that have been used for phasing to FILE.", category: "advanced"} + algorithm: {description: "Phasing algorithm to use (default: {description: whatshap)", category: "advanced"} + merge_reads: {description: "Merge reads which are likely to come from the same haplotype (default: {description: do not merge reads)", category: "common"} + internal_downsampling: {description: "Coverage reduction parameter in the internal core phasing algorithm. Higher values increase runtime *exponentially* while possibly improving phasing quality marginally. Avoid using this in the normal case! (default: {description: 15)", category: "advanced"} + mapping_quality: {description: "Minimum mapping quality (default: {description: 20)", category: "common"} + indels: {description: "Also phase indels (default: {description: do not phase indels)", category: "common"} + ignore_read_groups: {description: "Ignore read groups in BAM/CRAM header and assume all reads come from the same sample.", category: "advanced"} + sample: {description: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times.", category: "common"} + chromosome: {description: "Name of chromosome to phase. If not given, all chromosomes in the input VCF are phased. 
Can be used multiple times.", category: "common"} + error_rate: {description: "The probability that a nucleotide is wrong in read merging model (default: {description: 0.15).", category: "advanced"} + maximum_error_rate: {description: "The maximum error rate of any edge of the read merging graph before discarding it (default: {description: 0.25).", category: "advanced"} + threshold: {description: "The threshold of the ratio between the probabilities that a pair of reads come from the same haplotype and different haplotypes in the read merging model (default: {description: 1000000).", category: "advanced"} + negative_threshold: {description: "The threshold of the ratio between the probabilities that a pair of reads come from different haplotypes and the same haplotype in the read merging model (default: {description: 1000).", category: "advanced"} + full_genotyping: {description: "Completely re-genotype all variants based on read data, ignores all genotype data that might be present in the VCF (EXPERIMENTAL FEATURE).", category: "experimental"} + distrust_genotypes: {description: "Allow switching variants from hetero- to homozygous in an optimal solution (see documentation).", category: "advanced"} + include_homozygous: {description: "Also work on homozygous variants, which might be turned to heterozygous", category: "advanced"} + default_gq: {description: "Default genotype quality used as cost of changing a genotype when no genotype likelihoods are available (default 30)", category: "advanced"} + gl_regularize_r: {description: "Constant (float) to be used to regularize genotype likelihoods read from input VCF (default None).", category: "advanced"} + changed_genotype_list: {description: "Write list of changed genotypes to FILE.", category: "advanced"} + ped: {description: "Use pedigree information in PED file to improve phasing (switches to PedMEC algorithm). Columns 2, 3, 4 must refer to child, mother, and father sample names as used in the VCF and BAM/CRAM. 
Other columns are ignored.", category: "advanced"} + recombination_list: {description: "Write putative recombination events to FILE.", category: "advanced"} + recomb_rate: {description: "Recombination rate in cM/Mb (used with --ped). If given, a constant recombination rate is assumed (default: {description: 1.26cM/Mb).", category: "advanced"} + gen_map: {description: "File with genetic map (used with --ped) to be used instead of constant recombination rate, i.e. overrides option --recombrate.", category: "advanced"} + no_genetic_haplo_typing: {description: "Do not merge blocks that are not connected by reads (i.e. solely based on genotype status). Default: {description: when in --ped mode, merge all blocks that contain at least one homozygous genotype in at least one individual into one block.", category: "advanced"} + use_ped_samples: {description: "Only work on samples mentioned in the provided PED file.", category: "advanced"} + vcf: {description: "VCF or BCF file with variants to be phased (can be gzip-compressed)", category: "required"} + vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased", category: "required"} + phaseInput: {description: "BAM, CRAM, VCF or BCF file(s) with phase information, either through sequencing reads (BAM, CRAM) or through phased blocks (VCF, BCF)", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + +task Stats { + input { + String? gtf + String? sample + String? chr_lengths + String? tsv + Boolean? only_sn_vs + String? block_list + String? 
chromosome + File vcf + + String memory = "4G" + Int timeMinutes = 120 + # Whatshap 1.0, tabix 0.2.5 + String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" + } + + command { + whatshap stats \ + ~{vcf} \ + ~{if defined(gtf) then ("--gtf " + '"' + gtf + '"') else ""} \ + ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ + ~{if defined(chr_lengths) then ("--chr-lengths " + '"' + chr_lengths + '"') else ""} \ + ~{if defined(tsv) then ("--tsv " + '"' + tsv + '"') else ""} \ + ~{true="--only-snvs" false="" only_sn_vs} \ + ~{if defined(block_list) then ("--block-list " + '"' + block_list + '"') else ""} \ + ~{if defined(chromosome) then ("--chromosome " + '"' + chromosome + '"') else ""} + } + + output { + File? phasedGTF = gtf + File? phasedTSV = tsv + File? phasedBlockList = block_list + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + gtf: "Write phased blocks to GTF file." + sample: "Name of the sample to process. If not given, use first sample found in VCF." + chr_lengths: "File with chromosome lengths (one line per chromosome, tab separated ' ') needed to compute N50 values." + tsv: "Filename to write statistics to (tab-separated)." + only_sn_vs: "Only process SNVs and ignore all other variants." + block_list: "Filename to write list of all blocks to (one block per line)." + chromosome: "Name of chromosome to process. If not given, all chromosomes in the input VCF are considered." + vcf: "Phased VCF file" + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + +task Haplotag { + input { + String outputFile + File? reference + File? referenceFastaIndex + String? regions + Boolean? ignore_linked_read + String? linked_read_distance_cut_off + Boolean? ignore_read_groups + String? sample + String? output_haplo_tag_list + Boolean? tag_supplementary + File vcf + File vcfIndex + File alignments + File alignmentsIndex + + String memory = "4G" + Int timeMinutes = 120 + # Whatshap 1.0, tabix 0.2.5 + String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" + } + + command { + whatshap haplotag \ + ~{vcf} \ + ~{alignments} \ + ~{if defined(outputFile) then ("--output " + '"' + outputFile+ '"') else ""} \ + ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ + ~{if defined(regions) then ("--regions " + '"' + regions + '"') else ""} \ + ~{true="--ignore-linked-read" false="" ignore_linked_read} \ + ~{if defined(linked_read_distance_cut_off) then ("--linked-read-distance-cutoff " + '"' + linked_read_distance_cut_off + '"') else ""} \ + ~{true="--ignore-read-groups" false="" ignore_read_groups} \ + ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ + ~{if defined(output_haplo_tag_list) then ("--output-haplotag-list " + '"' + output_haplo_tag_list + '"') else ""} \ + ~{true="--tag-supplementary" false="" tag_supplementary} && \ + python3 -c "import pysam; pysam.index('~{outputFile}')" + } + + output { + File bam = outputFile + File bamIndex = outputFile + ".bai" + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + outputFile: "Output file. If omitted, use standard output." + reference: "Reference file. Provide this to detect alleles through re-alignment. 
If no index (.fai) exists, it will be created" + regions: "Specify region(s) of interest to limit the tagging to reads/variants overlapping those regions. You can specify a space-separated list of regions in the form of chrom:start-end, chrom (consider entire chromosome), or chrom:start (consider region from this start to end of chromosome)." + ignore_linked_read: "Ignore linkage information stored in BX tags of the reads." + linked_read_distance_cut_off: "Assume reads with identical BX tags belong to different read clouds if their distance is larger than LINKEDREADDISTANCE (default: 50000)." + ignore_read_groups: "Ignore read groups in BAM/CRAM header and assume all reads come from the same sample." + sample: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times." + output_haplo_tag_list: "Write assignments of read names to haplotypes (tab separated) to given output file. If filename ends in .gz, then output is gzipped." + tag_supplementary: "Also tag supplementary alignments. Supplementary alignments are assigned to the same haplotype the primary alignment has been assigned to (default: only tag primary alignments)." + vcf: "VCF file with phased variants (must be gzip-compressed and indexed)" + alignments: "File (BAM/CRAM) with read alignments to be tagged by haplotype" + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} From 788681506815ef10573eb86cea4efe22f300b5db Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:22:10 +0200 Subject: [PATCH 086/668] Update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 142622e2..26711b72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,11 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ deepvariant: Add task for DeepVariant ++ gatk: Make intervals optional for GenotypeGVCFs ++ isoseq3: Add required bam index input to isoseq3 ++ pbbam: Add task for indexing PacBio bam files ++ picard: Add CollectHsMetrics and CollectVariantCallingMetrics + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 From f531d274c8fcd0789318f08a61b2aa50bed0d3fa Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:23:53 +0200 Subject: [PATCH 087/668] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 26711b72..2ef37f31 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ deepvariant: Add task for DeepVariant ++ deepvariant: Add task for DeepVariant. 
+ gatk: Make intervals optional for GenotypeGVCFs + isoseq3: Add required bam index input to isoseq3 + pbbam: Add task for indexing PacBio bam files From 1f0a112b763687055b2b647d7f1845d4e57a5664 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:24:02 +0200 Subject: [PATCH 088/668] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ef37f31..f4d217fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- + deepvariant: Add task for DeepVariant. -+ gatk: Make intervals optional for GenotypeGVCFs ++ gatk: Make intervals optional for GenotypeGVCFs. + isoseq3: Add required bam index input to isoseq3 + pbbam: Add task for indexing PacBio bam files + picard: Add CollectHsMetrics and CollectVariantCallingMetrics From d4cfd015be4aacc306454b4410bd6a98a79627bc Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:24:11 +0200 Subject: [PATCH 089/668] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f4d217fc..1f75492f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ version 5.0.0-dev --------------------------- + deepvariant: Add task for DeepVariant. + gatk: Make intervals optional for GenotypeGVCFs. -+ isoseq3: Add required bam index input to isoseq3 ++ isoseq3: Add required bam index input to isoseq3. 
+ pbbam: Add task for indexing PacBio bam files + picard: Add CollectHsMetrics and CollectVariantCallingMetrics + Centrifuge: Remove metrics file from classification (which causes the From 80c84a4ae5946a0297bc0f30afaec66f327a8d55 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:24:20 +0200 Subject: [PATCH 090/668] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f75492f..6230afbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ version 5.0.0-dev + gatk: Make intervals optional for GenotypeGVCFs. + isoseq3: Add required bam index input to isoseq3. + pbbam: Add task for indexing PacBio bam files -+ picard: Add CollectHsMetrics and CollectVariantCallingMetrics ++ picard: Add CollectHsMetrics and CollectVariantCallingMetrics. + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 From 31bbeddf090f618084a71ecbd33a90842aa46b40 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:24:35 +0200 Subject: [PATCH 091/668] Update picard.wdl Co-authored-by: Jasper --- picard.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index 9603db8c..d6b23245 100644 --- a/picard.wdl +++ b/picard.wdl @@ -77,7 +77,6 @@ task CollectHsMetrics { File? baits String basename - # Use the targets file as baits as a fallback, since often the baits # for a certain capture kit are not available. 
File baitsFile = select_first([baits, targets]) From b6178110f9824758ac3a4e94f025825d23c170a2 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:36:20 +0200 Subject: [PATCH 092/668] Update parameter meta --- isoseq3.wdl | 1 + picard.wdl | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/isoseq3.wdl b/isoseq3.wdl index 7894b382..5060f0e7 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -73,6 +73,7 @@ task Refine { requirePolyA: {description: "Require fl reads to have a poly(A) tail and remove it.", category: "common"} logLevel: {description: "Set log level. Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} inputBamFile: {description: "Bam input file.", category: "required"} + inputBamIndex: {description: "Index for the Bam input file.", category: "required"} primerFile: {description: "Barcode/primer fasta file.", category: "required"} outputDir: {description: "Output directory path.", category: "required"} outputNamePrefix: {description: "Basename of the output files.", category: "required"} diff --git a/picard.wdl b/picard.wdl index d6b23245..b5ad0cb4 100644 --- a/picard.wdl +++ b/picard.wdl @@ -415,6 +415,10 @@ task CollectVariantCallingMetrics { parameter_meta { # inputs + dbsnp: {description: "DBSNP vcf file to use with CollectVariantCallingMetrics.", category: "required"} + dbsnpIndex: {description: "Index file for the DBSNP VCF.", category: "required"} + inputVCF: {description: "Input VCF file", category: "required"} + inputVCFIndex: {description: "Index file for the input VCF.", category: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", From 353224aadecf82940e915424a017870ff2580d20 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:42:45 +0200 Subject: [PATCH 093/668] Add parameter meta for CollectHsMetrics --- picard.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/picard.wdl b/picard.wdl index b5ad0cb4..49db8b8b 100644 --- a/picard.wdl +++ b/picard.wdl @@ -119,6 +119,8 @@ task CollectHsMetrics { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + targets: {description: "Picard interval file of the capture targets.", category: "required"} + baits: {description: "Picard interval file of the capture bait set.", category: "advanced"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", From 7c065d4046a50c89727a1377618919a14814d9c2 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 9 Oct 2020 11:29:04 +0200 Subject: [PATCH 094/668] remove outputType and indexing instead based on extension of the file --- bcftools.wdl | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index affa805a..e2251331 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -35,7 +35,6 @@ task Annotate { Boolean keepSites = false String? markSites Boolean noVersion = false - String outputType = "z" String? regions File? regionsFile File? 
renameChrs @@ -52,14 +51,14 @@ task Annotate { String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } - Boolean indexing = if outputType == "z" then true else false + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") command { set -e mkdir -p "$(dirname ~{outputPath})" bcftools annotate \ -o ~{outputPath} \ - -O ~{outputType} \ + -O ~{true="z" false="v" compressed} \ ~{"--annotations " + annsFile} \ ~{"--collapse " + collapse} \ ~{true="--columns" false="" length(columns) > 0} ~{sep="," columns} \ @@ -80,7 +79,7 @@ task Annotate { ~{true="--remove" false="" length(removeAnns) > 0} ~{sep="," removeAnns} \ ~{inputFile} - ~{if indexing then 'bcftools index --tbi ~{outputPath}' else ''} + ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} } @@ -97,7 +96,6 @@ task Annotate { parameter_meta { outputPath: {description: "The location the output VCF file should be written.", category: "common"} - outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "advanced"} collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} columns: {description: "Comma-separated list of columns or tags to carry over from the annotation file (see man page for details).", category: "advanced"} @@ -132,20 +130,19 @@ task Sort { String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" - String outputType = "z" } - Boolean indexing = if outputType == "z" then true else false + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") command { set -e mkdir -p "$(dirname ~{outputPath})" bcftools sort \ -o ~{outputPath} \ - -O ~{outputType} \ + -O ~{true="z" false="v" compressed} \ ~{inputFile} - ~{if indexing then 
'bcftools index --tbi ~{outputPath}' else ''} + ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} } output { @@ -162,7 +159,6 @@ task Sort { parameter_meta { inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -280,26 +276,22 @@ task View { input { File inputFile String outputPath = "output.vcf" - Int compressionLevel = 0 String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } - String outputType = if compressionLevel > 0 then "z" else "v" - Boolean indexing = if compressionLevel > 0 then true else false - String outputFilePath = if compressionLevel > 0 then outputPath + ".gz" else outputPath + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") command { set -e mkdir -p "$(dirname ~{outputPath})" bcftools view \ -o ~{outputPath} \ - -l ~{compressionLevel} \ - -O ~{outputType} \ + -O ~{true="z" false="v" compressed} \ ~{inputFile} - ~{if indexing then 'bcftools index --tbi ~{outputPath}' else ''} + ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} } output { File outputVcf = outputPath @@ -314,7 +306,6 @@ task View { parameter_meta { inputFile: {description: "A vcf or bcf file.", category: "required"} - compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} outputPath: {description: 
"The location the output VCF file should be written.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From d96e2b14a6cd362b1d7cf8e613e10a19ee98e315 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 9 Oct 2020 11:47:20 +0200 Subject: [PATCH 095/668] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9329bf5..cfda7abb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ bcftools: remove outputType and implement indexing based on output file extension. + collect-columns: updated docker image to version 1.0.0 and added the `sumOnDuplicateId` input (defaults to false). + survivor: replace integer boolean type to logical true or false value. From 14d3118230bd2e42e5dec40e5312091518b6ab19 Mon Sep 17 00:00:00 2001 From: Jasper Date: Mon, 12 Oct 2020 13:25:10 +0200 Subject: [PATCH 096/668] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7668cd2b..11a39d89 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ Samtools: Add `threads` to parameter meta for Merge task ++ Samtools: Add `threads` to parameter meta for Merge task. + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 From 153438890ea1068846522b7e6386256bba48ab71 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 12 Oct 2020 15:53:41 +0200 Subject: [PATCH 097/668] add tmpDir input to specify temporary directory when sorting. 
--- CHANGELOG.md | 1 + bcftools.wdl | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c32d349..700bf0b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ bcftools: add tmpDir input to specify temporary directory when sorting. + bcftools: remove outputType and implement indexing based on output file extension. + NanoPack: Add parameter_meta to NanoPlot task. + Centrifuge: Remove metrics file from classification (which causes the diff --git a/bcftools.wdl b/bcftools.wdl index e2251331..63f2cacb 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -127,6 +127,7 @@ task Sort { input { File inputFile String outputPath = "output.vcf.gz" + String tmpDir = "./sorting-tmp" String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" @@ -136,10 +137,11 @@ task Sort { command { set -e - mkdir -p "$(dirname ~{outputPath})" + mkdir -p "$(dirname ~{outputPath})" ~{tmpDir} bcftools sort \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ + -T ~{tmpDir} \ ~{inputFile} ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} From 72ad1f1b4c6123a72518de01e36c0ba6a79657bb Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 12 Oct 2020 16:21:06 +0200 Subject: [PATCH 098/668] add tmpDir to parameter_meta section --- bcftools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/bcftools.wdl b/bcftools.wdl index 63f2cacb..a0aeb442 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -161,6 +161,7 @@ task Sort { parameter_meta { inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} + tmpDir: {description: "The location of the temporary files during the bcftools sorting.", category: "advanced"} memory: 
{description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 4760d1873df4204bb64c38f6d6c8378c41568b46 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 13 Oct 2020 10:08:58 +0200 Subject: [PATCH 099/668] remove redundant G in -Xmx in snpeff --- snpeff.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snpeff.wdl b/snpeff.wdl index 95383b94..079a720a 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -24,7 +24,7 @@ task SnpEff { set -e mkdir -p "$(dirname ~{outputPath})" unzip ~{datadirZip} - snpEff -Xmx~{javaXmx}G -XX:ParallelGCThreads=1 \ + snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -v \ ~{genomeVersion} \ -noDownload \ From 3fa0f1411831448f15e17506dfef9230b303a5f1 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 15 Oct 2020 15:38:45 +0200 Subject: [PATCH 100/668] Remove most inputs --- whatshap.wdl | 110 ++++++++------------------------------------------- 1 file changed, 16 insertions(+), 94 deletions(-) diff --git a/whatshap.wdl b/whatshap.wdl index 2506aa10..1334d45b 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -26,33 +26,13 @@ task Phase { String outputVCF File? reference File? referenceIndex - Boolean? no_reference String? tag - File? output_read_list String? algorithm - Boolean? merge_reads - String? internal_downsampling - String? mapping_quality Boolean? indels - Boolean? ignore_read_groups String? sample String? chromosome - String? error_rate - String? maximum_error_rate String? threshold - String? negative_threshold - Boolean? full_genotyping - Boolean? distrust_genotypes - Boolean? include_homozygous - String? default_gq - String? gl_regularize_r - File? changed_genotype_list String? ped - File? 
recombination_list - String? recomb_rate - File? gen_map - Boolean? no_genetic_haplo_typing - Boolean? use_ped_samples File vcf File vcfIndex File phaseInput @@ -70,33 +50,13 @@ task Phase { ~{phaseInput} \ ~{if defined(outputVCF) then ("--output " + '"' + outputVCF + '"') else ""} \ ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ - ~{true="--no-reference" false="" no_reference} \ ~{if defined(tag) then ("--tag " + '"' + tag + '"') else ""} \ - ~{if defined(output_read_list) then ("--output-read-list " + '"' + output_read_list + '"') else ""} \ ~{if defined(algorithm) then ("--algorithm " + '"' + algorithm + '"') else ""} \ - ~{true="--merge-reads" false="" merge_reads} \ - ~{if defined(internal_downsampling) then ("--internal-downsampling " + '"' + internal_downsampling + '"') else ""} \ - ~{if defined(mapping_quality) then ("--mapping-quality " + '"' + mapping_quality + '"') else ""} \ ~{true="--indels" false="" indels} \ - ~{true="--ignore-read-groups" false="" ignore_read_groups} \ ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ ~{if defined(chromosome) then ("--chromosome " + '"' + chromosome + '"') else ""} \ - ~{if defined(error_rate) then ("--error-rate " + '"' + error_rate + '"') else ""} \ - ~{if defined(maximum_error_rate) then ("--maximum-error-rate " + '"' + maximum_error_rate + '"') else ""} \ ~{if defined(threshold) then ("--threshold " + '"' + threshold + '"') else ""} \ - ~{if defined(negative_threshold) then ("--negative-threshold " + '"' + negative_threshold + '"') else ""} \ - ~{true="--full-genotyping" false="" full_genotyping} \ - ~{true="--distrust-genotypes" false="" distrust_genotypes} \ - ~{true="--include-homozygous" false="" include_homozygous} \ - ~{if defined(default_gq) then ("--default-gq " + '"' + default_gq + '"') else ""} \ - ~{if defined(gl_regularize_r) then ("--gl-regularizer " + '"' + gl_regularize_r + '"') else ""} \ - ~{if defined(changed_genotype_list) then 
("--changed-genotype-list " + '"' + changed_genotype_list + '"') else ""} \ ~{if defined(ped) then ("--ped " + '"' + ped + '"') else ""} \ - ~{if defined(recombination_list) then ("--recombination-list " + '"' + recombination_list + '"') else ""} \ - ~{if defined(recomb_rate) then ("--recombrate " + '"' + recomb_rate + '"') else ""} \ - ~{if defined(gen_map) then ("--genmap " + '"' + gen_map + '"') else ""} \ - ~{true="--no-genetic-haplotyping" false="" no_genetic_haplo_typing} \ - ~{true="--use-ped-samples" false="" use_ped_samples} && \ tabix -p vcf ~{outputVCF} } @@ -114,33 +74,13 @@ task Phase { parameter_meta { outputVCF: {description: "Output VCF file. Add .gz to the file name to get compressed output. If omitted, use standard output.", category: "common"} reference: {description: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created", category: "common"} - no_reference: {description: "Detect alleles without requiring a reference, at the expense of phasing quality (in particular for long reads)", category: "common"} tag: {description: "Store phasing information with PS tag (standardized) or HP tag (used by GATK ReadBackedPhasing) (default: {description: PS)", category: "common"} - output_read_list: {description: "Write reads that have been used for phasing to FILE.", category: "advanced"} algorithm: {description: "Phasing algorithm to use (default: {description: whatshap)", category: "advanced"} - merge_reads: {description: "Merge reads which are likely to come from the same haplotype (default: {description: do not merge reads)", category: "common"} - internal_downsampling: {description: "Coverage reduction parameter in the internal core phasing algorithm. Higher values increase runtime *exponentially* while possibly improving phasing quality marginally. Avoid using this in the normal case! 
(default: {description: 15)", category: "advanced"} - mapping_quality: {description: "Minimum mapping quality (default: {description: 20)", category: "common"} indels: {description: "Also phase indels (default: {description: do not phase indels)", category: "common"} - ignore_read_groups: {description: "Ignore read groups in BAM/CRAM header and assume all reads come from the same sample.", category: "advanced"} sample: {description: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times.", category: "common"} chromosome: {description: "Name of chromosome to phase. If not given, all chromosomes in the input VCF are phased. Can be used multiple times.", category: "common"} - error_rate: {description: "The probability that a nucleotide is wrong in read merging model (default: {description: 0.15).", category: "advanced"} - maximum_error_rate: {description: "The maximum error rate of any edge of the read merging graph before discarding it (default: {description: 0.25).", category: "advanced"} threshold: {description: "The threshold of the ratio between the probabilities that a pair of reads come from the same haplotype and different haplotypes in the read merging model (default: {description: 1000000).", category: "advanced"} - negative_threshold: {description: "The threshold of the ratio between the probabilities that a pair of reads come from different haplotypes and the same haplotype in the read merging model (default: {description: 1000).", category: "advanced"} - full_genotyping: {description: "Completely re-genotype all variants based on read data, ignores all genotype data that might be present in the VCF (EXPERIMENTAL FEATURE).", category: "experimental"} - distrust_genotypes: {description: "Allow switching variants from hetero- to homozygous in an optimal solution (see documentation).", category: "advanced"} - include_homozygous: {description: "Also work on homozygous variants, which might be turned to 
heterozygous", category: "advanced"} - default_gq: {description: "Default genotype quality used as cost of changing a genotype when no genotype likelihoods are available (default 30)", category: "advanced"} - gl_regularize_r: {description: "Constant (float) to be used to regularize genotype likelihoods read from input VCF (default None).", category: "advanced"} - changed_genotype_list: {description: "Write list of changed genotypes to FILE.", category: "advanced"} ped: {description: "Use pedigree information in PED file to improve phasing (switches to PedMEC algorithm). Columns 2, 3, 4 must refer to child, mother, and father sample names as used in the VCF and BAM/CRAM. Other columns are ignored.", category: "advanced"} - recombination_list: {description: "Write putative recombination events to FILE.", category: "advanced"} - recomb_rate: {description: "Recombination rate in cM/Mb (used with --ped). If given, a constant recombination rate is assumed (default: {description: 1.26cM/Mb).", category: "advanced"} - gen_map: {description: "File with genetic map (used with --ped) to be used instead of constant recombination rate, i.e. overrides option --recombrate.", category: "advanced"} - no_genetic_haplo_typing: {description: "Do not merge blocks that are not connected by reads (i.e. solely based on genotype status). 
Default: {description: when in --ped mode, merge all blocks that contain at least one homozygous genotype in at least one individual into one block.", category: "advanced"} - use_ped_samples: {description: "Only work on samples mentioned in the provided PED file.", category: "advanced"} vcf: {description: "VCF or BCF file with variants to be phased (can be gzip-compressed)", category: "required"} vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased", category: "required"} phaseInput: {description: "BAM, CRAM, VCF or BCF file(s) with phase information, either through sequencing reads (BAM, CRAM) or through phased blocks (VCF, BCF)", category: "required"} @@ -154,10 +94,8 @@ task Stats { input { String? gtf String? sample - String? chr_lengths String? tsv - Boolean? only_sn_vs - String? block_list + String? blockList String? chromosome File vcf @@ -168,21 +106,19 @@ task Stats { } command { - whatshap stats \ + whatshap stats \ ~{vcf} \ ~{if defined(gtf) then ("--gtf " + '"' + gtf + '"') else ""} \ ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ - ~{if defined(chr_lengths) then ("--chr-lengths " + '"' + chr_lengths + '"') else ""} \ ~{if defined(tsv) then ("--tsv " + '"' + tsv + '"') else ""} \ - ~{true="--only-snvs" false="" only_sn_vs} \ - ~{if defined(block_list) then ("--block-list " + '"' + block_list + '"') else ""} \ + ~{if defined(blockList) then ("--block-list " + '"' + blockList + '"') else ""} \ ~{if defined(chromosome) then ("--chromosome " + '"' + chromosome + '"') else ""} } output { - File? phasedGTF = gtf - File? phasedTSV = tsv - File? phasedBlockList = block_list + File? phasedGTF = gtf + File? phasedTSV = tsv + File? phasedBlockList = blockList } runtime { @@ -194,10 +130,8 @@ task Stats { parameter_meta { gtf: "Write phased blocks to GTF file." sample: "Name of the sample to process. If not given, use first sample found in VCF." 
- chr_lengths: "File with chromosome lengths (one line per chromosome, tab separated ' ') needed to compute N50 values." tsv: "Filename to write statistics to (tab-separated)." - only_sn_vs: "Only process SNVs and ignore all other variants." - block_list: "Filename to write list of all blocks to (one block per line)." + blockList: "Filename to write list of all blocks to (one block per line)." chromosome: "Name of chromosome to process. If not given, all chromosomes in the input VCF are considered." vcf: "Phased VCF file" memory: {description: "The amount of memory this job will use.", category: "advanced"} @@ -212,12 +146,7 @@ task Haplotag { File? reference File? referenceFastaIndex String? regions - Boolean? ignore_linked_read - String? linked_read_distance_cut_off - Boolean? ignore_read_groups String? sample - String? output_haplo_tag_list - Boolean? tag_supplementary File vcf File vcfIndex File alignments @@ -230,24 +159,19 @@ task Haplotag { } command { - whatshap haplotag \ + whatshap haplotag \ ~{vcf} \ ~{alignments} \ ~{if defined(outputFile) then ("--output " + '"' + outputFile+ '"') else ""} \ ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ ~{if defined(regions) then ("--regions " + '"' + regions + '"') else ""} \ - ~{true="--ignore-linked-read" false="" ignore_linked_read} \ - ~{if defined(linked_read_distance_cut_off) then ("--linked-read-distance-cutoff " + '"' + linked_read_distance_cut_off + '"') else ""} \ - ~{true="--ignore-read-groups" false="" ignore_read_groups} \ ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ - ~{if defined(output_haplo_tag_list) then ("--output-haplotag-list " + '"' + output_haplo_tag_list + '"') else ""} \ - ~{true="--tag-supplementary" false="" tag_supplementary} && \ python3 -c "import pysam; pysam.index('~{outputFile}')" } output { - File bam = outputFile - File bamIndex = outputFile + ".bai" + File bam = outputFile + File bamIndex = outputFile + ".bai" } runtime { 
@@ -258,16 +182,14 @@ task Haplotag { parameter_meta { outputFile: "Output file. If omitted, use standard output." - reference: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created" + reference: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created." + referenceIndex: "Index for the reference file." regions: "Specify region(s) of interest to limit the tagging to reads/variants overlapping those regions. You can specify a space-separated list of regions in the form of chrom:start-end, chrom (consider entire chromosome), or chrom:start (consider region from this start to end of chromosome)." - ignore_linked_read: "Ignore linkage information stored in BX tags of the reads." - linked_read_distance_cut_off: "Assume reads with identical BX tags belong to different read clouds if their distance is larger than LINKEDREADDISTANCE (default: 50000)." - ignore_read_groups: "Ignore read groups in BAM/CRAM header and assume all reads come from the same sample." sample: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times." - output_haplo_tag_list: "Write assignments of read names to haplotypes (tab separated) to given output file. If filename ends in .gz, then output is gzipped." - tag_supplementary: "Also tag supplementary alignments. Supplementary alignments are assigned to the same haplotype the primary alignment has been assigned to (default: only tag primary alignments)." - vcf: "VCF file with phased variants (must be gzip-compressed and indexed)" - alignments: "File (BAM/CRAM) with read alignments to be tagged by haplotype" + vcf: "VCF file with phased variants (must be gzip-compressed and indexed)." + vcfIndex: "Index for the VCF or BCF file with variants to be phased." + alignments: "File (BAM/CRAM) with read alignments to be tagged by haplotype." 
+ alignmentsIndex: "Index for the alignment file." memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 289a42d5baaaa7aa0a38cbadde436d610009d4f5 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 15 Oct 2020 15:50:14 +0200 Subject: [PATCH 101/668] Rename parameter meta for index --- whatshap.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whatshap.wdl b/whatshap.wdl index 1334d45b..2ee90f50 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -183,7 +183,7 @@ task Haplotag { parameter_meta { outputFile: "Output file. If omitted, use standard output." reference: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created." - referenceIndex: "Index for the reference file." + referenceFastaIndex: "Index for the reference file." regions: "Specify region(s) of interest to limit the tagging to reads/variants overlapping those regions. You can specify a space-separated list of regions in the form of chrom:start-end, chrom (consider entire chromosome), or chrom:start (consider region from this start to end of chromosome)." sample: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times." vcf: "VCF file with phased variants (must be gzip-compressed and indexed)." 
From a772e3773feedcb22f7e18f8a1f0130fd9b3cf0c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 16 Oct 2020 15:08:33 +0200 Subject: [PATCH 102/668] add gripss, timeMinutes for gridss, fix typos --- bcftools.wdl | 2 +- gridss.wdl | 7 ++- gripss.wdl | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++ snpeff.wdl | 2 +- 4 files changed, 124 insertions(+), 4 deletions(-) create mode 100644 gripss.wdl diff --git a/bcftools.wdl b/bcftools.wdl index 0be3be93..e68e527c 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -150,7 +150,7 @@ task Filter { ~{vcf} \ -O z \ -o ~{outputPath} - bctools index --tbi ~{outputPath} + bcftools index --tbi ~{outputPath} } output { diff --git a/gridss.wdl b/gridss.wdl index 44b9e9f1..7516553d 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2017 Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -34,7 +34,8 @@ task GRIDSS { String outputPrefix = "gridss" Int jvmHeapSizeGb = 30 - Int threads = 1 + Int threads = 2 + Int timeMinutes = ceil(1440 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } @@ -64,6 +65,7 @@ task GRIDSS { runtime { cpu: threads memory: "~{jvmHeapSizeGb + 1}G" + time_minutes: timeMinutes docker: dockerImage } @@ -79,6 +81,7 @@ task GRIDSS { threads: {description: "The number of the threads to use.", category: "advanced"} jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } \ No newline at end of file diff --git a/gripss.wdl b/gripss.wdl new file mode 100644 index 00000000..6ed0bcf9 --- /dev/null +++ b/gripss.wdl @@ -0,0 +1,117 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task ApplicationKt { + input { + File inputVcf + String outputPath = "gripss.vcf.gz" + File referenceFasta + File breakpointHotspot + File breakendPon + File breakpointPon + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + com.hartwig.hmftools.gripss.GripssApplicationKt \ + -ref_genome ~{referenceFasta} \ + -breakpoint_hotspot ~{breakpointHotspot} \ + -breakend_pon ~{breakendPon} \ + -breakpoint_pon ~{breakpointPon} \ + -input_vcf ~{inputVcf} \ + -output_vcf ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF.", category: "required"} + outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "advanced"} + breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} + breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} + breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task HardFilterApplicationKt { + input { + File inputVcf + String outputPath = "gripss_hard_filter.vcf.gz" + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ + -input_vcf ~{inputVcf} \ + -output_vcf ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF.", category: "required"} + outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} \ No newline at end of file diff --git a/snpeff.wdl b/snpeff.wdl index 079a720a..d639a036 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -24,7 +24,7 @@ task SnpEff { set -e mkdir -p "$(dirname ~{outputPath})" unzip ~{datadirZip} - snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -v \ ~{genomeVersion} \ -noDownload \ From 37ba60dd104f3a221c29d6fd6cf2e5c2be76e1ce Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 22 Oct 2020 07:31:08 +0200 Subject: [PATCH 103/668] Add memory to samtools Merge --- samtools.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 24d95aa4..ad94338a 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -332,6 +332,7 @@ task Merge { Int threads = 1 Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) + String memory = "4G" String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } String indexPath = sub(outputBamPath, "\.bam$",".bai") @@ -355,6 +356,7 @@ task Merge { runtime { cpu: threads docker: dockerImage + memory: memory time_minutes: timeMinutes } @@ -362,7 +364,7 @@ task Merge { # inputs bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} - threads: {description: "Number of threads to use.", category: "advanced"} + threads: {description: "Number of threads to use.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", From 6581d965977ab6a4f31058065bca84fc4106ed9f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 22 Oct 2020 14:05:48 +0200 Subject: [PATCH 104/668] add AnnotateInsertedSequence task to gridss.wdl --- gridss.wdl | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 7516553d..78e4bd40 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -84,4 +84,60 @@ task GRIDSS { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } +} + +task AnnotateInsertedSequence { + input { + File inputVcf + String outputPath = "gridss.annotated.vcf.gz" + File viralReference + + Int threads = 8 + String javaXmx = "8G" + String memory = "9G" + String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" + Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2 / threads) + } + + command { + java -Xmx~{javaXmx} \ + -Dsamjdk.create_index=true \ + -Dsamjdk.use_async_io_read_samtools=true \ + -Dsamjdk.use_async_io_write_samtools=true \ + -Dsamjdk.use_async_io_write_tribble=true \ + -Dsamjdk.buffer_size=4194304 \ + -cp /usr/local/share/gridss-2.9.4-0/gridss.jar \ + gridss.AnnotateInsertedSequence \ + REFERENCE_SEQUENCE=~{viralReference} \ + INPUT=~{inputVcf} \ + OUTPUT=~{outputPath} \ + ALIGNMENT=APPEND \ + WORKING_DIR='.' 
\ + WORKER_THREADS=~{threads} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF file.", category: "required"} + outputPath: {description: "The path the output will be written to.", category: "common"} + viralReference: {description: "A fasta file with viral sequences.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } \ No newline at end of file From cd64c02f84707a26ed6787e83269347ed6a69ca4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 22 Oct 2020 15:27:17 +0200 Subject: [PATCH 105/668] add some # !UnknownRuntimeKey --- gridss.wdl | 4 ++-- gripss.wdl | 4 ++-- snpeff.wdl | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 78e4bd40..89558ff3 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -65,7 +65,7 @@ task GRIDSS { runtime { cpu: threads memory: "~{jvmHeapSizeGb + 1}G" - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } @@ -124,7 +124,7 @@ task AnnotateInsertedSequence { runtime { cpu: threads memory: memory - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } diff --git a/gripss.wdl b/gripss.wdl index 6ed0bcf9..3f500a60 100644 --- a/gripss.wdl +++ b/gripss.wdl @@ -54,7 +54,7 @@ task ApplicationKt { runtime { memory: 
memory - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } @@ -100,7 +100,7 @@ task HardFilterApplicationKt { runtime { memory: memory - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } diff --git a/snpeff.wdl b/snpeff.wdl index d639a036..a26fadbd 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -45,7 +45,7 @@ task SnpEff { runtime { docker: dockerImage - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey memory: memory } From 208e8f46530b8a1d0dbdbd3afa22bc7449c03da3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 09:37:14 +0100 Subject: [PATCH 106/668] add some missing inputs to gridss AnnotateInsertedSequence and add missing \ to bcftools Filter --- bcftools.wdl | 2 +- gridss.wdl | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index e68e527c..4703580a 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -146,7 +146,7 @@ task Filter { filter \ ~{"-i " + include} \ ~{"-e " + exclude} \ - ~{"-s " + softFilter} + ~{"-s " + softFilter} \ ~{vcf} \ -O z \ -o ~{outputPath} diff --git a/gridss.wdl b/gridss.wdl index 89558ff3..cfbb7069 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -91,6 +91,8 @@ task AnnotateInsertedSequence { File inputVcf String outputPath = "gridss.annotated.vcf.gz" File viralReference + File viralReferenceFai + File viralReferenceDict Int threads = 8 String javaXmx = "8G" From 674158b82e2a637c536853113721c48db6e6d09c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 10:51:06 +0100 Subject: [PATCH 107/668] add license notice to snpeff, add index input for bcftools annotate, and BWA mem index image input for gridss annotate inserted sequences --- bcftools.wdl | 2 ++ gridss.wdl | 4 ++++ snpeff.wdl | 22 ++++++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 4703580a..d358ab7b 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -44,6 
+44,7 @@ task Annotate { Boolean singleOverlaps = false Array[String] removeAnns = [] File inputFile + File? inputFileIndex String outputPath = "output.vcf.gz" Int threads = 0 @@ -117,6 +118,7 @@ task Annotate { singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} inputFile: {description: "A vcf or bcf file.", category: "required"} + ipnutFileIndex: {description: "The index for the input vcf or bcf.", "common"} threads: {description: "Number of extra decompression threads [0].", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/gridss.wdl b/gridss.wdl index cfbb7069..c444c854 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,6 +93,7 @@ task AnnotateInsertedSequence { File viralReference File viralReferenceFai File viralReferenceDict + File viralReferenceImg Int threads = 8 String javaXmx = "8G" @@ -134,6 +135,9 @@ task AnnotateInsertedSequence { inputVcf: {description: "The input VCF file.", category: "required"} outputPath: {description: "The path the output will be written to.", category: "common"} viralReference: {description: "A fasta file with viral sequences.", category: "required"} + viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"} + viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"} + viralReferenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the viral reference.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", diff --git a/snpeff.wdl b/snpeff.wdl index a26fadbd..2a113c52 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -1,5 +1,27 @@ version 1.0 +# MIT License +# +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task SnpEff { input { File vcf From 836f40c11ad03ca513345ba56b6feb502b2724dc Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 11:07:09 +0100 Subject: [PATCH 108/668] fix missing key in parameter_met --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index d358ab7b..064e2d6e 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -118,7 +118,7 @@ task Annotate { singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} inputFile: {description: "A vcf or bcf file.", category: "required"} - ipnutFileIndex: {description: "The index for the input vcf or bcf.", "common"} + ipnutFileIndex: {description: "The index for the input vcf or bcf.", category: "common"} threads: {description: "Number of extra decompression threads [0].", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 8bc34ddf78f998b838bec85e43926b25da42cc66 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 11:12:19 +0100 Subject: [PATCH 109/668] typo --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 064e2d6e..3b512716 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -118,7 +118,7 @@ task Annotate { singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} inputFile: {description: "A vcf or bcf file.", category: "required"} - ipnutFileIndex: {description: "The index for the input vcf or bcf.", category: "common"} + inputFileIndex: {description: "The index for the input vcf or bcf.", category: "common"} threads: {description: "Number of extra decompression threads [0].", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 070cbb252016d18f59d52e4919a2a267f1c18671 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 13:31:03 +0100 Subject: [PATCH 110/668] add missing input --- bcftools.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 3b512716..1dba7611 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -25,6 +25,7 @@ version 1.0 task Annotate { input { File? annsFile + File? annsFileIndex String? collapse Array[String] columns = [] String? 
exclude @@ -99,7 +100,8 @@ task Annotate { parameter_meta { outputPath: {description: "The location the output VCF file should be written.", category: "common"} outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} - annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "advanced"} + annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "common"} + annsFileIndex: {description: "The index for annsFile.", category: "common"} collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} columns: {description: "Comma-separated list of columns or tags to carry over from the annotation file (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} From 1e19fbb2a00187bfa10cab023aa52dacb1091e03 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 14:09:13 +0100 Subject: [PATCH 111/668] add missing inputs --- gripss.wdl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/gripss.wdl b/gripss.wdl index 3f500a60..c9a8f27d 100644 --- a/gripss.wdl +++ b/gripss.wdl @@ -25,6 +25,8 @@ task ApplicationKt { File inputVcf String outputPath = "gripss.vcf.gz" File referenceFasta + File referenceFastaFai + File referenceFastaDict File breakpointHotspot File breakendPon File breakpointPon @@ -61,7 +63,10 @@ task ApplicationKt { parameter_meta { inputVcf: {description: "The input VCF.", category: "required"} outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} - referenceFasta: {description: "The reference fasta file.", category: "advanced"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence 
dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} From d6109250b32299638c1d0f47edf580a69b0732b4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 16:36:10 +0100 Subject: [PATCH 112/668] add some cleanup to snpeff --- snpeff.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/snpeff.wdl b/snpeff.wdl index 2a113c52..85709079 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -59,6 +59,7 @@ task SnpEff { ~{true="-noShiftHgvs" false="" noShiftHgvs} \ ~{"-upDownStreamLen " + upDownStreamLen} \ > ~{outputPath} + rm -r $PWD/data } output { From a82be38ca7ff228233a5cd49c0495e3714a7ca79 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 27 Oct 2020 09:32:18 +0100 Subject: [PATCH 113/668] Update pbbam.wdl Co-authored-by: Jasper --- pbbam.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/pbbam.wdl b/pbbam.wdl index 368ff4ed..52737a00 100644 --- a/pbbam.wdl +++ b/pbbam.wdl @@ -23,6 +23,7 @@ task Index { input { File bamFile String? 
outputBamPath + String memory = "2G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) String dockerImage = "quay.io/biocontainers/pbbam:1.6.0--h5b7e6e0_0" From 7db21a6481522746b0699c2756083d57326be164 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 27 Oct 2020 16:02:35 +0100 Subject: [PATCH 114/668] Add support for outputPrefix with or without folder --- chunked-scatter.wdl | 3 +++ lima.wdl | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index b54a7d2e..8895c2a4 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -24,6 +24,7 @@ task ChunkedScatter { input { File inputFile String prefix = "./scatter" + Boolean splitContigs = false Int? chunkSize Int? overlap Int? minimumBasesPerFile @@ -40,6 +41,7 @@ task ChunkedScatter { ~{"-c " + chunkSize} \ ~{"-o " + overlap} \ ~{"-m " + minimumBasesPerFile} \ + ~{true="--split-contigs " false="" splitContigs} \ ~{inputFile} } @@ -108,6 +110,7 @@ task ScatterRegions { splitContigs: {description: "If set, contigs are allowed to be split up over multiple files.", category: "advanced"} scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} scatterSize: {description: "Overrides scatterSizeMillions with a smaller value if set.", category: "advanced"} + splitContigs: {description: "Allow contigs to be split during scattering.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} diff --git a/lima.wdl b/lima.wdl index 38cf2d6e..7ef9d4ab 100644 --- a/lima.wdl +++ b/lima.wdl @@ -58,6 +58,7 @@ task Lima { command { set -e + mkdir -p "$(dirname ~{outputPrefix})" lima \ ~{libraryDesignOptions[libraryDesign]} \ ~{true="--score-full-pass" false="" scoreFullPass} \ @@ -86,6 +87,15 @@ task Lima { ~{inputBamFile} \ ~{barcodeFile} \ ~{outputPrefix 
+ ".bam"} + + # copy the files with the default filename to the folder specified in + # outputPrefix. + if [ "~{basename(outputPrefix)}.json" != "~{outputPrefix}.json" ]; then + cp "~{basename(outputPrefix)}.json" "~{outputPrefix}.json" + cp "~{basename(outputPrefix)}.lima.counts" "~{outputPrefix}.lima.counts" + cp "~{basename(outputPrefix)}.lima.report" "~{outputPrefix}.lima.report" + cp "~{basename(outputPrefix)}.lima.summary" "~{outputPrefix}.lima.summary" + fi } output { From a7445b829f0babf6257b376e71f48f4c860828cc Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 27 Oct 2020 16:13:02 +0100 Subject: [PATCH 115/668] Remove duplicate parameter meta entry --- chunked-scatter.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 8895c2a4..115c5ca4 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -110,7 +110,6 @@ task ScatterRegions { splitContigs: {description: "If set, contigs are allowed to be split up over multiple files.", category: "advanced"} scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} scatterSize: {description: "Overrides scatterSizeMillions with a smaller value if set.", category: "advanced"} - splitContigs: {description: "Allow contigs to be split during scattering.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From af075999debec07b821010b0e0d260c23b41e143 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 27 Oct 2020 16:19:46 +0100 Subject: [PATCH 116/668] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c1f32dd..b27addab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ version 5.0.0-dev + deepvariant: Add task 
for DeepVariant. + gatk: Make intervals optional for GenotypeGVCFs. + isoseq3: Add required bam index input to isoseq3. -+ pbbam: Add task for indexing PacBio bam files ++ pbbam: Add task for indexing PacBio bam files. + picard: Add CollectHsMetrics and CollectVariantCallingMetrics. + Samtools: Add `threads` to parameter meta for Merge task. + bcftools: add tmpDir input to specify temporary directory when sorting. From 8df9a800fb56341a2c0b964f9300d49394cf485d Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 06:52:29 +0100 Subject: [PATCH 117/668] Update to CCS version 5 --- ccs.wdl | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ccs.wdl b/ccs.wdl index 60e43711..bcebefe9 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -29,12 +29,14 @@ task CCS { Float minReadQuality = 0.99 String logLevel = "WARN" File subreadsFile + File? subreadsIndexFile + String? chunkString String outputPrefix Int cores = 2 String memory = "2G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/pbccs:4.2.0--0" + String dockerImage = "quay.io/biocontainers/pbccs:5.0.0--0" } command { @@ -48,7 +50,8 @@ task CCS { --min-rq ~{minReadQuality} \ --log-level ~{logLevel} \ --num-threads ~{cores} \ - ~{"--report-file " + outputPrefix + ".ccs.report.txt"} \ + ~{"--chunk " + chunkString} \ + ~{"--report-json " + outputPrefix + ".ccs.report.json"} \ ~{"--log-file " + outputPrefix + ".ccs.stderr.log"} \ ~{subreadsFile} \ ~{outputPrefix + ".ccs.bam"} @@ -57,7 +60,7 @@ task CCS { output { File ccsBam = outputPrefix + ".ccs.bam" File ccsBamIndex = outputPrefix + ".ccs.bam.pbi" - File ccsReport = outputPrefix + ".ccs.report.txt" + File ccsReport = outputPrefix + ".ccs.report.json" File ccsStderr = outputPrefix + ".ccs.stderr.log" } @@ -77,6 +80,9 @@ task CCS { minReadQuality: {description: "Minimum predicted accuracy in [0, 1].", category: "common"} logLevel: {description: "Set log level. 
Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} subreadsFile: {description: "Subreads input file.", category: "required"} + subreadsIndexFile: {description: "Index for the subreads input file.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "advanced"} + chunkString: {descpription: "Chunk string (e.g. 1/4, 5/5) for CCS.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} From 910200447daeadbdf8b7698db39719ba35126498 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 06:54:14 +0100 Subject: [PATCH 118/668] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b27addab..e2068f49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ CCS: Update CCS to version 5. + deepvariant: Add task for DeepVariant. + gatk: Make intervals optional for GenotypeGVCFs. + isoseq3: Add required bam index input to isoseq3. From e29df66cd70df1681b892c8fb01af426beb4333a Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 07:03:02 +0100 Subject: [PATCH 119/668] Remove duplicate parameter meta --- ccs.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/ccs.wdl b/ccs.wdl index bcebefe9..5d9887bf 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -81,7 +81,6 @@ task CCS { logLevel: {description: "Set log level. 
Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} subreadsFile: {description: "Subreads input file.", category: "required"} subreadsIndexFile: {description: "Index for the subreads input file.", category: "required"} - outputPrefix: {description: "Output directory path + output file prefix.", category: "advanced"} chunkString: {descpription: "Chunk string (e.g. 1/4, 5/5) for CCS.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} From ccfd843303c5186121de89a6d667dc1fb20f4100 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 09:12:11 +0100 Subject: [PATCH 120/668] Update parameter meta --- ccs.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ccs.wdl b/ccs.wdl index 5d9887bf..cab15fea 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -80,7 +80,7 @@ task CCS { minReadQuality: {description: "Minimum predicted accuracy in [0, 1].", category: "common"} logLevel: {description: "Set log level. Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} subreadsFile: {description: "Subreads input file.", category: "required"} - subreadsIndexFile: {description: "Index for the subreads input file.", category: "required"} + subreadsIndexFile: {description: "Index for the subreads input file, required when using chunkString.", category: "advanced"} chunkString: {descpription: "Chunk string (e.g. 
1/4, 5/5) for CCS.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} From 61ba73556876d2bb1a1cc73ca9765af29a8e45ba Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 09:45:40 +0100 Subject: [PATCH 121/668] Update parameter meta --- samtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index ad94338a..9e415b0e 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -367,6 +367,7 @@ task Merge { threads: {description: "Number of threads to use.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From 7c63b058e9e1c23407bf5f07c04372d16226523a Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 10:05:57 +0100 Subject: [PATCH 122/668] Add postprocess argument to DeepVariant task --- deepvariant.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index 88bdb352..10bc49c9 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -28,6 +28,7 @@ task RunDeepVariant { File inputBamIndex String modelType String outputVcf + String? postprocessVariantsExtraArgs File? customizedModel Int? numShards String? 
outputGVcf @@ -51,8 +52,9 @@ task RunDeepVariant { ~{"--output_gvcf " + outputGVcf} \ ~{"--customized_model " + customizedModel} \ ~{"--num_shards " + numShards} \ - ~{"--regions} " + regions} \ + ~{"--regions " + regions} \ ~{"--sample_name " + sampleName} \ + ~{"--postprocess_variants_extra_args " + postprocessVariantsExtraArgs} \ ~{true="--vcf_stats_report" false="--novcf_stats_report" VCFStatsReport} } From 05f14ce2fa3af46ef79afa3c868837ad49db0fb5 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 10:31:24 +0100 Subject: [PATCH 123/668] Update parameter meta --- deepvariant.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/deepvariant.wdl b/deepvariant.wdl index 10bc49c9..f5661886 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -85,6 +85,7 @@ task RunDeepVariant { regions: {description: "List of regions we want to process, in BED/BEDPE format.", category: "advanced"} sampleName: {description: "Sample name to use instead of the sample name from the input reads BAM (SM tag in the header).", category: "common"} VCFStatsReport: {description: "Output a visual report (HTML) of statistics about the output VCF.", category: "common"} + postprocessVariantsExtraArgs: {description: "A comma-separated list of flag_name=flag_value. 'flag_name' has to be valid flags for calpostprocess_variants.py.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", From 680563febf9dba81cff822f73ab599b351f3e7c6 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 10:33:36 +0100 Subject: [PATCH 124/668] Fix bug in whatshap task --- whatshap.wdl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/whatshap.wdl b/whatshap.wdl index 2ee90f50..93624590 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -45,6 +45,8 @@ task Phase { } command { + set -e + whatshap phase \ ~{vcf} \ ~{phaseInput} \ @@ -56,7 +58,8 @@ task Phase { ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ ~{if defined(chromosome) then ("--chromosome " + '"' + chromosome + '"') else ""} \ ~{if defined(threshold) then ("--threshold " + '"' + threshold + '"') else ""} \ - ~{if defined(ped) then ("--ped " + '"' + ped + '"') else ""} \ + ~{if defined(ped) then ("--ped " + '"' + ped + '"') else ""} + tabix -p vcf ~{outputVCF} } @@ -159,13 +162,16 @@ task Haplotag { } command { + set -e + whatshap haplotag \ ~{vcf} \ ~{alignments} \ ~{if defined(outputFile) then ("--output " + '"' + outputFile+ '"') else ""} \ ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ ~{if defined(regions) then ("--regions " + '"' + regions + '"') else ""} \ - ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ + ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} + python3 -c "import pysam; pysam.index('~{outputFile}')" } From f4fee79b3e26f11c9b6dce07a64e517596a6ca78 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 30 Oct 2020 14:12:09 +0100 Subject: [PATCH 125/668] Update first set of tasks to uniform layout. 
--- CHANGELOG.md | 70 +++++++++++++++++++++++----------- CPAT.wdl | 20 ++++++---- bam2fastx.wdl | 45 +++++++++++++++------- bcftools.wdl | 93 +++++++++++++++++++++++---------------------- bedtools.wdl | 44 +++++++++++++-------- biowdl.wdl | 17 ++++----- bowtie.wdl | 33 ++++++++-------- bwa-mem2.wdl | 38 +++++++++--------- bwa.wdl | 36 +++++++++--------- ccs.wdl | 31 +++++++-------- centrifuge.wdl | 8 ++-- chunked-scatter.wdl | 17 +++++---- clever.wdl | 16 ++++---- collect-columns.wdl | 17 +++++---- common.wdl | 32 +++++++++------- cutadapt.wdl | 45 ++++++++++------------ deepvariant.wdl | 22 +++++------ 17 files changed, 324 insertions(+), 260 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2068f49..2c04b582 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ CCS: `cores` input has been renamed to `threads` to match tool naming. + CCS: Update CCS to version 5. + deepvariant: Add task for DeepVariant. + gatk: Make intervals optional for GenotypeGVCFs. @@ -19,7 +20,8 @@ version 5.0.0-dev + picard: Add CollectHsMetrics and CollectVariantCallingMetrics. + Samtools: Add `threads` to parameter meta for Merge task. + bcftools: add tmpDir input to specify temporary directory when sorting. -+ bcftools: remove outputType and implement indexing based on output file extension. ++ bcftools: remove outputType and implement indexing based on output + file extension. + NanoPack: Add parameter_meta to NanoPlot task. + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). @@ -111,8 +113,8 @@ version 4.0.0 + Change MultiQC inputs. It now accepts an array of reports files. It does not need access to a folder with the reports anymore. MultiQC can now be used as a normal WDL task without hacks. -+ Picard: Make all outputs in `CollectMultipleMetrics` optional. 
This will make sure the - task will not fail if one of the metrics is set to false. ++ Picard: Make all outputs in `CollectMultipleMetrics` optional. This will + make sure the task will not fail if one of the metrics is set to false. + The struct `BowtieIndex` was removed, as it has become obsolete. + The task `ReorderGlobbedScatters` was removed, as it has become obsolete. + Adjusted the memory settings of many tools, especially java tools. @@ -134,7 +136,8 @@ version 4.0.0 + Add faidx task to samtools. + Isoseq3: Remove dirname command from output folder creation step. + Isoseq3: Requires more memory by default, is now 2G. -+ Isoseq3: Remove cp commands and other bash magic, file naming is now solved by pipeline. ++ Isoseq3: Remove cp commands and other bash magic, file naming is now + solved by pipeline. + Lima: Replace mv command with cp. + Add WDL task for smoove (lumpy) sv-caller. @@ -145,7 +148,8 @@ version 3.1.0 + Lima: Add missing output to parameter_meta. + Lima: Remove outputPrefix variable from output section. + Isoseq3: Make sure stderr log file from Refine is unique and not overwritten. -+ Isoseq3: Add workaround in Refine for glob command not locating files in output directory. ++ Isoseq3: Add workaround in Refine for glob command not locating files + in output directory. + Isoseq3: Fix --min-polya-length argument syntax. + Lima: Add workaround for glob command not locating files in output directory. + CCS: Add missing backslash. @@ -189,10 +193,13 @@ version 3.0.0 + Rename HaplotypeCallerGVCF to HaplotypeCaller. Add `gvcf` option to set whether output should be a GVCF. + Centrifuge: Add Krona task specific to Centrifuge. -+ Centrifuge: Fix Centrifuge tests, where sometimes the index files could still not be located. ++ Centrifuge: Fix Centrifuge tests, where sometimes the index files could + still not be located. + Update parameter_meta for TALON, Centrifuge and Minimap2. 
-+ Centrifuge: Fix issue where Centrifuge Inspect did not get the correct index files location. -+ Add `minimumContigLength` input to PlotDenoisedCopyRatios and PlotModeledSegments. ++ Centrifuge: Fix issue where Centrifuge Inspect did not get the correct + index files location. ++ Add `minimumContigLength` input to PlotDenoisedCopyRatios + and PlotModeledSegments. + Add `commonVariantSitesIndex` input to CollectAllelicCounts. + Centrifuge: Fix issue where Centrifuge could not locate index files. + Increase default memory of BWA mem to 32G (was 16G). @@ -228,11 +235,13 @@ version 3.0.0 + Removed the "extraArgs" input from FilterMutectCalls. + Removed unused "verbose" and "quiet" inputs from multiqc. + Added parameter_meta sections to a variety of tasks. -+ Picard's BedToIntervalList outputPath input is now optional (with a default of "regions.interval_list"). ++ Picard's BedToIntervalList outputPath input is now + optional (with a default of "regions.interval_list"). + TALON: Fix SQLite error concerning database/disk space being full. + Update htseq to default image version 0.11.2. + Update biowdl-input-converter in common.wdl to version 0.2.1. -+ Update TALON section to now include the new annotation file output, and add config file creation to the TALON task. ++ Update TALON section to now include the new annotation file output, and + add config file creation to the TALON task. + Removed unused inputs (trimPrimer and format) for cutadapt. + Various minor command tweaks to increase stability. + Fixed unused inputs in bedtools sort (inputs are now used). @@ -245,7 +254,8 @@ version 2.1.0 + Updated biowdl-input-converter version. + GATK CombineGVCFs memory was tripled to prevent it from using a lot of CPU in Garbage Collection mode. -+ Updated parameter_meta sections for Minimap2 and TranscriptClean to wdl-aid format. ++ Updated parameter_meta sections for Minimap2 and TranscriptClean to + wdl-aid format. + Updated cores variable for TALON, the default is now 4. 
+ Updated TALON to version 4.4. + Added parameter_meta sections to the following tools: @@ -262,10 +272,14 @@ version 2.1.0 version 2.0.0 --------------------------- + TranscriptClean: Update TranscriptClean to version 2.0.2. -+ Memory runtime attributes are now Strings indicating total memory, as opposed to Ints indicating memory per core. -+ Memory inputs for most tasks are now Strings, remaining Int memory inputs are renamed to "memoryGb". -+ Use the biowdl-input-converter container for JsonToYaml, to reduce the amount of containers needed. -+ Add biowdl-input-converter and remove SampleConfigToSampleReadgroupLists which it replaces. ++ Memory runtime attributes are now Strings indicating total memory, as + opposed to Ints indicating memory per core. ++ Memory inputs for most tasks are now Strings, remaining Int memory inputs + are renamed to "memoryGb". ++ Use the biowdl-input-converter container for JsonToYaml, to reduce the + amount of containers needed. ++ Add biowdl-input-converter and remove SampleConfigToSampleReadgroupLists + which it replaces. + GATK.GenotypeGVCFs: Increased memoryMultiplier from 2.0 to 3.0 . + Minimap2: Add -k option to minimap2 mapping. + Added bwakit task. @@ -279,7 +293,9 @@ version 1.0.0 + Removed deprecated tasks: + bioconda.installPrefix + mergecounts.MergeCounts -+ GATK.BaseRecalibrator: "knownIndelsSitesVCFs" and "knownIndelsSitesVCFIndexes" are no longer optional, but now have a default of "[]". ++ GATK.BaseRecalibrator: "knownIndelsSitesVCFs" + and "knownIndelsSitesVCFIndexes" are no longer optional, but + now have a default of "[]". + Removed BWA index task. + Removed unused "picardJar" input from bwa.wdl. + All inputs to bedtools Sort are now reflected in the generated command. @@ -295,17 +311,25 @@ version 1.0.0 + Fastqsplitter: use version 1.1. + Picard: Use version 2.20.5 of the biocontainer as this includes the R dependency. + Common: Update dockerTag to dockerImage. 
-+ GATK: Add CombineVariants task that allows, e.g., to merge VCFs from different callers. -+ Mutect2: Add GATK tasks related to variant filtering (LearnReadOrientationModel, MergeStats, GetPileupSummaries, CalculateContamination and FilterMutectCalls). -+ Mutect2: Add "--germline-resource" and "--f1r2-tar-gz" inputs, requiring an update to GATK 4.1.2.0. ++ GATK: Add CombineVariants task that allows, e.g., to merge VCFs + from different callers. ++ Mutect2: Add GATK tasks related to variant + filtering (LearnReadOrientationModel, MergeStats, GetPileupSummaries, + CalculateContamination and FilterMutectCalls). ++ Mutect2: Add "--germline-resource" and "--f1r2-tar-gz" inputs, requiring + an update to GATK 4.1.2.0. + Mutect2: Add necessary missing index attribute for panel of normals. + MultiQC: Add memory variable to multiqc task. -+ GATK: SplitNCigarReads, BaseRecalibration and ApplyBQSR do no longer need regions files as required inputs. -+ VarDict: Add user definable flags (-M, -A, -Q, -d, -v, -f) to the paired VCF filtering script. -+ Cutadapt: If the output is a gzipped file, compress with level 1 (instead of default 6). ++ GATK: SplitNCigarReads, BaseRecalibration and ApplyBQSR do no longer need + regions files as required inputs. ++ VarDict: Add user definable flags (-M, -A, -Q, -d, -v, -f) to the paired + VCF filtering script. ++ Cutadapt: If the output is a gzipped file, compress with + level 1 (instead of default 6). + Cutadapt: Fix issues with read2output when using single-end reads. + Add feature type, idattr and additional attributes to htseq-count. + Added allow-contain option to bowtie. + Added a changelog to keep track of changes. -+ Added sortByName task in samtools to support more memory efficient execution of HTSeqCount. ++ Added sortByName task in samtools to support more memory efficient + execution of HTSeqCount. + Removed the bam index from HTSeqCount's inputs. 
diff --git a/CPAT.wdl b/CPAT.wdl index 3b542e4f..d97031dc 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -26,17 +26,22 @@ task CPAT { String outFilePath File hex File logitModel + File? referenceGenome - File? referenceGenomeIndex # Should be added as input if - # CPAT should not index the reference genome. + # Should be added as input if CPAT should not index the + # reference genome. + File? referenceGenomeIndex Array[String]? startCodons Array[String]? stopCodons + Int timeMinutes = 10 + ceil(size(gene, "G") * 30) String dockerImage = "biocontainers/cpat:v1.2.4_cv1" } - # Some WDL magic in the command section to properly output the start and stopcodons to the command. - # select_first is needed in order to convert the optional arrays to non-optionals. + # Some WDL magic in the command section to properly output the start and + # stopcodons to the command. + # select_first is needed in order to convert the optional arrays + # to non-optionals. command { set -e mkdir -p "$(dirname ~{outFilePath})" @@ -60,18 +65,17 @@ task CPAT { } parameter_meta { + # inputs gene: {description: "Equivalent to CPAT's `--gene` option.", category: "required"} outFilePath: {description: "Equivalent to CPAT's `--outfile` option.", category: "required"} hex: {description: "Equivalent to CPAT's `--hex` option.", category: "required"} logitModel: {description: "Equivalent to CPAT's `--logitModel` option.", category: "required"} referenceGenome: {description: "Equivalent to CPAT's `--ref` option.", category: "advanced"} - referenceGenomeIndex: {description: "The index of the reference. Should be added as input if CPAT should not index the reference genome.", - category: "advanced"} + referenceGenomeIndex: {description: "The index of the reference. 
Should be added as input if CPAT should not index the reference genome.", category: "advanced"} startCodons: {description: "Equivalent to CPAT's `--start` option.", category: "advanced"} stopCodons: {description: "Equivalent to CPAT's `--stop` option.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 18434755..e8884ab0 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -37,7 +37,22 @@ task Bam2Fasta { command { set -e - mkdir -p "$(dirname ~{outputPrefix})" + mkdir -p "$(dirname ~{outputPrefix})"' + + # Localise the bam and pbi files so they are next to each other in the + # current folder. + bamFiles="" + for bamFile in ~{sep=" " bam}; + do + ln ${bamFile} . + bamFiles=${bamFiles}" $(basename ${bamFile})" + done + + for index in ~{sep=" " bamIndex}; + do + ln ${index} . + done + bam2fasta \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ @@ -93,15 +108,17 @@ task Bam2Fastq { mkdir -p "$(dirname ~{outputPrefix})" # Localise the bam and pbi files so they are next to each other in the - # current folder - bamfiles="" - for bamfile in ~{sep=" " bam};do - ln $bamfile . - bamfiles=$bamfiles" $(basename $bamfile)" + # current folder. + bamFiles="" + for bamFile in ~{sep=" " bam}; + do + ln ${bamFile} . + bamFiles=${bamFiles}" $(basename ${bamFile})" done - for bamindex in ~{sep=" " bamIndex}; do - ln $bamindex . + for index in ~{sep=" " bamIndex}; + do + ln ${index} . done bam2fastq \ @@ -109,7 +126,7 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - $bamfiles + ${bamFiles} } output { diff --git a/bcftools.wdl b/bcftools.wdl index a0aeb442..41825747 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2018 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -24,26 +22,27 @@ version 1.0 task Annotate { input { + Array[String] columns = [] + Boolean force = false + Boolean keepSites = false + Boolean noVersion = false + Array[String] samples = [] + Boolean singleOverlaps = false + Array[String] removeAnns = [] + File inputFile + String outputPath = "output.vcf.gz" + File? annsFile String? collapse - Array[String] columns = [] String? exclude - Boolean force = false File? headerLines String? newId String? 
include - Boolean keepSites = false String? markSites - Boolean noVersion = false String? regions File? regionsFile File? renameChrs - Array[String] samples = [] File? samplesFile - Boolean singleOverlaps = false - Array[String] removeAnns = [] - File inputFile - String outputPath = "output.vcf.gz" Int threads = 0 String memory = "256M" @@ -80,9 +79,8 @@ task Annotate { ~{inputFile} ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} - } - + output { File outputVcf = outputPath File? outputVcfIndex = outputPath + ".tbi" @@ -95,31 +93,31 @@ task Annotate { } parameter_meta { + # inputs + columns: {description: "Comma-separated list of columns or tags to carry over from the annotation file (see man page for details).", category: "advanced"} + force: {description: "Continue even when parsing errors, such as undefined tags, are encountered.", category: "advanced"} + keepSites: {description: "Keep sites which do not pass -i and -e expressions instead of discarding them.", category: "advanced"} + noVersion: {description: "Do not append version and command line information to the output VCF header.", category: "advanced"} + samples: {description: "List of samples for sample stats, \"-\" to include all samples.", category: "advanced"} + singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} + removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} + inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "advanced"} collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} - columns: {description: "Comma-separated list of columns or tags to carry over from the 
annotation file (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} - force: {description: "Continue even when parsing errors, such as undefined tags, are encountered.", category: "advanced"} headerLines: {description: "Lines to append to the VCF header (see man page for details).", category: "advanced"} newId: {description: "Assign ID on the fly (e.g. --set-id +'%CHROM\_%POS').", category: "advanced"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} - keepSites: {description: "Keep sites which do not pass -i and -e expressions instead of discarding them.", category: "advanced"} markSites: {description: "Annotate sites which are present ('+') or absent ('-') in the -a file with a new INFO/TAG flag.", category: "advanced"} - noVersion: {description: "Do not append version and command line information to the output VCF header.", category: "advanced"} regions: {description: "Restrict to comma-separated list of regions.", category: "advanced"} regionsFile: {description: "Restrict to regions listed in a file.", category: "advanced"} renameChrs: {description: "rename chromosomes according to the map in file (see man page for details).", category: "advanced"} - samples: {description: "List of samples for sample stats, \"-\" to include all samples.", category: "advanced"} samplesFile: {description: "File of samples to include.", category: "advanced"} - singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} - removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} - inputFile: {description: "A vcf or bcf file.", category: "required"} - threads: {description: "Number of extra decompression threads [0].", category: "advanced"} - dockerImage: {description: "The docker 
image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -128,6 +126,7 @@ task Sort { File inputFile String outputPath = "output.vcf.gz" String tmpDir = "./sorting-tmp" + String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" @@ -159,6 +158,7 @@ task Sort { } parameter_meta { + # inputs inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} tmpDir: {description: "The location of the temporary files during the bcftools sorting.", category: "advanced"} @@ -166,46 +166,45 @@ task Sort { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } - - } task Stats { input { File inputVcf File inputVcfIndex + String outputPath = basename(inputVcf) + ".stats" + Boolean firstAlleleOnly = false + Boolean splitByID = false + Array[String] samples = [] + Boolean verbose = false + File? compareVcf File? compareVcfIndex - String outputPath = basename(inputVcf) + ".stats" String? afBins String? afTag - Boolean firstAlleleOnly = false String? collapse String? depth String? exclude - File? exons + File? exons String? applyFilters File? fastaRef File? fastaRefIndex - String? include - Boolean splitByID = false + String? 
include String? regions File? regionsFile - Array[String] samples = [] - File? samplesFile - String? targets + File? samplesFile + String? targets File? targetsFile String? userTsTv - Boolean verbose = false Int threads = 0 - Int timeMinutes = 1 + 2* ceil(size(select_all([inputVcf, compareVcf]), "G")) # TODO: Estimate, 2 minutes per GB, refine later. - String memory = "256M" + String memory = "256M" + Int timeMinutes = 1 + 2* ceil(size(select_all([inputVcf, compareVcf]), "G")) # TODO: Estimate, 2 minutes per GB, refine later. String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } - + command { - set -e + set -e mkdir -p $(dirname ~{outputPath}) bcftools stats \ ~{"--af-bins " + afBins} \ @@ -237,19 +236,24 @@ task Stats { runtime { cpu: threads + 1 - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes docker: dockerImage } parameter_meta { + # inputs inputVcf: {description: "The VCF to be analysed.", category: "required"} inputVcfIndex: {description: "The index for the input VCF.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + firstAlleleOnly: {description: "Include only 1st allele at multiallelic sites.", category: "advanced"} + splitByID: {description: "Collect stats for sites with ID separately (known vs novel).", category: "advanced"} + samples: {description: "List of samples for sample stats, \"-\" to include all samples.", category: "advanced"} + verbose: {description: "Produce verbose per-site and per-sample output.", category: "advanced"} compareVcf: {description: "When inputVcf and compareVCF are given, the program generates separate stats for intersection and the complements. 
By default only sites are compared, samples must be given to include also sample columns.", category: "common"} compareVcfIndex: {description: "Index for the compareVcf.", category: "common"} afBins: {description: "Allele frequency bins, a list (0.1,0.5,1) or a file (0.1\n0.5\n1).", category: "advanced"} afTag: {description: "Allele frequency tag to use, by default estimated from AN,AC or GT.", category: "advanded"} - firstAlleleOnly: {description: "Include only 1st allele at multiallelic sites.", category: "advanced"} collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} depth: {description: "Depth distribution: min,max,bin size [0,500,1].", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} @@ -258,20 +262,16 @@ task Stats { fastaRef: {description: "Faidx indexed reference sequence file to determine INDEL context.", category: "advanced"} fastaRefIndex: {description: "Index file (.fai) for fastaRef. Must be supplied if fastaRef is supplied.", category: "advanced"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} - splitByID: {description: "Collect stats for sites with ID separately (known vs novel).", category: "advanced"} regions: {description: "Restrict to comma-separated list of regions.", category: "advanced"} regionsFile: {description: "Restrict to regions listed in a file.", category: "advanced"} - samples: {description: "List of samples for sample stats, \"-\" to include all samples.", category: "advanced"} samplesFile: {description: "File of samples to include.", category: "advanced"} targets: {description: "Similar to regions but streams rather than index-jumps.", category: "advanced"} targetsFile: {description: "Similar to regionsFile but streams rather than index-jumps.", category: "advanced"} userTsTv: {description: ". 
Collect Ts/Tv stats for any tag using the given binning [0:1:100].", category: "advanced"} threads: {description: "Number of extra decompression threads [0].", category: "advanced"} - verbose: {description: "Produce verbose per-site and per-sample output.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -279,6 +279,7 @@ task View { input { File inputFile String outputPath = "output.vcf" + String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" @@ -296,6 +297,7 @@ task View { ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} } + output { File outputVcf = outputPath File? 
outputVcfIndex = outputPath + ".tbi" @@ -308,6 +310,7 @@ task View { } parameter_meta { + # inputs inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} diff --git a/bedtools.wdl b/bedtools.wdl index c228d6c6..b7a03c17 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -25,6 +25,7 @@ task Complement { File faidx File inputBed String outputBed = basename(inputBed, "\.bed") + ".complement.bed" + String memory = "~{512 + ceil(size([inputBed, faidx], "M"))}M" Int timeMinutes = 1 + ceil(size([inputBed, faidx], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" @@ -52,13 +53,13 @@ task Complement { } parameter_meta { + # inputs faidx: {description: "The fasta index (.fai) file from which to extract the genome sizes.", category: "required"} inputBed: {description: "The inputBed to complement.", category: "required"} outputBed: {description: "The path to write the output to.", category: "advanced"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -66,12 +67,14 @@ task Merge { input { File inputBed String outputBed = "merged.bed" + String memory = "~{512 + ceil(size(inputBed, "M"))}M" Int timeMinutes = 1 + ceil(size(inputBed, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } command { + set -e bedtools merge -i ~{inputBed} > ~{outputBed} } @@ -86,12 +89,12 @@ task Merge { } parameter_meta { + # inputs inputBed: {description: "The bed to merge.", category: "required"} outputBed: {description: "The path to write the output to.", category: "advanced"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -100,6 +103,7 @@ task MergeBedFiles { input { Array[File]+ bedFiles String outputBed = "merged.bed" + String memory = "~{512 + ceil(size(bedFiles, "M"))}M" Int timeMinutes = 1 + ceil(size(bedFiles, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" @@ -120,13 +124,14 @@ task MergeBedFiles { time_minutes: timeMinutes docker: dockerImage } + parameter_meta { + # inputs bedFiles: {description: "The bed files to merge.", category: "required"} outputBed: {description: "The path to write the output to.", category: "advanced"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -139,9 +144,13 @@ task Sort { Boolean chrThenSizeD = false Boolean chrThenScoreA = false Boolean chrThenScoreD = false + String outputBed = "output.sorted.bed" + File? genome File? faidx - String outputBed = "output.sorted.bed" + + String memory = "~{512 + ceil(size(inputBed, "M"))}M" + Int timeMinutes = 1 + ceil(size(inputBed, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -166,6 +175,8 @@ task Sort { } runtime { + memory: memory + time_minutes: timeMinutes docker: dockerImage } } @@ -174,13 +185,15 @@ task Intersect { input { File regionsA File regionsB - # Giving a faidx file will set the sorted option. - File? faidx String outputBed = "intersect.bed" + + File? faidx # Giving a faidx file will set the sorted option. 
+ String memory = "~{512 + ceil(size([regionsA, regionsB], "M"))}M" Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } + Boolean sorted = defined(faidx) command { @@ -205,14 +218,13 @@ task Intersect { } parameter_meta { - faidx: {description: "The fasta index (.fai) file that is used to create the genome file required for sorted output. Implies sorted option.", - category: "common"} - regionsA: {description: "Region file a to intersect", category: "required"} - regionsB: {description: "Region file b to intersect", category: "required"} - outputBed: {description: "The path to write the output to", category: "advanced"} + # inputs + regionsA: {description: "Region file a to intersect.", category: "required"} + regionsB: {description: "Region file b to intersect.", category: "required"} + outputBed: {description: "The path to write the output to.", category: "advanced"} + faidx: {description: "The fasta index (.fai) file that is used to create the genome file required for sorted output. Implies sorted option.", category: "common"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/biowdl.wdl b/biowdl.wdl index 838755d9..8a1f9dfd 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -32,6 +32,7 @@ task InputConverter { Boolean checkFileMd5sums=false Boolean old=false + String memory = "128M" Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" } @@ -52,22 +53,20 @@ task InputConverter { } runtime { - memory: "128M" + memory: memory time_minutes: timeMinutes docker: dockerImage } parameter_meta { + # inputs samplesheet: {description: "The samplesheet to be processed.", category: "required"} - outputFile: {description: "The location the JSON representation of the samplesheet should be written to.", - category: "advanced"} - skipFileCheck: {description: "Whether or not the existance of the files mentioned in the samplesheet should be checked.", - category: "advanced"} - checkFileMd5sums: {description: "Whether or not the MD5 sums of the files mentioned in the samplesheet should be checked.", - category: "advanced"} + outputFile: {description: "The location the JSON representation of the samplesheet should be written to.", category: "advanced"} + skipFileCheck: {description: "Whether or not the existance of the files mentioned in the samplesheet should be checked.", category: "advanced"} + checkFileMd5sums: {description: "Whether or not the MD5 sums of the files mentioned in the samplesheet should be checked.", category: "advanced"} old: {description: "Whether or not the old samplesheet format should be used.", category: "advanced"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/bowtie.wdl b/bowtie.wdl index b3f3ceae..7fb1b614 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2018 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -28,30 +26,31 @@ task Bowtie { Array[File] readsDownstream = [] String outputPath = "mapped.bam" Array[File]+ indexFiles - Int? seedmms - Int? seedlen - Int? k Boolean best = false Boolean strata = false Boolean allowContain = false + + Int? seedmms + Int? seedlen + Int? k String? samRG + String picardXmx = "4G" Int threads = 1 - Int timeMinutes = 1 + ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads) String memory = "~{5 + ceil(size(indexFiles, "G"))}G" - String picardXmx = "4G" + Int timeMinutes = 1 + ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads) # Image contains bowtie=1.2.2 and picard=2.9.2 String dockerImage = "quay.io/biocontainers/mulled-v2-bfe71839265127576d3cd749c056e7b168308d56:1d8bec77b352cdcf3e9ff3d20af238b33ed96eae-0" } # Assume fastq input with -q flag. - # The output always needs to be SAM as it is piped into Picard SortSam + # The output always needs to be SAM as it is piped into Picard SortSam. # Hence, the --sam flag is used. 
- command { set -e -o pipefail mkdir -p "$(dirname ~{outputPath})" - bowtie -q \ + bowtie \ + -q \ --sam \ ~{"--seedmms " + seedmms} \ ~{"--seedlen " + seedlen} \ @@ -84,24 +83,22 @@ task Bowtie { } parameter_meta { + # inputs readsUpstream: {description: "The first-/single-end fastq files.", category: "required"} readsDownstream: {description: "The second-end fastq files.", category: "common"} outputPath: {description: "The location the output BAM file should be written to.", category: "common"} indexFiles: {description: "The index files for bowtie.", category: "required"} - seedmms: {description: "Equivalent to bowtie's `--seedmms` option.", category: "advanced"} - seedlen: {description: "Equivalent to bowtie's `--seedlen` option.", category: "advanced"} - k: {description: "Equivalent to bowtie's `-k` option.", category: "advanced"} best: {description: "Equivalent to bowtie's `--best` flag.", category: "advanced"} strata: {description: "Equivalent to bowtie's `--strata` flag.", category: "advanced"} allowContain: {description: "Equivalent to bowtie's `--allow-contain` flag.", category: "advanced"} + seedmms: {description: "Equivalent to bowtie's `--seedmms` option.", category: "advanced"} + seedlen: {description: "Equivalent to bowtie's `--seedlen` option.", category: "advanced"} + k: {description: "Equivalent to bowtie's `-k` option.", category: "advanced"} samRG: {description: "Equivalent to bowtie's `--sam-RG` option.", category: "advanced"} - - picardXmx: {description: "The maximum memory available to the picard (used for sorting the output). Should be lower than `memory` to accommodate JVM overhead and bowtie's memory usage.", - category: "advanced"} + picardXmx: {description: "The maximum memory available to the picard (used for sorting the output). 
Should be lower than `memory` to accommodate JVM overhead and bowtie's memory usage.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index 6ea4578d..34cd38a6 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -26,33 +26,35 @@ task Mem { File? read2 BwaIndex bwaIndex String outputPrefix - String? readgroup Boolean sixtyFour = false Boolean usePostalt = false - Int threads = 4 - Int? sortThreads Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 - Int? memoryGb + + String? readgroup + Int? sortThreads + Int? memoryGb + + Int threads = 4 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10 String dockerImage = "quay.io/biocontainers/mulled-v2-6a15c99309c82b345497d24489bee67bbb76c2f6:1c9c3227b9bf825a8dc9726a25701aa23c0b1f12-0" } - # Samtools sort may block the pipe while it is writing data to disk. + # Samtools sort may block the pipe while it is writing data to disk. # This can lead to cpu underutilization. - # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. + # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. 
Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) - # BWA-mem2's index files contain 2 BWT indexes of which only one is used. .2bit64 is used by default and + # BWA-mem2's index files contain 2 BWT indexes of which only one is used. .2bit64 is used by default and # .8bit32 is used for avx2. # The larger one of these is the 8bit32 index. Since we do not know beforehand which one is used we need to accomodate for that. - # Using only the 8bit32 index uses 57,5% of the index files. Since bwa-mem2 uses slightly more memory than the index + # Using only the 8bit32 index uses 57,5% of the index files. Since bwa-mem2 uses slightly more memory than the index. # We put it at 62% as a safety factor. That means the memory usage for bwa-mem will be 53G for a human genome. Resulting in 60G total # on 8 cores with samtools with 3 sort threads. Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 0.62) + sortMemoryPerThreadGb * totalSortThreads - # The bwa postalt script is out commented as soon as usePostalt = false. + # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. command { set -e @@ -81,7 +83,7 @@ task Mem { runtime { # One extra thread for bwa-postalt + samtools is not needed. # These only use 5-10% of compute power and not always simultaneously. 
- cpu: threads + cpu: threads memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" time_minutes: timeMinutes docker: dockerImage @@ -92,21 +94,21 @@ task Mem { read1: {description: "The first-end fastq file.", category: "required"} read2: {description: "The second-end fastq file.", category: "common"} bwaIndex: {description: "The BWA index, including (optionally) a .alt file.", category: "required"} - usePostalt: {description: "Whether to use the postalt script from bwa kit."} outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"} - readgroup: {description: "A readgroup identifier.", category: "common"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} - threads: {description: "The number of threads to use for alignment.", category: "advanced"} - memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} - sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + usePostalt: {description: "Whether to use the postalt script from bwa kit."} sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + readgroup: {description: "A readgroup identifier.", category: "common"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} + threads: {description: "The number of threads to use for alignment.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputBam: "The produced BAM file." + outputBam: {description: "The produced BAM file."} + outputHla: {description: "The produced HLA file."} } } diff --git a/bwa.wdl b/bwa.wdl index fdeb870f..0f09f7a9 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -26,28 +26,30 @@ task Mem { File? read2 BwaIndex bwaIndex String outputPrefix - String? readgroup Boolean sixtyFour = false Boolean usePostalt = false - Int threads = 4 - Int? sortThreads Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 - Int? memoryGb + + String? readgroup + Int? sortThreads + Int? memoryGb + + Int threads = 4 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10 String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } - # Samtools sort may block the pipe while it is writing data to disk. + # Samtools sort may block the pipe while it is writing data to disk. # This can lead to cpu underutilization. - # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. + # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) - # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. + # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads - # The bwa postalt script is out commented as soon as usePostalt = false. + # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. command { set -e @@ -76,7 +78,7 @@ task Mem { runtime { # One extra thread for bwa-postalt + samtools is not needed. # These only use 5-10% of compute power and not always simultaneously. - cpu: threads + cpu: threads memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" time_minutes: timeMinutes docker: dockerImage @@ -87,21 +89,21 @@ task Mem { read1: {description: "The first-end fastq file.", category: "required"} read2: {description: "The second-end fastq file.", category: "common"} bwaIndex: {description: "The BWA index, including (optionally) a .alt file.", category: "required"} - usePostalt: {description: "Whether to use the postalt script from bwa kit."} outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"} - readgroup: {description: "A readgroup identifier.", category: "common"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} - threads: {description: "The number of threads to use for alignment.", category: "advanced"} - memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} - sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + usePostalt: {description: "Whether to use the postalt script from bwa kit."} sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + readgroup: {description: "A readgroup identifier.", category: 
"common"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} + threads: {description: "The number of threads to use for alignment.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputBam: "The produced BAM file." + outputBam: {description: "The produced BAM file."} + outputHla: {description: "The produced HLA file."} } } diff --git a/ccs.wdl b/ccs.wdl index cab15fea..4446937b 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -22,19 +22,20 @@ version 1.0 task CCS { input { + File subreadsFile + String outputPrefix Int minPasses = 3 Int minLength = 10 Int maxLength = 50000 Boolean byStrand = false Float minReadQuality = 0.99 String logLevel = "WARN" - File subreadsFile + File? subreadsIndexFile String? chunkString - String outputPrefix - - Int cores = 2 - String memory = "2G" + + Int threads = 2 + String memory = "4G" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/pbccs:5.0.0--0" } @@ -49,7 +50,7 @@ task CCS { ~{true="--by-strand" false="" byStrand} \ --min-rq ~{minReadQuality} \ --log-level ~{logLevel} \ - --num-threads ~{cores} \ + --num-threads ~{threads} \ ~{"--chunk " + chunkString} \ ~{"--report-json " + outputPrefix + ".ccs.report.json"} \ ~{"--log-file " + outputPrefix + ".ccs.stderr.log"} \ @@ -65,7 +66,7 @@ task CCS { } runtime { - cpu: cores + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -73,17 +74,17 @@ task CCS { parameter_meta { # inputs + subreadsFile: {description: "Subreads input file.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} minPasses: {description: "Minimum number of full-length subreads required to generate ccs for a ZMW.", category: "advanced"} minLength: {description: "Minimum draft length before polishing.", category: "advanced"} maxLength: {description: "Maximum draft length before polishing.", category: "advanced"} byStrand: {description: "Generate a consensus for each strand.", category: "advanced"} minReadQuality: {description: "Minimum predicted accuracy in [0, 1].", category: "common"} logLevel: {description: "Set log level. 
Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} - subreadsFile: {description: "Subreads input file.", category: "required"} subreadsIndexFile: {description: "Index for the subreads input file, required when using chunkString.", category: "advanced"} chunkString: {descpription: "Chunk string (e.g. 1/4, 5/5) for CCS.", category: "advanced"} - outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - cores: {description: "The number of cores to be used.", category: "advanced"} + threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/centrifuge.wdl b/centrifuge.wdl index 1e7a0b45..1637abdd 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -94,13 +94,13 @@ task Build { task Classify { input { + Array[File]+ read1 + Array[File] read2 = [] String inputFormat = "fastq" Boolean phred64 = false Int minHitLength = 22 Array[File]+ indexFiles - Array[File]+ read1 String outputPrefix - Array[File] read2 = [] Int? trim5 Int? 
trim3 @@ -155,13 +155,13 @@ task Classify { parameter_meta { # inputs + read1: {description: "List of files containing mate 1s, or unpaired reads.", category: "required"} + read2: {description: "List of files containing mate 2s.", category: "common"} inputFormat: {description: "The format of the read file(s).", category: "required"} phred64: {description: "If set to true, phred+64 encoding is used.", category: "required"} minHitLength: {description: "Minimum length of partial hits.", category: "required"} indexFiles: {description: "The files of the index for the reference genomes.", category: "required"} - read1: {description: "List of files containing mate 1s, or unpaired reads.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - read2: {description: "List of files containing mate 2s.", category: "common"} trim5: {description: "Trim bases from 5' (left) end of each read before alignment.", category: "common"} trim3: {description: "Trim bases from 3' (right) end of each read before alignment.", category: "common"} reportMaxDistinct: {description: "It searches for at most distinct, primary assignments for each read or pair.", category: "common"} diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 115c5ca4..844d6990 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -25,6 +25,7 @@ task ChunkedScatter { File inputFile String prefix = "./scatter" Boolean splitContigs = false + Int? chunkSize Int? overlap Int? 
minimumBasesPerFile @@ -57,15 +58,16 @@ task ChunkedScatter { } parameter_meta { + # inputs inputFile: {description: "Either a bed file describing regiosn of intrest or a sequence dictionary.", category: "required"} prefix: {description: "The prefix for the output files.", category: "advanced"} + splitContigs: {description: "If set, contigs are allowed to be split up over multiple files.", category: "advanced"} chunkSize: {description: "Equivalent to chunked-scatter's `-c` option.", category: "advanced"} overlap: {description: "Equivalent to chunked-scatter's `-o` option.", category: "advanced"} minimumBasesPerFile: {description: "Equivalent to chunked-scatter's `-m` option.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -76,9 +78,11 @@ task ScatterRegions { String prefix = "scatters/scatter-" Boolean splitContigs = false Int scatterSizeMillions = 1000 + Int? scatterSize - Int timeMinutes = 2 + String memory = "256M" + Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/chunked-scatter:0.2.0--py_0" } @@ -105,15 +109,14 @@ task ScatterRegions { } parameter_meta { + # inputs inputFile: {description: "The input file, either a bed file or a sequence dict. Which format is used is detected by the extension: '.bed', '.fai' or '.dict'.", category: "required"} prefix: {description: "The prefix of the ouput files. 
Output will be named like: .bed, in which N is an incrementing number. Default 'scatter-'.", category: "advanced"} splitContigs: {description: "If set, contigs are allowed to be split up over multiple files.", category: "advanced"} scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} scatterSize: {description: "Overrides scatterSizeMillions with a smaller value if set.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/clever.wdl b/clever.wdl index 3a6515f7..75e889b3 100644 --- a/clever.wdl +++ b/clever.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2018 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -74,12 +72,12 @@ task Mateclever { indexedFiteredBam: {description: "The index of the filtered bam file.", category: "required"} bwaIndex: {description: "The BWA index files.", category: "required"} predictions: {description: "The predicted deletions (VCF) from clever.", category: "required"} - maxOffset: {description: "The maximum center distance between split-read and read-pair deletion to be considered identical.", category: "advanced"} - maxLengthDiff: {description: "The maximum length difference between split-read and read-pair deletion to be considered identical.", category: "advanced"} - cleverMaxDelLength: {description: "The maximum deletion length to look for in Clever predictions.", 
category: "advanced"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - threads: {description: "The the number of threads required to run a program", category: "advanced"} - memory: {description: "The memory required to run the programs", category: "advanced"} + cleverMaxDelLength: {description: "The maximum deletion length to look for in Clever predictions.", category: "advanced"} + maxLengthDiff: {description: "The maximum length difference between split-read and read-pair deletion to be considered identical.", category: "advanced"} + maxOffset: {description: "The maximum center distance between split-read and read-pair deletion to be considered identical.", category: "advanced"} + threads: {description: "The the number of threads required to run a program.", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -128,8 +126,8 @@ task Prediction { bamIndex: {description: "The index bam file.", category: "required"} bwaIndex: {description: "The BWA index files.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - threads: {description: "The the number of threads required to run a program", category: "advanced"} - memory: {description: "The memory required to run the programs", category: "advanced"} + threads: {description: "The the number of threads required to run a program.", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/collect-columns.wdl b/collect-columns.wdl index fe41c5e8..67db6179 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -24,12 +24,13 @@ task CollectColumns { input { Array[File]+ inputTables String outputPath + Boolean header = false + Boolean sumOnDuplicateId = false + Int? featureColumn Int? valueColumn Int? separator Array[String]? sampleNames - Boolean header = false - Boolean sumOnDuplicateId = false Array[String]? additionalAttributes File? referenceGtf String? 
featureAttribute @@ -67,20 +68,20 @@ task CollectColumns { } parameter_meta { + # inputs inputTables: {description: "The tables from which columns should be taken.", category: "required"} outputPath: {description: "The path to which the output should be written.", category: "required"} + header: {description: "Equivalent to the -H flag of collect-columns.", category: "advanced"} + sumOnDuplicateId: {description: "Equivalent to the -S flag of collect-columns.", category: "advanced"} featureColumn: {description: "Equivalent to the -f option of collect-columns.", category: "advanced"} valueColumn: {description: "Equivalent to the -c option of collect-columns.", category: "advanced"} separator: {description: "Equivalent to the -s option of collect-columns.", category: "advanced"} sampleNames: {description: "Equivalent to the -n option of collect-columns.", category: "advanced"} - header: {description: "Equivalent to the -H flag of collect-columns.", category: "advanced"} - sumOnDuplicateId: {description: "Equivalent to the -S flag of collect-columns.", category: "advanced"} additionalAttributes: {description: "Equivalent to the -a option of collect-columns.", category: "advanced"} referenceGtf: {description: "Equivalent to the -g option of collect-columns.", category: "advanced"} featureAttribute: {description: "Equivalent to the -F option of collect-columns.", category: "advanced"} - memoryGb: {description: "The maximum amount of memory the job will need in GB", category: "advanced"} + memoryGb: {description: "The maximum amount of memory the job will need in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} diff --git a/common.wdl b/common.wdl index e96cc1c8..b3878bb6 100644 --- a/common.wdl +++ b/common.wdl @@ -45,7 +45,7 @@ task CheckFileMD5 { input { File file String md5 - # By default cromwell expects /bin/bash to be present in the container + # By default cromwell expects /bin/bash to be present in the container. # The 'bash' container does not fill this requirement. (It is in /usr/local/bin/bash) # Use a stable version of debian:stretch-slim for this. (Smaller than ubuntu) String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" @@ -71,7 +71,7 @@ task ConcatenateTextFiles { Boolean zip = false } - # When input and output is both compressed decompression is not needed + # When input and output is both compressed decompression is not needed. String cmdPrefix = if (unzip && !zip) then "zcat " else "cat " String cmdSuffix = if (!unzip && zip) then " | gzip -c " else "" @@ -116,8 +116,8 @@ task Copy { } task CreateLink { - # Making this of type File will create a link to the copy of the file in the execution - # folder, instead of the actual file. + # Making this of type File will create a link to the copy of the file in + # the execution folder, instead of the actual file. # This cannot be propperly call-cached or used within a container. 
input { String inputFile @@ -182,6 +182,7 @@ task TextToFile { input { String text String outputFile = "out.txt" + Int timeMinutes = 1 String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -194,18 +195,19 @@ task TextToFile { File out = outputFile } - parameter_meta { - text: {description: "The text to print", category: "required"} - outputFile: {description: "The name of the output file", category: "common"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } runtime { memory: "1G" time_minutes: timeMinutes docker: dockerImage } + + parameter_meta { + # inputs + text: {description: "The text to print.", category: "required"} + outputFile: {description: "The name of the output file.", category: "common"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } } task YamlToJson { @@ -213,11 +215,12 @@ task YamlToJson { File yaml String outputJson = basename(yaml, "\.ya?ml$") + ".json" - Int timeMinutes = 1 String memory = "128M" + Int timeMinutes = 1 # biowdl-input-converter has python and pyyaml. 
String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" } + command { set -e mkdir -p "$(dirname ~{outputJson})" @@ -230,6 +233,7 @@ task YamlToJson { json.dump(content, output_json) CODE } + output { File json = outputJson } @@ -241,12 +245,12 @@ task YamlToJson { } parameter_meta { + # inputs yaml: {description: "The YAML file to convert.", category: "required"} outputJson: {description: "The location the output JSON file should be written to.", category: "advanced"} memory: {description: "The maximum amount of memory the job will need.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/cutadapt.wdl b/cutadapt.wdl index 7faeaff1..74f57912 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -32,6 +32,14 @@ task Cutadapt { Array[String] adapterRead2 = [] Array[String] frontRead2 = [] Array[String] anywhereRead2 = [] + String reportPath = "cutadapt_report.txt" + # Cutadapt compresses the zipped output files with a ridiculously + # high compression level (5 or 6). + # This is not the fast compression preset. It takes up to 400% more + # CPU time for a 20% reduction in file size. + # Hence we use compression level 1 here. + Int compressionLevel = 1 # This only affects outputs with the .gz suffix. + Boolean? interleaved String? pairFilter Float? errorRate @@ -52,7 +60,7 @@ task Cutadapt { String? stripSuffix String? prefix String? suffix - Int? minimumLength = 2 # Necessary to prevent creation of empty reads or 1 base reads. + Int? 
minimumLength = 2 # Necessary to prevent creation of empty reads or 1 base reads. Int? maximumLength Int? maxN Boolean? discardTrimmed @@ -73,11 +81,7 @@ task Cutadapt { Boolean? bwa Boolean? zeroCap Boolean? noZeroCap - String reportPath = "cutadapt_report.txt" - # Cutadapt compresses the zipped output files with a ridiculously high compression level (5 or 6). - # This is not the fast compression preset. It takes up to 400% more CPU time for a 20% reduction in file size. - # Hence we use compression level 1 here. - Int compressionLevel = 1 # This only affects outputs with the .gz suffix. + Int cores = 4 String memory = "~{300 + 100 * cores}M" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) @@ -152,8 +156,8 @@ task Cutadapt { output{ File cutRead1 = read1output - File? cutRead2 = read2output File report = reportPath + File? cutRead2 = read2output File? tooLongOutput=tooLongOutputPath File? tooShortOutput=tooShortOutputPath File? untrimmedOutput=untrimmedOutputPath @@ -173,22 +177,19 @@ task Cutadapt { } parameter_meta { + # inputs read1: {description: "The first or single end fastq file to be run through cutadapt.", category: "required"} read2: {description: "An optional second end fastq file to be run through cutadapt.", category: "common"} read1output: {description: "The name of the resulting first or single end fastq file.", category: "common"} read2output: {description: "The name of the resulting second end fastq file.", category: "common"} - adapter: {description: "A list of 3' ligated adapter sequences to be cut from the given first or single end fastq file.", - category: "common"} - front: {description: "A list of 5' ligated adapter sequences to be cut from the given first or single end fastq file.", - category: "advanced"} - anywhere: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given first or single end fastq file.", - category: "advanced"} - adapterRead2: {description: "A list of 3' ligated adapter 
sequences to be cut from the given second end fastq file.", - category: "common"} - frontRead2: {description: "A list of 5' ligated adapter sequences to be cut from the given second end fastq file.", - category: "advanced"} - anywhereRead2: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given second end fastq file.", - category: "advanced"} + adapter: {description: "A list of 3' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "common"} + front: {description: "A list of 5' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "advanced"} + anywhere: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "advanced"} + adapterRead2: {description: "A list of 3' ligated adapter sequences to be cut from the given second end fastq file.", category: "common"} + frontRead2: {description: "A list of 5' ligated adapter sequences to be cut from the given second end fastq file.", category: "advanced"} + anywhereRead2: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given second end fastq file.", category: "advanced"} + reportPath: {description: "The name of the file to write cutadapts's stdout to, this contains some metrics.", category: "common"} + compressionLevel: {description: "The compression level if gzipped output is used.", category: "advanced"} interleaved: {description: "Equivalent to cutadapt's --interleaved flag.", category: "advanced"} pairFilter: {description: "Equivalent to cutadapt's --pair-filter option.", category: "advanced"} errorRate: {description: "Equivalent to cutadapt's --error-rate option.", category: "advanced"} @@ -230,13 +231,9 @@ task Cutadapt { bwa: {description: "Equivalent to cutadapt's --bwa flag.", category: "advanced"} zeroCap: {description: "Equivalent to cutadapt's --zero-cap flag.", category: "advanced"} noZeroCap: 
{description: "Equivalent to cutadapt's --no-zero-cap flag.", category: "advanced"} - reportPath: {description: "The name of the file to write cutadapts's stdout to, this contains some metrics.", - category: "common"} - compressionLevel: {description: "The compression level if gzipped output is used.", category: "advanced"} cores: {description: "The number of cores to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/deepvariant.wdl b/deepvariant.wdl index f5661886..20bf8e27 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -28,6 +28,7 @@ task RunDeepVariant { File inputBamIndex String modelType String outputVcf + String? postprocessVariantsExtraArgs File? customizedModel Int? numShards @@ -43,7 +44,6 @@ task RunDeepVariant { command { set -e - /opt/deepvariant/bin/run_deepvariant \ --ref ~{referenceFasta} \ --reads ~{inputBam} \ @@ -59,36 +59,36 @@ task RunDeepVariant { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } output { File outputVCF = outputVcf File outputVCFIndex = outputVCF + ".tbi" + Array[File] outputVCFStatsReport = glob("*.visual_report.html") File? outputGVCF = outputGVcf File? 
outputGVCFIndex = outputGVcf + ".tbi" - Array[File] outputVCFStatsReport = glob("*.visual_report.html") } - + parameter_meta { - referenceFasta: {description: "Genome reference to use", category: "required"} + # inputs + referenceFasta: {description: "Genome reference to use.", category: "required"} referenceFastaIndex: {description: "Index for the genome reference file.", category: "required"} inputBam: {description: "Aligned, sorted, indexed BAM file containing the reads we want to call.", category: "required"} inputBamIndex: {description: "Index for the input bam file.", category: "required"} - modelType: {description: ". Type of model to use for variant calling. Each model_type has an associated default model, which can be overridden by the --customized_model flag", category: "required"} + modelType: {description: ". Type of model to use for variant calling. Each model_type has an associated default model, which can be overridden by the --customized_model flag.", category: "required"} outputVcf: {description: "Path where we should write VCF file.", category: "required"} - customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. If not set, the default for each --model_type will be used", category: "advanced"} + postprocessVariantsExtraArgs: {description: "A comma-separated list of flag_name=flag_value. 'flag_name' has to be valid flags for calpostprocess_variants.py.", category: "advanced"} + customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. 
If not set, the default for each --model_type will be used"., category: "advanced"} numShards: {description: "Number of shards for make_examples step.", category: "common"} outputGVcf: {description: "Path where we should write gVCF file.", category: "common"} regions: {description: "List of regions we want to process, in BED/BEDPE format.", category: "advanced"} sampleName: {description: "Sample name to use instead of the sample name from the input reads BAM (SM tag in the header).", category: "common"} VCFStatsReport: {description: "Output a visual report (HTML) of statistics about the output VCF.", category: "common"} - postprocessVariantsExtraArgs: {description: "A comma-separated list of flag_name=flag_value. 'flag_name' has to be valid flags for calpostprocess_variants.py.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From b131d926dd3cb7e2dc59adecb015fa09d1e3d3bc Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 30 Oct 2020 17:11:41 +0100 Subject: [PATCH 126/668] Edit another batch of tasks to uniform layout. 
--- bam2fastx.wdl | 10 +- delly.wdl | 6 +- fastqc.wdl | 49 ++-- fastqsplitter.wdl | 22 +- flash.wdl | 12 +- gatk.wdl | 612 +++++++++++++++++++++------------------------- gffcompare.wdl | 2 +- 7 files changed, 338 insertions(+), 375 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index e8884ab0..1b911dbb 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -37,18 +37,18 @@ task Bam2Fasta { command { set -e - mkdir -p "$(dirname ~{outputPrefix})"' + mkdir -p "$(dirname ~{outputPrefix})" # Localise the bam and pbi files so they are next to each other in the # current folder. bamFiles="" - for bamFile in ~{sep=" " bam}; + for bamFile in ~{sep=" " bam} do ln ${bamFile} . bamFiles=${bamFiles}" $(basename ${bamFile})" done - for index in ~{sep=" " bamIndex}; + for index in ~{sep=" " bamIndex} do ln ${index} . done @@ -110,13 +110,13 @@ task Bam2Fastq { # Localise the bam and pbi files so they are next to each other in the # current folder. bamFiles="" - for bamFile in ~{sep=" " bam}; + for bamFile in ~{sep=" " bam} do ln ${bamFile} . bamFiles=${bamFiles}" $(basename ${bamFile})" done - for index in ~{sep=" " bamIndex}; + for index in ~{sep=" " bamIndex} do ln ${index} . 
done diff --git a/delly.wdl b/delly.wdl index f708f494..ffe9023a 100644 --- a/delly.wdl +++ b/delly.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2018 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -59,9 +57,9 @@ task CallSV { bamFile: {description: "The bam file to process.", category: "required"} bamIndex: {description: "The index bam file.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} - referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } + referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } outputPath: {description: "The location the output VCF file should be written.", category: "common"} - memory: {description: "The memory required to run the programs", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/fastqc.wdl b/fastqc.wdl index 04b6813f..dd3dfc2e 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -29,6 +29,7 @@ task Fastqc { Boolean noFilter = false Boolean extract = false Boolean nogroup = false + Int? minLength String? format File? contaminants @@ -37,32 +38,35 @@ task Fastqc { Int? kmers String? dir - Int threads = 1 # Set javaXmx a little high. Equal to fastqc default with 7 threads. # This is because some fastq files need more memory. 2G per core # is a nice cluster default, so we use all the rest of the memory for # fastqc so we should have as little OOM crashes as possible even with # weird edge case fastq's. 
- String javaXmx="1750M" + String javaXmx="1750M" + Int threads = 1 String memory = "2G" Int timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 - String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" - Array[File]? NoneArray - File? NoneFile + String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0 + + Array[File]? noneArray + File? noneFile } # Chops of the .gz extension if present. - # The Basename needs to be taken here. Otherwise paths might differ between similar jobs. + # The Basename needs to be taken here. Otherwise paths might differ + # between similar jobs. String name = basename(sub(seqFile, "\.gz$","")) - # This regex chops of the extension and replaces it with _fastqc for the reportdir. + # This regex chops of the extension and replaces it with _fastqc for + # the reportdir. # Just as fastqc does it. String reportDir = outdirPath + "/" + sub(name, "\.[^\.]*$", "_fastqc") - # We reimplement the perl wrapper here. This has the advantage that it gives - # us more control over the amount of memory used. + # We reimplement the perl wrapper here. This has the advantage that it + # gives us more control over the amount of memory used. command <<< set -e - mkdir -p ~{outdirPath} + mkdir -p "~{outdirPath}" FASTQC_DIR="/usr/local/opt/fastqc-0.11.9" export CLASSPATH="$FASTQC_DIR:$FASTQC_DIR/sam-1.103.jar:$FASTQC_DIR/jbzip2-0.9.jar:$FASTQC_DIR/cisd-jhdf5.jar" java -Djava.awt.headless=true -XX:ParallelGCThreads=1 \ @@ -86,23 +90,24 @@ task Fastqc { >>> output { - File? rawReport = if extract then reportDir + "/fastqc_data.txt" else NoneFile File htmlReport = reportDir + ".html" File reportZip = reportDir + ".zip" - File? summary = if extract then reportDir + "/summary.txt" else NoneFile - Array[File]? images = if extract then glob(reportDir + "/Images/*.png") else NoneArray + File? summary = if extract then reportDir + "/summary.txt" else noneFile + File? rawReport = if extract then reportDir + "/fastqc_data.txt" else noneFile + Array[File]? 
images = if extract then glob(reportDir + "/Images/*.png") else noneArray } runtime { cpu: threads memory: memory - docker: dockerImage time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs seqFile: {description: "A fastq file.", category: "required"} - outdirPath: {description: "The path to write the output to", catgory: "required"} + outdirPath: {description: "The path to write the output to.", catgory: "required"} casava: {description: "Equivalent to fastqc's --casava flag.", category: "advanced"} nano: {description: "Equivalent to fastqc's --nano flag.", category: "advanced"} noFilter: {description: "Equivalent to fastqc's --nofilter flag.", category: "advanced"} @@ -115,18 +120,16 @@ task Fastqc { limits: {description: "Equivalent to fastqc's --limits option.", category: "advanced"} kmers: {description: "Equivalent to fastqc's --kmers option.", category: "advanced"} dir: {description: "Equivalent to fastqc's --dir option.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} threads: {description: "The number of cores to use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } meta { WDL_AID: { - exclude: ["NoneFile", "NoneArray"] + exclude: ["noneFile", "noneArray"] } } } @@ -155,14 +158,14 @@ task GetConfiguration { } runtime { - memory: "2G" # Needs more than 1 to pull the docker image + memory: "2G" # Needs more than 1 to pull the docker image. time_minute: timeMinutes docker: dockerImage } parameter_meta { + # inputs timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/fastqsplitter.wdl b/fastqsplitter.wdl index c523cf8a..25a50954 100644 --- a/fastqsplitter.wdl +++ b/fastqsplitter.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2019 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -26,19 +24,24 @@ task Fastqsplitter { input { File inputFastq Array[String]+ outputPaths - String dockerImage = "quay.io/biocontainers/fastqsplitter:1.1.0--py37h516909a_1" + Int? compressionLevel Int? threadsPerFile - # fastqplitter utilizes one thread per input file and one or more threads per output file + one thread for the application. - # Since a compression level of 1 is used, each output file uses approx 0.5 cores. + + # fastqplitter utilizes one thread per input file and one or + # more threads per output file + one thread for the application. + # Since a compression level of 1 is used, each output file + # uses approx 0.5 cores. 
Int cores = 1 + ceil(0.5 * length(outputPaths)) + String dockerImage = "quay.io/biocontainers/fastqsplitter:1.1.0--py37h516909a_1" } # Busybox mkdir does not accept multiple paths. command <<< set -e for FILE in ~{sep=' ' outputPaths} - do mkdir -p "$(dirname $FILE)" + do + mkdir -p "$(dirname ${FILE})" done fastqsplitter \ ~{"-c " + compressionLevel} \ @@ -51,15 +54,16 @@ task Fastqsplitter { Array[File] chunks = outputPaths } - # Using very safe margins here. 10MB/300MB per outputfile is used for single-threaded/multi-threaded compression. + # Using very safe margins here. 10MB/300MB per outputfile is used for + # single-threaded/multi-threaded compression. Float memoryPerFile = if select_first([threadsPerFile, 1]) > 1 then 0.40 else 0.02 Int fastqsplitterMemory = ceil(0.100 + memoryPerFile * length(outputPaths)) - # Make sure a minimum of 2 GB is present to pull the singularity image + # Make sure a minimum of 2 GB is present to pull the singularity image. Int memory = if fastqsplitterMemory <= 2 then 2 else fastqsplitterMemory runtime { + cpu: cores memory: "~{memory}G" docker: dockerImage - cpu: cores } } diff --git a/flash.wdl b/flash.wdl index 6e704921..c4554c50 100644 --- a/flash.wdl +++ b/flash.wdl @@ -24,13 +24,14 @@ import "common.wdl" as common task Flash { input { - String? preCommand FastqPair inputFastq String outdirPath String outPrefix = "flash" + Boolean compress = true + + String? preCommand Int? minOverlap Int? 
maxOverlap - Boolean compress = true Int threads = 2 String memory = "2G" @@ -55,8 +56,8 @@ task Flash { File notCombined1 = outdirPath + "/" + outPrefix + ".notCombined_1.fastq.gz" File notCombined2 = outdirPath + "/" + outPrefix + ".notCombined_2.fastq.gz" FastqPair notCombined = object { - R1: notCombined1, - R2: notCombined2 + R1: notCombined1, + R2: notCombined2 } File hist = outdirPath + "/" + outPrefix + ".hist" File histogram = outdirPath + "/" + outPrefix + ".histogram" @@ -66,5 +67,4 @@ task Flash { cpu: threads memory: memory } - -} \ No newline at end of file +} diff --git a/gatk.wdl b/gatk.wdl index 12416dda..cc5d1de5 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -28,12 +28,13 @@ task AnnotateIntervals { String annotatedIntervalsPath = "intervals.annotated.tsv" File intervals String intervalMergingRule = "OVERLAPPING_ONLY" + Int featureQueryLookahead = 1000000 + File? mappabilityTrack File? segmentalDuplicationTrack - Int featureQueryLookahead = 1000000 - String memory = "3G" String javaXmx = "2G" + String memory = "3G" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -57,9 +58,9 @@ task AnnotateIntervals { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -71,17 +72,15 @@ task AnnotateIntervals { intervalMergingRule: {description: "Equivalent to gatk AnnotateIntervals' `--interval-merging-rule` option.", category: "advanced"} mappabilityTrack: {description: "Equivalent to gatk AnnotateIntervals' `--mappability-track` option.", category: "common"} segmentalDuplicationTrack: {description: "Equivalent to gatk AnnotateIntervals' `--segmenta-duplicarion-track` option.", category: "common"} - featureQueryLookahead: {description: "Equivalent to gatk AnnotateIntervals' `--feature-query-lookahead` option", category: "advanced"} + featureQueryLookahead: {description: "Equivalent to gatk AnnotateIntervals' 
`--feature-query-lookahead` option.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -# Apply Base Quality Score Recalibration (BQSR) model +# Apply Base Quality Score Recalibration (BQSR) model. task ApplyBQSR { input { File inputBam @@ -93,9 +92,11 @@ task ApplyBQSR { File referenceFastaDict File referenceFastaFai - Int memoryMb = javaXmxMb + 512 Int javaXmxMb = 2048 - Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. + Int memoryMb = javaXmxMb + 512 + # This will likely be used with intervals, as such size based + # estimation can't be used. 
+ Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -124,33 +125,29 @@ task ApplyBQSR { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBam: {description: "The BAM file which should be recalibrated.", category: "required"} inputBamIndex: {description: "The input BAM file's index.", category: "required"} outputBamPath: {description: "The location the resulting BAM file should be written.", category: "required"} recalibrationReport: {description: "The BQSR report the be used for recalibration.", category: "required"} sequenceGroupInterval: {description: "Bed files describing the regions to operate on.", category: "advanced"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -# Generate Base Quality Score Recalibration (BQSR) model +# Generate Base Quality Score Recalibration (BQSR) model. task BaseRecalibrator { input { File inputBam @@ -159,14 +156,15 @@ task BaseRecalibrator { Array[File] sequenceGroupInterval = [] Array[File] knownIndelsSitesVCFs = [] Array[File] knownIndelsSitesVCFIndexes = [] - File? dbsnpVCF - File? dbsnpVCFIndex File referenceFasta File referenceFastaDict File referenceFastaFai - Int memoryMb = javaXmxMb + 512 + File? dbsnpVCF + File? dbsnpVCFIndex + Int javaXmxMb = 1024 + Int memoryMb = javaXmxMb + 512 Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -190,42 +188,39 @@ task BaseRecalibrator { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBam: {description: "The BAM file to generate a BQSR report for.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} recalibrationReportPath: {description: "The location to write the BQSR report to.", category: "required"} sequenceGroupInterval: {description: "Bed files describing the regions to operate on.", category: "advanced"} knownIndelsSitesVCFs: {description: "VCF files with known indels.", category: "advanced"} knownIndelsSitesVCFIndexes: {description: "The indexed for the known variant VCFs.", category: "advanced"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - + javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } task CalculateContamination { input { File tumorPileups + File? normalPileups - String memory = "13G" String javaXmx = "12G" + String memory = "13G" Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -246,20 +241,19 @@ task CalculateContamination { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs tumorPileups: {description: "The pileup summary of a tumor/case sample.", category: "required"} normalPileups: {description: "The pileup summary of the normal/control sample.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -268,8 +262,8 @@ task CallCopyRatioSegments { String outputPrefix File copyRatioSegments - String memory = "3G" String javaXmx = "2G" + String memory = "3G" Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -289,20 +283,19 @@ task CallCopyRatioSegments { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs outputPrefix: {description: "The prefix for the output files.", category: "required"} copyRatioSegments: {description: "The copy ratios file generated by gatk ModelSegments.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -310,15 +303,16 @@ task CollectAllelicCounts { input { String allelicCountsPath = "allelic_counts.tsv" File commonVariantSites - File? commonVariantSitesIndex File inputBam File inputBamIndex File referenceFasta File referenceFastaDict File referenceFastaFai - String memory = "11G" + File? commonVariantSitesIndex + String javaXmx = "10G" + String memory = "11G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -339,26 +333,25 @@ task CollectAllelicCounts { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs allelicCountsPath: {description: "The path the output should be written to.", category: "advanced"} commonVariantSites: {description: "Interval list or vcf of common variant sites (to retrieve the allelic counts for).", category: "required"} - commonVariantSitesIndex: {description: "The index for commonVariantSites.", category: "common"} inputBam: {description: "The BAM file to generate counts for.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + commonVariantSitesIndex: {description: "The index for commonVariantSites.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -373,8 +366,8 @@ task CollectReadCounts { File referenceFastaFai String intervalMergingRule = "OVERLAPPING_ONLY" - String memory = "8G" String javaXmx = "7G" + String memory = "8G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 5) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -397,12 +390,13 @@ task CollectReadCounts { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs countsPath: {description: "The location the output should be written to.", category: "advanced"} intervals: {description: "The intervals to collect counts for.", category: "required"} inputBam: {description: "The BAM file to determine the coverage for.", category: "required"} @@ -411,12 +405,10 @@ task CollectReadCounts { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} intervalMergingRule: {description: "Equivalent to gatk CollectReadCounts' `--interval-merging-rule` option.", category: "advanced"} + javaXmx: 
{description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -430,8 +422,8 @@ task CombineGVCFs { File referenceFastaDict File referenceFastaFai - String memory = "5G" String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 1 + ceil(size(gvcfFiles, "G") * 8) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -453,28 +445,24 @@ task CombineGVCFs { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs gvcfFiles: {description: "The GVCF files to be combined.", category: "required"} gvcfFilesIndex: {description: "The indexes for the GVCF files.", caregory: "required"} intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"} outputPath: {description: "The location the combined GVCF should be written to.", category: "required"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: 
"The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -486,12 +474,12 @@ task CombineVariants { String genotypeMergeOption = "UNIQUIFY" String filteredRecordsMergeType = "KEEP_IF_ANY_UNFILTERED" Array[String]+ identifiers - Array[File]+ variantVcfs # follow "identifiers" array order + Array[File]+ variantVcfs # Follow "identifiers" array order. Array[File]+ variantIndexes String outputPath - String memory = "13G" String javaXmx = "12G" + String memory = "13G" Int timeMinutes = 180 String dockerImage = "broadinstitute/gatk3:3.8-1" } @@ -499,17 +487,17 @@ task CombineVariants { command <<< set -e mkdir -p "$(dirname ~{outputPath})" - - # build "-V: " arguments according to IDs and VCFs to merge - # Make sure commands are run in bash + # Build "-V: " arguments according to IDs + # and VCFs to merge. + # Make sure commands are run in bash. 
V_args=$(bash -c ' set -eu ids=(~{sep=" " identifiers}) vars=(~{sep=" " variantVcfs}) for (( i = 0; i < ${#ids[@]}; ++i )) - do + do printf -- "-V:%s %s " "${ids[i]}" "${vars[i]}" - done + done ') java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 -jar /usr/GenomeAnalysisTK.jar \ -T CombineVariants \ @@ -526,12 +514,13 @@ task CombineVariants { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} @@ -540,14 +529,11 @@ task CombineVariants { identifiers: {description: "The sample identifiers in the same order as variantVcfs.", category: "required"} variantVcfs: {description: "The input VCF files in the same order as identifiers.", category: "required"} variantIndexes: {description: "The indexes of the input VCF files.", category: "required"} - outputPath: {description: "The location the output should be written to", category: "required"} - + outputPath: {description: "The location the output should be written to.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -555,10 +541,11 @@ task CreateReadCountPanelOfNormals { input { String PONpath = "PON.hdf5" Array[File]+ readCountsFiles + File? annotatedIntervals - String memory = "8G" String javaXmx = "7G" + String memory = "8G" Int timeMinutes = 5 String dockerImage = "broadinstitute/gatk:4.1.8.0" # The biocontainer causes a spark related error for some reason... } @@ -578,34 +565,33 @@ task CreateReadCountPanelOfNormals { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs PONpath: {description: "The location the PON should be written to.", category: "common"} readCountsFiles: {description: "The read counts files as generated by CollectReadCounts.", category: "required"} - annotatedIntervals: {description: "An annotation set of intervals as generated by AnnotateIntervals. If provided, explicit GC correction will be performed.", - category: "advanced"} + annotatedIntervals: {description: "An annotation set of intervals as generated by AnnotateIntervals. If provided, explicit GC correction will be performed.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } task DenoiseReadCounts { input { - File? PON - File? annotatedIntervals File readCounts String outputPrefix - String memory = "5G" + File? PON + File? annotatedIntervals + String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -628,23 +614,21 @@ task DenoiseReadCounts { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - PON: {description: "A panel of normals as generated by CreateReadCountPanelOfNormals.", category: "advanced"} - annotatedIntervals: {description: "An annotated set of intervals as generated by AnnotateIntervals. Will be ignored if PON is provided.", - category: "advanced"} + # inputs readCounts: {description: "The read counts file as generated by CollectReadCounts.", category: "required"} outputPrefix: {description: "The prefix for the output files.", category: "required"} + PON: {description: "A panel of normals as generated by CreateReadCountPanelOfNormals.", category: "advanced"} + annotatedIntervals: {description: "An annotated set of intervals as generated by AnnotateIntervals. Will be ignored if PON is provided.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -656,14 +640,15 @@ task FilterMutectCalls { File unfilteredVcf File unfilteredVcfIndex String outputVcf + Int uniqueAltReadCount = 4 + File mutect2Stats + File? contaminationTable File? mafTumorSegments File? artifactPriors - Int uniqueAltReadCount = 4 - File mutect2Stats - String memory = "13G" String javaXmx = "12G" + String memory = "13G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -692,41 +677,39 @@ task FilterMutectCalls { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} unfilteredVcf: {description: "An unfiltered VCF file as produced by Mutect2.", category: "required"} unfilteredVcfIndex: {description: "The index of the unfiltered VCF file.", category: "required"} outputVcf: {description: 
"The location the filtered VCF file should be written.", category: "required"} + uniqueAltReadCount: {description: "Equivalent to FilterMutectCalls' `--unique-alt-read-count` option.", category: "advanced"} + mutect2Stats: {description: "Equivalent to FilterMutectCalls' `-stats` option.", category: "advanced"} contaminationTable: {description: "Equivalent to FilterMutectCalls' `--contamination-table` option.", category: "advanced"} mafTumorSegments: {description: "Equivalent to FilterMutectCalls' `--tumor-segmentation` option.", category: "advanced"} artifactPriors: {description: "Equivalent to FilterMutectCalls' `--ob-priors` option.", category: "advanced"} - uniqueAltReadCount: {description: "Equivalent to FilterMutectCalls' `--unique-alt-read-count` option.", category: "advanced"} - mutect2Stats: {description: "Equivalent to FilterMutectCalls' `-stats` option.", category: "advanced"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -# Combine multiple recalibration tables from scattered BaseRecalibrator runs +# Combine multiple recalibration tables from scattered BaseRecalibrator runs. 
task GatherBqsrReports { input { Array[File] inputBQSRreports String outputReportPath - Int memoryMb = 256 + javaXmxMb Int javaXmxMb = 256 + Int memoryMb = 256 + javaXmxMb Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -745,21 +728,19 @@ task GatherBqsrReports { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBQSRreports: {description: "The BQSR reports to be merged.", category: "required"} outputReportPath: {description: "The location of the combined BQSR report.", category: "required"} - + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -770,9 +751,11 @@ task GenomicsDBImport { Array[File]+ intervals String genomicsDBWorkspacePath = "genomics_db" String genomicsDBTarFile = "genomics_db.tar.gz" + String? 
tmpDir - String memory = "5G" + String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -794,25 +777,23 @@ task GenomicsDBImport { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs gvcfFiles: {description: "The gvcfFiles to be merged.", category: "required"} gvcfFilesIndex: {description: "Indexes for the gvcfFiles.", category: "required"} intervals: {description: "intervals over which to operate.", category: "required"} - genomicsDBWorkspacePath: {description: "Where the genomicsDB files should be stored", category: "advanced"} - genomicsDBTarFile: {description: "Where the .tar file containing the genomicsDB should be stored", category: "advanced"} - tmpDir: {description: "Alternate temporary directory in case there is not enough space. Must be mounted when using containers", - category: "advanced"} + genomicsDBWorkspacePath: {description: "Where the genomicsDB files should be stored.", category: "advanced"} + genomicsDBTarFile: {description: "Where the .tar file containing the genomicsDB should be stored.", category: "advanced"} + tmpDir: {description: "Alternate temporary directory in case there is not enough space. Must be mounted when using containers.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -820,18 +801,19 @@ task GenotypeGVCFs { input { File gvcfFile File gvcfFileIndex - Array[File]? intervals String outputPath File referenceFasta File referenceFastaDict File referenceFastaFai Array[String] annotationGroups = ["StandardAnnotation"] + + Array[File]? intervals File? dbsnpVCF File? dbsnpVCFIndex File? pedigree - String memory = "7G" String javaXmx = "6G" + String memory = "7G" Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -854,35 +836,31 @@ task GenotypeGVCFs { output { File outputVCF = outputPath File outputVCFIndex = outputPath + ".tbi" - } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs gvcfFile: {description: "The GVCF file to be genotyped.", category: "required"} gvcfFileIndex: {description: "The index of the input GVCF file.", category: "required"} - intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "optional"} outputPath: {description: "The location to write the output VCF file to.", category: "required"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", 
category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - annotationGroups: {description: "Which annotation groups will be used for the annotation", category: "advanced"} + annotationGroups: {description: "Which annotation groups will be used for the annotation.", category: "advanced"} + intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "optional"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} - pedigree: {description: "Pedigree file for determining the population \"founders\"", category: "common"} + pedigree: {description: "Pedigree file for determining the population \"founders\".", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -896,8 +874,8 @@ task GetPileupSummaries { File sitesForContaminationIndex String outputPrefix - String memory = "13G" String javaXmx = "12G" + String memory = "13G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -917,12 +895,13 @@ task GetPileupSummaries { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs sampleBam: {description: "A BAM file for which a pileup should be created.", category: "required"} sampleBamIndex: {description: "The index of the input BAM file.", category: "required"} variantsForContamination: {description: "A VCF file with common variants.", category: "required"} @@ -930,13 +909,10 @@ task GetPileupSummaries { sitesForContamination: {description: "A bed file describing regions to operate on.", category: "required"} sitesForContaminationIndex: {description: "The index for the bed file.", category: "required"} outputPrefix: {description: "The prefix for the ouput.", category: "required"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -945,26 +921,27 @@ task HaplotypeCaller { input { Array[File]+ inputBams Array[File]+ inputBamsIndex - Array[File]+? intervalList - Array[File]+? excludeIntervalList String outputPath File referenceFasta File referenceFastaIndex File referenceFastaDict + Boolean gvcf = false + String emitRefConfidence = if gvcf then "GVCF" else "NONE" + Boolean dontUseSoftClippedBases = false + + Array[File]+? intervalList + Array[File]+? excludeIntervalList Float? contamination File? dbsnpVCF File? dbsnpVCFIndex File? pedigree Int? ploidy String? outputMode - Boolean gvcf = false - String emitRefConfidence = if gvcf then "GVCF" else "NONE" - Boolean dontUseSoftClippedBases = false Float? standardMinConfidenceThresholdForCalling - Int memoryMb = javaXmxMb + 512 - # Memory increases with time used. 4G should cover most use cases. Int javaXmxMb = 4096 + # Memory increases with time used. 4G should cover most use cases. + Int memoryMb = javaXmxMb + 512 Int timeMinutes = 400 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -995,50 +972,44 @@ task HaplotypeCaller { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBams: {description: "The BAM files on which to perform variant calling.", category: "required"} inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} - intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"} - excludeIntervalList: {description: "Bed files or interval lists describing the regions to NOT operate on.", category: "common"} outputPath: {description: "The location to write the output to.", category: "required"} - ploidy: {description: "The ploidy with which the variants should be called.", category: "common"} - gvcf: {description: "Whether the output should be a gvcf", category: "common"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaIndex: {description: "The index for the reference fasta file.", category: "required"} - contamination: {description: "Equivalent to HaplotypeCaller's `-contamination` option.", category: "advanced"} - outputMode: {description: "Specifies which type of calls we should output. Same as HaplotypeCaller's `--output-mode` option.", - category: "advanced"} - emitRefConfidence: {description: "Whether to include reference calls. 
Three modes: 'NONE', 'BP_RESOLUTION' and 'GVCF'", - category: "advanced"} + gvcf: {description: "Whether the output should be a gvcf.", category: "common"} + emitRefConfidence: {description: "Whether to include reference calls. Three modes: 'NONE', 'BP_RESOLUTION' and 'GVCF'.", category: "advanced"} dontUseSoftClippedBases: {description: "Do not use soft-clipped bases. Should be 'true' for RNA variant calling.", category: "common"} - standardMinConfidenceThresholdForCalling: {description: "Confidence threshold used for calling variants.", category: "advanced"} + intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"} + excludeIntervalList: {description: "Bed files or interval lists describing the regions to NOT operate on.", category: "common"} + contamination: {description: "Equivalent to HaplotypeCaller's `-contamination` option.", category: "advanced"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} - pedigree: {description: "Pedigree file for determining the population \"founders\"", category: "common"} + pedigree: {description: "Pedigree file for determining the population \"founders\".", category: "common"} + ploidy: {description: "The ploidy with which the variants should be called.", category: "common"} + outputMode: {description: "Specifies which type of calls we should output. Same as HaplotypeCaller's `--output-mode` option.", category: "advanced"} + standardMinConfidenceThresholdForCalling: {description: "Confidence threshold used for calling variants.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } - task LearnReadOrientationModel { input { Array[File]+ f1r2TarGz - String memory = "13G" String javaXmx = "12G" + String memory = "13G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1056,19 +1027,18 @@ task LearnReadOrientationModel { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs f1r2TarGz: {description: "A f1r2TarGz file outputed by mutect2.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1076,8 +1046,8 @@ task MergeStats { input { Array[File]+ stats - String memory = "15G" String javaXmx = "14G" + String memory = "15G" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1095,19 +1065,18 @@ task MergeStats { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs stats: {description: "Statistics files to be merged.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1117,14 +1086,13 @@ task ModelSegments { String outputPrefix File denoisedCopyRatios File allelicCounts - File? 
normalAllelicCounts - Int minimumTotalAlleleCountCase = if defined(normalAllelicCounts) - then 0 - else 30 + Int minimumTotalAlleleCountCase = if defined(normalAllelicCounts) then 0 else 30 Int maximumNumberOfSmoothingIterations = 10 - String memory = "11G" + File? normalAllelicCounts + String javaXmx = "10G" + String memory = "11G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1145,7 +1113,6 @@ task ModelSegments { output { File hetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.tsv" - File? normalHetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.normal.tsv" File copyRatioSegments = outputDir + "/" + outputPrefix + ".cr.seg" File copyRatioCBS = outputDir + "/" + outputPrefix + ".cr.igv.seg" File alleleFractionCBS = outputDir + "/" + outputPrefix + ".af.igv.seg" @@ -1155,29 +1122,28 @@ task ModelSegments { File modeledSegments = outputDir + "/" + outputPrefix + ".modelFinal.seg" File copyRatioParameters = outputDir + "/" + outputPrefix + ".modelFinal.cr.param" File alleleFractionParameters = outputDir + "/" + outputPrefix + ".modelFinal.af.param" + File? normalHetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.normal.tsv" } runtime { - docker: dockerImage - time_minute: timeMinutes memory: memory + time_minute: timeMinutes + docker: dockerImage } parameter_meta { + # inputs outputDir: {description: "The directory to write the ouput to.", category: "common"} outputPrefix: {description: "The prefix of the output files. 
Should not include directories.", category: "required"} denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"} allelicCounts: {description: "The allelicCounts as generate by CollectAllelicCounts.", category: "required" } - normalAllelicCounts: {description: "The allelicCounts as generate by CollectAllelicCounts for a matched normal.", category: "common"} minimumTotalAlleleCountCase: {description: "Equivalent to gatk ModelSeqments' `--minimum-total-allele-count-case` option.", category: "advanced"} maximumNumberOfSmoothingIterations: {description: "Equivalent to gatk ModelSeqments' `--maximum-number-of-smoothing-iterations` option.", category: "advanced"} - + normalAllelicCounts: {description: "The allelicCounts as generate by CollectAllelicCounts for a matched normal.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1190,17 +1156,18 @@ task MuTect2 { File referenceFastaFai String outputVcf String tumorSample + String f1r2TarGz = "f1r2.tar.gz" + Array[File]+ intervals + String outputStats = outputVcf + ".stats" + String? normalSample File? germlineResource File? 
germlineResourceIndex File? panelOfNormals File? panelOfNormalsIndex - String f1r2TarGz = "f1r2.tar.gz" - Array[File]+ intervals - String outputStats = outputVcf + ".stats" - String memory = "5G" String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1229,12 +1196,13 @@ task MuTect2 { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBams: {description: "The BAM files on which to perform variant calling.", category: "required"} inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} @@ -1242,20 +1210,18 @@ task MuTect2 { referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} outputVcf: {description: "The location to write the output VCF file to.", category: "required"} tumorSample: {description: "The name of the tumor/case sample.", category: "required"} + f1r2TarGz: {description: "Equivalent to Mutect2's `--f1r2-tar-gz` option.", category: "advanced"} + intervals: {description: "Bed files describing the regiosn to operate on.", category: "required"} + outputStats: {description: "The location the output statistics should be written to.", category: "advanced"} normalSample: {description: "The name of the normal/control sample.", category: "common"} germlineResource: {description: "Equivalent to Mutect2's `--germline-resource` option.", category: "advanced"} germlineResourceIndex: {description: "The index for the germline resource.", category: "advanced"} panelOfNormals: {description: "Equivalent to Mutect2's `--panel-of-normals` option.", category: "advanced"} panelOfNormalsIndex: {description: "The index for the panel of normals.", category: "advanced"} - f1r2TarGz: 
{description: "Equivalent to Mutect2's `--f1r2-tar-gz` option.", category: "advanced"} - intervals: {description: "Bed files describing the regiosn to operate on.", category: "required"} - outputStats: {description: "The location the output statistics should be written to.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1266,10 +1232,11 @@ task PlotDenoisedCopyRatios { String outputPrefix File standardizedCopyRatios File denoisedCopyRatios + Int? minimumContigLength - String memory = "4G" String javaXmx = "3G" + String memory = "4G" Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.8.0" } @@ -1289,32 +1256,31 @@ task PlotDenoisedCopyRatios { output { File denoisedCopyRatiosPlot = outputDir + "/" + outputPrefix + ".denoised.png" - File? 
denoisedCopyRatiosLimitedPlot = outputDir + "/" + outputPrefix + ".denoisedLimit4.png" File standardizedMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".standardizedMAD.txt" File denoisedMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".denoisedMAD.txt" File deltaMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".deltaMAD.txt" File deltaScaledMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".scaledDeltaMAD.txt" + File? denoisedCopyRatiosLimitedPlot = outputDir + "/" + outputPrefix + ".denoisedLimit4.png" } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file used for the analyses.", category: "required"} outputDir: {description: "The directory to write the ouput to.", category: "common"} outputPrefix: {description: "The prefix of the output files. Should not include directories.", category: "required"} - denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"} standardizedCopyRatios: {description: "The standardized copy ratios as generated by DenoiseReadCounts.", category: "required"} + denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"} minimumContigLength: {description: "The minimum length for a contig to be included in the plots.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1326,10 +1292,11 @@ task PlotModeledSegments { File denoisedCopyRatios File segments File allelicCounts + Int? minimumContigLength - String memory = "4G" String javaXmx = "3G" + String memory = "4G" Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.8.0" } @@ -1353,12 +1320,13 @@ task PlotModeledSegments { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file used for the analyses.", category: "required"} outputDir: {description: "The directory to write the ouput to.", category: "common"} outputPrefix: {description: "The prefix of the output files. Should not include directories.", category: "required"} @@ -1366,12 +1334,10 @@ task PlotModeledSegments { segments: {description: "The modeled segments as generated by ModelSegments.", category: "required"} allelicCounts: {description: "The hetrozygous allelic counts as generated by ModelSegments.", category: "required"} minimumContigLength: {description: "The minimum length for a contig to be included in the plots.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1380,14 +1346,15 @@ task PreprocessIntervals { File referenceFasta File referenceFastaDict File referenceFastaFai - File? intervals String outputIntervalList = "bins.interval_list" Int binLength = if defined(intervals) then 0 else 1000 Int padding = if defined(intervals) then 250 else 0 String intervalMergingRule = "OVERLAPPING_ONLY" - String memory = "4G" + File? 
intervals + String javaXmx = "3G" + String memory = "4G" Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 6) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1411,41 +1378,42 @@ task PreprocessIntervals { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - referenceFasta: {description: "The reference fasta file..", category: "required"} + # inputs + referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - intervals: {description: "Bed files describing the regiosn to operate on.", category: "common"} outputIntervalList: {description: "The location the output should be written to.", category: "advanced"} binLength: {description: "The size of the bins to be created. Should be 0 for targeted/exome sequencing.", category: "advanced"} padding: {description: "The padding to be added to the bins. Should be 0 if contiguos binning is used, eg with WGS.", category: "advanced"} intervalMergingRule: {description: "Equivalent to gatk PreprocessIntervals' `--interval-merging-rule` option.", category: "advanced"} + intervals: {description: "Bed files describing the regiosn to operate on.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } task SelectVariants { input { + File inputVcf + File inputVcfIndex File referenceFasta File referenceFastaDict File referenceFastaFai - File inputVcf - File inputVcfIndex String outputPath = "output.vcf.gz" - String? selectTypeToInclude Array[File] intervals = [] - String memory = "5G" + + String? selectTypeToInclude + String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1468,29 +1436,25 @@ task SelectVariants { } runtime { - docker: dockerImage - time_minute: timeMinutes memory: memory + time_minute: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputVcf: {description: "The VCF input file.", category: "required"} inputVcfIndex: {description: "The input VCF file's index.", category: "required"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - selectTypeToInclude: {description: "Select only 
a certain type of variants from the input file", category: "common"} outputPath: {description: "The location the output VCF file should be written.", category: "advanced"} intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"} - + selectTypeToInclude: {description: "Select only a certain type of variants from the input file.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1504,8 +1468,8 @@ task SplitNCigarReads { String outputBam Array[File] intervals = [] - String memory = "5G" String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1527,28 +1491,24 @@ task SplitNCigarReads { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBam: {description: "The BAM file for which spliced reads should be split.", category: "required"} inputBamIndex: {description: "The input BAM file's index.", category: "required"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} outputBam: {description: "The location the output BAM file should be written.", category: "required"} intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1558,11 +1518,6 @@ task VariantEval { Array[File] evalVcfsIndex Array[File] comparisonVcfs = [] Array[File] comparisonVcfsIndex = [] - File? referenceFasta - File? referenceFastaDict - File? referenceFastaFai - File? dbsnpVCF - File? dbsnpVCFIndex Array[File] intervals = [] String outputPath = "eval.table" Boolean doNotUseAllStandardModules = false @@ -1572,8 +1527,14 @@ task VariantEval { Array[String] samples = [] Boolean mergeEvals = false - String memory = "5G" + File? referenceFasta + File? referenceFastaDict + File? referenceFastaFai + File? dbsnpVCF + File? dbsnpVCFIndex + String javaXmx = "4G" + String memory = "5G" # TODO: Refine estimate. For now 4 minutes per GB of input. Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs, select_all([referenceFasta, dbsnpVCF])]), "G") * 20) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" @@ -1604,35 +1565,37 @@ task VariantEval { runtime { cpu: 1 - docker: dockerImage memory: memory time_minutes: timeMinutes + docker: dockerImage } + parameter_meta { + # inputs evalVcfs: {description: "Variant sets to evaluate.", category: "required"} evalVcfsIndex: {description: "Indexes for the variant sets.", category: "required"} comparisonVcfs: {description: "Compare set vcfs.", category: "advanced"} comparisonVcfsIndex: {description: "Indexes for the compare sets.", category: "advanced"} - evalModules: {description: "One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless doNotUseAllStandardModules=true)", category: "common"} - stratificationModules: {description: "One or more specific stratification modules to apply to the eval track(s) (in addition to 
the standard stratifications, unless doNotUseAllStandardStratifications=true)", category: "common"} - samples: {description: "Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context." , category: "advanced"} # Advanced because this description is impossible to understand... - mergeEvals: {description: "If provided, all evalVcf tracks will be merged into a single eval track", category: "common"} + intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"} + outputPath: {description: "The location the output table should be written.", category: "advanced"} doNotUseAllStandardModules: {description: "Do not use the standard modules by default (instead, only those that are specified with the evalModules option).", category: "common"} doNotUseAllStandardStratifications: {description: "Do not use the standard stratification modules by default (instead, only those that are specified with the stratificationModules option).", category: "common"} + evalModules: {description: "One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless doNotUseAllStandardModules=true).", category: "common"} + stratificationModules: {description: "One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless doNotUseAllStandardStratifications=true).", category: "common"} + samples: {description: "Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context." 
, category: "advanced"} + mergeEvals: {description: "If provided, all evalVcf tracks will be merged into a single eval track.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "common"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "common"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "common"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} - outputPath: {description: "The location the output table should be written.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } + task VariantFiltration { input { File inputVcf @@ -1644,8 +1607,8 @@ task VariantFiltration { Array[String]+ filterArguments Array[File] intervals = [] - String memory = "5G" String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1668,29 +1631,24 @@ task VariantFiltration { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputVcf: {description: "The VCF to be filtered.", category: "required"} inputVcfIndex: {description: "The input VCF file's index.", category: "required"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} + filterArguments: {description: "Arguments that should be used for the filter. For example: ['--filter-name', 'my_filter', '--filter-expression', 'AB<0.2'].", category: "required"} intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"} - filterArguments: {description: "Arguments that should be used for the filter. 
For example: ['--filter-name', 'my_filter', '--filter-expression', 'AB<0.2']", - category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } - diff --git a/gffcompare.wdl b/gffcompare.wdl index e5f62b5e..5d80f619 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -147,4 +147,4 @@ task GffCompare { exclude: ["noneFile"] } } -} \ No newline at end of file +} From 08d6519a05a9e297decbe81e0e29c633ea07e14f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 30 Oct 2020 17:27:38 +0100 Subject: [PATCH 127/668] Try to fix Travis error. --- bam2fastx.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 1b911dbb..0585de23 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -112,13 +112,13 @@ task Bam2Fastq { bamFiles="" for bamFile in ~{sep=" " bam} do - ln ${bamFile} . - bamFiles=${bamFiles}" $(basename ${bamFile})" + ln $bamFile . + bamFiles=$bamFiles" $(basename $bamFile)" done for index in ~{sep=" " bamIndex} do - ln ${index} . + ln $index . 
done bam2fastq \ @@ -126,7 +126,7 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ${bamFiles} + $bamFiles } output { From 9f77348d7a353e93b1f2a57b02942a93107ea634 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 30 Oct 2020 17:30:59 +0100 Subject: [PATCH 128/668] Fix second task as well. --- bam2fastx.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 0585de23..2ad08581 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -44,13 +44,13 @@ task Bam2Fasta { bamFiles="" for bamFile in ~{sep=" " bam} do - ln ${bamFile} . - bamFiles=${bamFiles}" $(basename ${bamFile})" + ln $bamFile . + bamFiles=$bamFiles" $(basename $bamFile)" done for index in ~{sep=" " bamIndex} do - ln ${index} . + ln $index . done bam2fasta \ From 840a37d19727ee6edb790287cfd447e9964ce669 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 30 Oct 2020 17:39:16 +0100 Subject: [PATCH 129/668] Fix a third Travis error. --- deepvariant.wdl | 2 +- gffcompare.wdl | 44 +++++++++++++++++++++++--------------------- gffread.wdl | 2 +- 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index 20bf8e27..8b08e111 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -81,7 +81,7 @@ task RunDeepVariant { modelType: {description: ". Type of model to use for variant calling. Each model_type has an associated default model, which can be overridden by the --customized_model flag.", category: "required"} outputVcf: {description: "Path where we should write VCF file.", category: "required"} postprocessVariantsExtraArgs: {description: "A comma-separated list of flag_name=flag_value. 'flag_name' has to be valid flags for calpostprocess_variants.py.", category: "advanced"} - customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. 
If not set, the default for each --model_type will be used"., category: "advanced"} + customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. If not set, the default for each --model_type will be used.", category: "advanced"} numShards: {description: "Number of shards for make_examples step.", category: "common"} outputGVcf: {description: "Path where we should write gVCF file.", category: "common"} regions: {description: "List of regions we want to process, in BED/BEDPE format.", category: "advanced"} diff --git a/gffcompare.wdl b/gffcompare.wdl index 5d80f619..8bd53091 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -22,16 +22,11 @@ version 1.0 task GffCompare { input { - File? inputGtfList Array[File] inputGtfFiles File referenceAnnotation - String? outputDir - String outPrefix = "gffcmp" # gffcmp is the default used by the program as well. This - # needs to be defined in order for the output values to be consistent and correct. - File? genomeSequences - Int? maxDistanceFreeEndsTerminalExons - Int? maxDistanceGroupingTranscriptStartSites - String? namePrefix + # gffcmp is the default used by the program as well. This needs to be + # defined in order for the output values to be consistent and correct. + String outPrefix = "gffcmp" Boolean C = false Boolean A = false Boolean X = false @@ -44,15 +39,22 @@ task GffCompare { Boolean verbose = false Boolean debugMode = false + File? inputGtfList + String? outputDir + File? genomeSequences + Int? maxDistanceFreeEndsTerminalExons + Int? maxDistanceGroupingTranscriptStartSites + String? namePrefix + Int timeMinutes = 1 + ceil(size(inputGtfFiles, "G") * 30) String dockerImage = "quay.io/biocontainers/gffcompare:0.10.6--h2d50403_0" # This workaround only works in the input section. - # Issue addressed at https://github.com/openwdl/wdl/pull/263 + # Issue addressed at https://github.com/openwdl/wdl/pull/263. File? noneFile # This is a wdl workaround. Please do not assign! 
} - # This allows for the creation of output directories + # This allows for the creation of output directories. String dirPrefix = if defined(outputDir) then select_first([outputDir]) + "/" else "" @@ -93,22 +95,22 @@ task GffCompare { then "annotated" else "combined" - # Check if a redundant .gtf will be created + # Check if a redundant .gtf will be created. Boolean createRedundant = C || A || X output { + # noneFile is not stable. Please replace this as soon as wdl spec allows. File annotated = totalPrefix + "." + annotatedName + ".gtf" File loci = totalPrefix + ".loci" File stats = totalPrefix + ".stats" File tracking = totalPrefix + ".tracking" - # noneFile is not stable. Please replace this as soon as wdl spec allows + Array[File] allFiles = select_all([annotated, loci, stats, tracking, redundant, missedIntrons]) File? redundant = if createRedundant then totalPrefix + ".redundant.gtf" else noneFile File? missedIntrons = if debugMode then totalPrefix + ".missed_introns.gtf" else noneFile - Array[File] allFiles = select_all([annotated, loci, stats, tracking, redundant, missedIntrons]) } runtime { @@ -117,15 +119,10 @@ task GffCompare { } parameter_meta { - inputGtfList: {description: "Equivalent to gffcompare's `-i` option.", category: "advanced"} + # inputs inputGtfFiles: {description: "The input GTF files.", category: "required"} referenceAnnotation: {description: "The GTF file to compare with.", category: "required"} - outputDir: {description: "The location the output should be written.", category: "common"} outPrefix: {description: "The prefix for the output.", category: "advanced"} - genomeSequences: {description: "Equivalent to gffcompare's `-s` option.", category: "advanced"} - maxDistanceFreeEndsTerminalExons: {description: "Equivalent to gffcompare's `-e` option.", category: "advanced"} - maxDistanceGroupingTranscriptStartSites: {description: "Equivalent to gffcompare's `-d` option.", category: "advanced"} - namePrefix: {description: "Equivalent to 
gffcompare's `-p` option.", category: "advanced"} C: {description: "Equivalent to gffcompare's `-C` flag.", category: "advanced"} A: {description: "Equivalent to gffcompare's `-A` flag.", category: "advanced"} X: {description: "Equivalent to gffcompare's `-X` flag.", category: "advanced"} @@ -137,9 +134,14 @@ task GffCompare { noTmap: {description: "Equivalent to gffcompare's `-T` flag.", category: "advanced"} verbose: {description: "Equivalent to gffcompare's `-V` flag.", category: "advanced"} debugMode: {description: "Equivalent to gffcompare's `-D` flag.", category: "advanced"} + inputGtfList: {description: "Equivalent to gffcompare's `-i` option.", category: "advanced"} + outputDir: {description: "The location the output should be written.", category: "common"} + genomeSequences: {description: "Equivalent to gffcompare's `-s` option.", category: "advanced"} + maxDistanceFreeEndsTerminalExons: {description: "Equivalent to gffcompare's `-e` option.", category: "advanced"} + maxDistanceGroupingTranscriptStartSites: {description: "Equivalent to gffcompare's `-d` option.", category: "advanced"} + namePrefix: {description: "Equivalent to gffcompare's `-p` option.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } meta { diff --git a/gffread.wdl b/gffread.wdl index d83e4d76..76ee20d1 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -79,4 +79,4 @@ task GffRead { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} From ca4fe2d92f42b2c32b42197deeef204cec07762f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 2 Nov 2020 08:54:32 +0100 Subject: [PATCH 130/668] Add another batch of updates. --- CHANGELOG.md | 1 + gatk.wdl | 1 + gffread.wdl | 16 +++++++------ gridss.wdl | 15 +++++++------ hisat2.wdl | 32 +++++++++++++------------- htseq.wdl | 13 ++++++----- isoseq3.wdl | 18 +++++++-------- lima.wdl | 10 ++++----- macs2.wdl | 2 +- manta.wdl | 19 +++++++++------- minimap2.wdl | 27 +++++++++++----------- multiqc.wdl | 63 ++++++++++++++++++++++++++-------------------------- nanopack.wdl | 10 ++++----- 13 files changed, 119 insertions(+), 108 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c04b582..028c7400 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ isoseq3: `cores` input has been renamed to `threads` to match tool naming. + CCS: `cores` input has been renamed to `threads` to match tool naming. + CCS: Update CCS to version 5. + deepvariant: Add task for DeepVariant. diff --git a/gatk.wdl b/gatk.wdl index cc5d1de5..7aa2915c 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -64,6 +64,7 @@ task AnnotateIntervals { } parameter_meta { + # inputs referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} diff --git a/gffread.wdl b/gffread.wdl index 76ee20d1..343011e9 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -24,19 +24,21 @@ task GffRead { input { File inputGff File genomicSequence + Boolean outputGtfFormat = false + File? genomicIndex # Optional. 
GFFRead can create this by itself. String? exonsFastaPath String? CDSFastaPath String? proteinFastaPath String? filteredGffPath - Boolean outputGtfFormat = false + Int timeMinutes = 1 + ceil(size(inputGff) * 10) String dockerImage = "quay.io/biocontainers/gffread:0.9.12--0" } # The mkdirs below are hackish. It should be - # ~{"mkir -p $(dirname " + somePath + ")"} - # but this goes wrong. Cromwell will always use ')' even if somepath is not defined. + # ~{"mkir -p $(dirname " + somePath + ")"} but this goes wrong. + # Cromwell will always use ')' even if somepath is not defined. # Which leads to crashing. command { set -e @@ -62,21 +64,21 @@ task GffRead { } runtime { - docker: dockerImage time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputGff: {description: "The input GFF file.", category: "required"} genomicSequence: {description: "The genome.", category: "required"} + outputGtfFormat: {description: "Equivalent to gffread's `-T` flag.", category: "advanced"} genomicIndex: {description: "The genome's index.", category: "advanced"} exonsFastaPath: {description: "The location the exons fasta should be written to.", category: "advanced"} CDSFastaPath: {description: "The location the CDS fasta should be written to.", category: "advanced"} proteinFastaPath: {description: "The location the protein fasta should be written to.", category: "advanced"} filteredGffPath: {description: "The location the filtered GFF should be written to.", category: "advanced"} - outputGtfFormat: {description: "Equivalent to gffread's `-T` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/gridss.wdl b/gridss.wdl index 44b9e9f1..9499be5e 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -27,11 +27,12 @@ task GRIDSS { File tumorBam File tumorBai String tumorLabel + BwaIndex reference + String outputPrefix = "gridss" + File? normalBam File? normalBai String? normalLabel - BwaIndex reference - String outputPrefix = "gridss" Int jvmHeapSizeGb = 30 Int threads = 1 @@ -68,17 +69,17 @@ task GRIDSS { } parameter_meta { + # inputs tumorBam: {description: "The input BAM file. This should be the tumor/case sample in case of a paired analysis.", category: "required"} tumorBai: {description: "The index for tumorBam.", category: "required"} tumorLabel: {description: "The name of the (tumor) sample.", category: "required"} + reference: {description: "A BWA index, this should also include the fasta index file (.fai).", category: "required"} + outputPrefix: {description: "The prefix for the output files. This may include parent directories.", category: "common"} normalBam: {description: "The BAM file for the normal/control sample.", category: "advanced"} normalBai: {description: "The index for normalBam.", category: "advanced"} normalLabel: {description: "The name of the normal sample.", category: "advanced"} - reference: {description: "A BWA index, this should also include the fasta index file (.fai).", category: "required"} - outputPrefix: {description: "The prefix for the output files. This may include parent directories.", category: "common"} - + jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling.",category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} - jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} diff --git a/hisat2.wdl b/hisat2.wdl index f9a4bc59..b52bf70f 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -22,9 +22,9 @@ version 1.0 task Hisat2 { input { - Array[File]+ indexFiles File inputR1 File? inputR2 + Array[File]+ indexFiles String outputBam String sample String library @@ -32,22 +32,22 @@ task Hisat2 { String platform = "illumina" Boolean downstreamTranscriptomeAssembly = true String summaryFilePath = basename(outputBam, ".bam") + ".summary.txt" - - Int threads = 4 - Int? sortThreads Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 + + Int? sortThreads + + Int threads = 4 Int? memoryGb Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / threads) # quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1 - # is a combination of hisat2 and samtools - # hisat2=2.2.0, samtools=1.10 + # is a combination of hisat2 and samtools hisat2=2.2.0 & samtools=1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:2880dd9d8ad0a7b221d4eacda9a818e92983128d-0" } - # Samtools sort may block the pipe while it is writing data to disk. + # Samtools sort may block the pipe while it is writing data to disk. # This can lead to cpu underutilization. - # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. + # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. 
Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) Int estimatedMemoryGb = 1 + ceil(size(indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads @@ -81,16 +81,17 @@ task Hisat2 { } runtime { - memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" cpu: threads + memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" time_minutes: timeMinutes docker: dockerImage } parameter_meta { - indexFiles: {description: "The hisat2 index files.", category: "required"} + # inputs inputR1: {description: "The first-/single-end FastQ file.", category: "required"} inputR2: {description: "The second-end FastQ file.", category: "common"} + indexFiles: {description: "The hisat2 index files.", category: "required"} outputBam: {description: "The location the output BAM file should be written to.", category: "required"} sample: {description: "The sample id.", category: "required"} library: {description: "The library id.", category: "required"} @@ -98,13 +99,12 @@ task Hisat2 { platform: {description: "The platform used for sequencing.", category: "advanced"} downstreamTranscriptomeAssembly: {description: "Equivalent to hisat2's `--dta` flag.", category: "advanced"} summaryFilePath: {description: "Where the summary file should be written.", category: "advanced"} + sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: 
{description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} - sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} - compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} diff --git a/htseq.wdl b/htseq.wdl index cbd8e2ac..cf527535 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -27,9 +27,10 @@ task HTSeqCount { String outputTable = "output.tsv" String order = "pos" String stranded = "no" + Array[String] additionalAttributes = [] + String? featureType String? idattr - Array[String] additionalAttributes = [] Int nprocesses = 1 String memory = "8G" @@ -58,24 +59,24 @@ task HTSeqCount { runtime { cpu: nprocesses - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes docker: dockerImage } parameter_meta { + # inputs inputBams: {description: "The input BAM files.", category: "required"} gtfFile: {description: "A GTF/GFF file containing the features of interest.", category: "required"} outputTable: {description: "The path to which the output table should be written.", category: "common"} - nprocesses: {description: "Number of processes to run htseq with.", category: "advanced"} order: {description: "Equivalent to the -r option of htseq-count.", category: "advanced"} stranded: {description: "Equivalent to the -s option of htseq-count.", category: "common"} + additionalAttributes: {description: "Equivalent to the --additional-attr option of htseq-count.", category: "advanced"} featureType: {description: "Equivalent to the --type option of htseq-count.", 
category: "advanced"} idattr: {description: "Equivalent to the --idattr option of htseq-count.", category: "advanced"} - additionalAttributes: {description: "Equivalent to the --additional-attr option of htseq-count.", category: "advanced"} + nprocesses: {description: "Number of processes to run htseq with.", category: "advanced"} memory: {description: "The amount of memory the job requires in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/isoseq3.wdl b/isoseq3.wdl index 5060f0e7..c1c4397c 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -31,7 +31,7 @@ task Refine { String outputDir String outputNamePrefix - Int cores = 2 + Int threads = 2 String memory = "2G" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" @@ -44,7 +44,7 @@ task Refine { --min-polya-length ~{minPolyALength} \ ~{true="--require-polya" false="" requirePolyA} \ --log-level ~{logLevel} \ - --num-threads ~{cores} \ + --num-threads ~{threads} \ --log-file "~{outputDir}/~{outputNamePrefix}.stderr.log" \ ~{inputBamFile} \ ~{primerFile} \ @@ -61,7 +61,7 @@ task Refine { } runtime { - cpu: cores + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -77,7 +77,7 @@ task Refine { primerFile: {description: "Barcode/primer fasta file.", category: "required"} outputDir: {description: "Output directory path.", category: "required"} outputNamePrefix: {description: "Basename of the output files.", category: "required"} - cores: {description: "The number of cores to be used.", category: "advanced"} + threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/lima.wdl b/lima.wdl index 7ef9d4ab..1da4ef5e 100644 --- a/lima.wdl +++ b/lima.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE diff --git a/macs2.wdl b/macs2.wdl index fad3cb00..757eaf67 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -54,4 +54,4 @@ task PeakCalling { memory: memory docker: dockerImage } -} \ No newline at end of file +} diff --git a/manta.wdl b/manta.wdl index 5382d2a5..a7b7cf38 100644 --- a/manta.wdl +++ b/manta.wdl @@ -27,9 +27,10 @@ task Germline { File referenceFasta File referenceFastaFai String runDir = "./manta_run" + Boolean exome = false + File? callRegions File? 
callRegionsIndex - Boolean exome = false Int cores = 1 Int memoryGb = 4 @@ -71,9 +72,9 @@ task Germline { referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } runDir: {description: "The directory to use as run/output directory.", category: "common"} + exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} - exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} cores: {description: "The the number of cores required to run a program", category: "required"} memoryGb: {description: "The memory required to run the manta", category: "required"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -85,14 +86,15 @@ task Somatic { input { File tumorBam File tumorBamIndex - File? normalBam - File? normalBamIndex File referenceFasta File referenceFastaFai String runDir = "./manta_run" + Boolean exome = false + + File? normalBam + File? normalBamIndex File? callRegions File? 
callRegionsIndex - Boolean exome = false Int cores = 1 Int memoryGb = 4 @@ -138,16 +140,17 @@ task Somatic { } parameter_meta { + # inputs tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"} tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} - normalBam: {description: "The normal/control sample's BAM file.", category: "common"} - normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} runDir: {description: "The directory to use as run/output directory.", category: "common"} + exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} + normalBam: {description: "The normal/control sample's BAM file.", category: "common"} + normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "common"} callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} - exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} cores: {description: "The number of cores to use.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} diff --git a/minimap2.wdl b/minimap2.wdl index fb31fb7f..1b719da6 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2019 Leiden University Medical 
Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -61,7 +61,7 @@ task Indexing { } parameter_meta { - # input + # inputs useHomopolymerCompressedKmer: {description: "Use homopolymer-compressed k-mer (preferrable for pacbio).", category: "advanced"} kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"} minimizerWindowSize: {description: "Minimizer window size.", category: "advanced"} @@ -73,7 +73,7 @@ task Indexing { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - # output + # outputs indexFile: {description: "Indexed reference file."} } } @@ -137,27 +137,28 @@ task Mapping { } parameter_meta { + # inputs presetOption: {description: "This option applies multiple options at the same time.", category: "common"} kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"} + skipSelfAndDualMappings: {description: "Skip self and dual mappings (for the all-vs-all mode).", category: "advanced"} outputSam: {description: "Output in the sam format.", category: "common"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + addMDTagToSam: {description: "Adds a MD tag to the sam output file.", category: "common"} + secondaryAlignment: {description: "Whether to output secondary alignments.", category: "advanced"} + referenceFile: {description: "Reference fasta file.", category: "required"} + queryFile: {description: "Input fasta file.", category: "required"} maxIntronLength: {description: "Max intron length (effective with -xsplice; changing -r).", category: "advanced"} maxFragmentLength: {description: "Max fragment length (effective with -xsr or in the fragment mode).", category: "advanced"} - skipSelfAndDualMappings: {description: "Skip self and dual mappings (for the all-vs-all mode).", category: "advanced"} retainMaxSecondaryAlignments: {description: "Retain at most N secondary alignments.", category: "advanced"} matchingScore: {description: "Matching score.", category: "advanced"} mismatchPenalty: {description: "Mismatch penalty.", category: "advanced"} howToFindGTAG: {description: "How to find GT-AG. 
f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} - addMDTagToSam: {description: "Adds a MD tag to the sam output file.", category: "common"} - secondaryAlignment: {description: "Whether to output secondary alignments.", category: "advanced"} - referenceFile: {description: "Reference fasta file.", category: "required"} - queryFile: {description: "Input fasta file.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - # output + # outputs alignmentFile: {description: "Mapping and alignment between collections of dna sequences file."} } } diff --git a/multiqc.wdl b/multiqc.wdl index 7dcf333e..647394e9 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -22,16 +22,28 @@ version 1.0 task MultiQC { input { - # Use a string here so cromwell does not relocate an entire analysis directory + # Use a string here so cromwell does not relocate an entire + # analysis directory. Array[File] reports Boolean force = false Boolean dirs = false - Int? dirsDepth Boolean fullNames = false + String outDir = "." + Boolean dataDir = false + Boolean zipDataDir = true + Boolean export = false + Boolean flat = false + Boolean interactive = true + Boolean lint = false + Boolean pdf = false + # This must be actively enabled in my opinion. + # The tools default is to upload. + Boolean megaQCUpload = false + + Int? dirsDepth String? title String? comment String? fileName - String outDir = "." String? template String? tag String? ignore @@ -40,21 +52,15 @@ task MultiQC { File? fileList Array[String]+? exclude Array[String]+? 
module - Boolean dataDir = false String? dataFormat - Boolean zipDataDir = true - Boolean export = false - Boolean flat = false - Boolean interactive = true - Boolean lint = false - Boolean pdf = false - Boolean megaQCUpload = false # This must be actively enabled in my opinion. The tools default is to upload. File? config # A directory String? clConfig + String? memory Int timeMinutes = 2 + ceil(size(reports, "G") * 8) String dockerImage = "quay.io/biocontainers/multiqc:1.7--py_1" } + Int memoryGb = 2 + ceil(size(reports, "G")) # This is where the reports end up. It does not need to be changed by the @@ -69,8 +75,9 @@ task MultiQC { # By hashing the parent path we make sure there are no file colissions as # files from the same directory end up in the same directory, while files # from other directories get their own directory. Cromwell also uses this - # strategy. Using python's builtin hash is unique enough for these purposes. - + # strategy. Using python's builtin hash is unique enough + # for these purposes. + command { python3 < Date: Mon, 2 Nov 2020 09:17:33 +0100 Subject: [PATCH 131/668] Address travis error. --- CHANGELOG.md | 3 +++ fastqc.wdl | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 028c7400..c331112c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Bwa & bwa-mem2: Add parameter_meta for `outputHla`. ++ Multiqc: Removed WDL_AID excludes of "finished" & "dependencies" inputs. ++ Bam2fastx: Add localisation of input files to Bam2Fasta task. + isoseq3: `cores` input has been renamed to `threads` to match tool naming. + CCS: `cores` input has been renamed to `threads` to match tool naming. + CCS: Update CCS to version 5. 
diff --git a/fastqc.wdl b/fastqc.wdl index dd3dfc2e..feeeaae5 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -47,7 +47,7 @@ task Fastqc { Int threads = 1 String memory = "2G" Int timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 - String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0 + String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" Array[File]? noneArray File? noneFile From 163290340ff4f5ed0488c69d2c194dbb3428a423 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 2 Nov 2020 11:57:06 +0100 Subject: [PATCH 132/668] Add another batch of updated tasks. --- CHANGELOG.md | 4 + centrifuge.wdl | 4 +- ncbi.wdl | 51 ++++---- pbbam.wdl | 10 +- pbmm2.wdl | 13 ++- picard.wdl | 312 +++++++++++++++++++++++-------------------------- rtg.wdl | 79 ++++++------- sambamba.wdl | 57 +++++---- 8 files changed, 257 insertions(+), 273 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c331112c..f0dfaf1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Picard: Add parameter_meta to `SortSam`. ++ pbmm2: Add parameter_meta for `sample`. ++ Centrifuge: Rename output in task `KReport` to `KrakenReport` to resolve + name collision with task name. + Bwa & bwa-mem2: Add parameter_meta for `outputHla`. + Multiqc: Removed WDL_AID excludes of "finished" & "dependencies" inputs. + Bam2fastx: Add localisation of input files to Bam2Fasta task. diff --git a/centrifuge.wdl b/centrifuge.wdl index 1637abdd..07dc7f85 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -270,7 +270,7 @@ task KReport { >>> output { - File KReport = outputPrefix + "_kreport.tsv" + File KrakenReport = outputPrefix + "_kreport.tsv" } runtime { @@ -294,7 +294,7 @@ task KReport { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - KReport: {description: "File with kraken style report."} + KrakenReport: {description: "File with kraken style report."} } } diff --git a/ncbi.wdl b/ncbi.wdl index d157d902..da753bac 100644 --- a/ncbi.wdl +++ b/ncbi.wdl @@ -23,6 +23,10 @@ version 1.0 task GenomeDownload { input { String outputPath + Boolean verbose = true + Boolean debug = false + String executable = "ncbi-genome-download" + String? section = "refseq" String? format = "all" String? assemblyLevel = "all" @@ -32,11 +36,7 @@ task GenomeDownload { String? ncbiBaseUri Int? parallel Int? retries - Boolean verbose = true - Boolean debug = false String? domain = "all" - - String executable = "ncbi-genome-download" String? preCommand } @@ -58,22 +58,22 @@ task GenomeDownload { ~{true="--debug" false ="" debug } \ ~{domain} - # Check md5sums for all downloaded files + # Check md5sums for all downloaded files. for folder in $(realpath ~{outputPath})/*/*/* - do - ( - md5sums="$( - cd $folder - for file in * - do - if [[ ! $file == "MD5SUMS" ]] - then - grep $file MD5SUMS - fi - done - )" - cd $folder; echo $md5sums | md5sum -c) - done + do + ( + md5sums="$( + cd $folder + for file in * + do + if [[ ! $file == "MD5SUMS" ]] + then + grep $file MD5SUMS + fi + done + )" + cd $folder; echo $md5sums | md5sum -c) + done } output { @@ -106,7 +106,7 @@ task DownloadNtFasta{ mkdir -p ~{ntDir} rsync -av --partial rsync://ftp.ncbi.nih.gov/blast/db/FASTA/nt.gz* ~{ntDir} (cd ~{ntDir} && md5sum -c nt.gz.md5) - # Only unzip when necessary + # Only unzip when necessary. 
if ~{true='true' false='false' unzip} then zcat ~{ntDir}/nt.gz > ~{ntFilePath} @@ -132,15 +132,16 @@ task DownloadAccessionToTaxId { command { set -e -o pipefail mkdir -p ~{downloadDir} - rsync -av \ - --partial \ - rsync://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/nucl_*.accession2taxid.gz* \ - ~{downloadDir} + rsync \ + -av \ + --partial \ + rsync://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/nucl_*.accession2taxid.gz* \ + ~{downloadDir} (cd ~{downloadDir} && md5sum -c *.md5) for file in ~{downloadDir}/nucl_*.accession2taxid.gz do zcat $file | tail -n +2 | cut -f 2,3 ~{true="| gzip" false='' gzip} > \ - $file.seqtaxmap~{true='.gz' false='' gzip} + $file.seqtaxmap~{true='.gz' false='' gzip} done } diff --git a/pbbam.wdl b/pbbam.wdl index 52737a00..d271a11a 100644 --- a/pbbam.wdl +++ b/pbbam.wdl @@ -18,12 +18,14 @@ version 1.0 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. task Index { input { File bamFile + String? outputBamPath - + String memory = "2G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) String dockerImage = "quay.io/biocontainers/pbbam:1.6.0--h5b7e6e0_0" @@ -60,11 +62,9 @@ task Index { parameter_meta { # inputs bamFile: {description: "The BAM file for which an index should be made.", category: "required"} - outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", - category: "common"} + outputBamPath: {description: "The location where the BAM file should be written to. 
The index will appear alongside this link to the BAM file.", category: "common"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/pbmm2.wdl b/pbmm2.wdl index 31d4c667..5fda1c87 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -58,9 +58,10 @@ task Mapping { } parameter_meta { + # inputs presetOption: {description: "This option applies multiple options at the same time.", category: "required"} sort: {description: "Sort the output bam file.", category: "advanced"} - sample: {description: "Name of the sample"} + sample: {description: "Name of the sample.", category: "required"} referenceMMI: {description: "MMI file for the reference.", category: "required"} queryFile: {description: "BAM file with reads to align against the reference.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} @@ -68,7 +69,7 @@ task Mapping { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - # output + # outputs outputAlignmentFile: {description: "Mapped bam file."} outputIndexFile: {description: "Bam index file."} } diff --git a/picard.wdl b/picard.wdl index 49db8b8b..f1876f7b 100644 --- a/picard.wdl +++ b/picard.wdl @@ -26,8 +26,8 @@ task BedToIntervalList { File dict String outputPath = "regions.interval_list" - String memory = "4G" String javaXmx = "3G" + String memory = "4G" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -47,9 +47,9 @@ task BedToIntervalList { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -57,12 +57,10 @@ task BedToIntervalList { bedFile: {description: "A bed file.", category: "required"} dict: {description: "A sequence dict file.", category: "required"} outputPath: {description: "The location the output interval list should be written to.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -74,17 +72,19 @@ task CollectHsMetrics { File referenceFastaDict File referenceFastaFai File targets - File? baits String basename + File? baits + # Use the targets file as baits as a fallback, since often the baits # for a certain capture kit are not available. File baitsFile = select_first([baits, targets]) File targetsFile = targets - Int memoryMb = javaXmxMb + 512 Int javaXmxMb = 3072 - # Additional * 2 because picard multiple metrics reads the reference fasta twice. + Int memoryMb = javaXmxMb + 512 + # Additional * 2 because picard multiple metrics reads the + # reference fasta twice. 
Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -106,9 +106,9 @@ task CollectHsMetrics { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -116,18 +116,15 @@ task CollectHsMetrics { inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} targets: {description: "Picard interval file of the capture targets.", category: "required"} - baits: {description: "Picard interval file of the capture bait set.", category: "advanced"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} + baits: {description: "Picard interval file of the capture bait set.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -139,7 +136,6 @@ task CollectMultipleMetrics { File referenceFastaDict File referenceFastaFai String basename - Boolean collectAlignmentSummaryMetrics = true Boolean collectInsertSizeMetrics = true Boolean qualityScoreDistribution = true @@ -150,14 +146,13 @@ task CollectMultipleMetrics { Boolean collectSequencingArtifactMetrics = true Boolean collectQualityYieldMetrics = true - Int memoryMb = javaXmxMb + 512 Int javaXmxMb = 3072 + Int memoryMb = javaXmxMb + 512 # Additional * 2 because picard multiple metrics reads the reference fasta twice. 
Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } - command { set -e mkdir -p "$(dirname ~{basename})" @@ -173,8 +168,7 @@ task CollectMultipleMetrics { ~{true="PROGRAM=MeanQualityByCycle" false="" meanQualityByCycle} \ ~{true="PROGRAM=CollectBaseDistributionByCycle" false="" collectBaseDistributionByCycle} \ ~{true="PROGRAM=CollectGcBiasMetrics" false="" collectGcBiasMetrics} \ - ~{true="PROGRAM=CollectSequencingArtifactMetrics" false="" - collectSequencingArtifactMetrics} \ + ~{true="PROGRAM=CollectSequencingArtifactMetrics" false="" collectSequencingArtifactMetrics} \ ~{true="PROGRAM=CollectQualityYieldMetrics" false="" collectQualityYieldMetrics} } @@ -221,9 +215,9 @@ task CollectMultipleMetrics { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -231,30 +225,21 @@ task CollectMultipleMetrics { inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} - collectAlignmentSummaryMetrics: {description: "Equivalent to the `PROGRAM=CollectAlignmentSummaryMetrics` argument.", - category: "advanced"} - collectInsertSizeMetrics: {description: "Equivalent 
to the `PROGRAM=CollectInsertSizeMetrics` argument.", - category: "advanced"} - qualityScoreDistribution: {description: "Equivalent to the `PROGRAM=QualityScoreDistribution` argument.", - category: "advanced"} + collectAlignmentSummaryMetrics: {description: "Equivalent to the `PROGRAM=CollectAlignmentSummaryMetrics` argument.", category: "advanced"} + collectInsertSizeMetrics: {description: "Equivalent to the `PROGRAM=CollectInsertSizeMetrics` argument.", category: "advanced"} + qualityScoreDistribution: {description: "Equivalent to the `PROGRAM=QualityScoreDistribution` argument.", category: "advanced"} meanQualityByCycle: {description: "Equivalent to the `PROGRAM=MeanQualityByCycle` argument.", category: "advanced"} - collectBaseDistributionByCycle: {description: "Equivalent to the `PROGRAM=CollectBaseDistributionByCycle` argument.", - category: "advanced"} + collectBaseDistributionByCycle: {description: "Equivalent to the `PROGRAM=CollectBaseDistributionByCycle` argument.", category: "advanced"} collectGcBiasMetrics: {description: "Equivalent to the `PROGRAM=CollectGcBiasMetrics` argument.", category: "advanced"} - collectSequencingArtifactMetrics: {description: "Equivalent to the `PROGRAM=CollectSequencingArtifactMetrics` argument.", - category: "advanced"} - collectQualityYieldMetrics: {description: "Equivalent to the `PROGRAM=CollectQualityYieldMetrics` argument.", - category: "advanced"} + collectSequencingArtifactMetrics: {description: "Equivalent to the `PROGRAM=CollectSequencingArtifactMetrics` argument.", category: "advanced"} + collectQualityYieldMetrics: {description: "Equivalent to the `PROGRAM=CollectQualityYieldMetrics` argument.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -266,9 +251,9 @@ task CollectRnaSeqMetrics { String basename String strandSpecificity = "NONE" - String memory = "9G" String javaXmx = "8G" - # With 6 minutes per G there were several timeouts. + String memory = "9G" + # With 6 minutes per G there were several timeouts. Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -286,14 +271,14 @@ task CollectRnaSeqMetrics { } output { - File? chart = basename + ".RNA_Metrics.pdf" File metrics = basename + ".RNA_Metrics" + File? 
chart = basename + ".RNA_Metrics.pdf" } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -302,15 +287,11 @@ task CollectRnaSeqMetrics { inputBamIndex: {description: "The index of the input BAM file.", category: "required"} refRefflat: {description: "A refflat file containing gene annotations.", catehory: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} - strandSpecificity: {description: "Equivalent to the `STRAND_SPECIFICITY` option of picard's CollectRnaSeqMetrics.", - category: "common"} - + strandSpecificity: {description: "Equivalent to the `STRAND_SPECIFICITY` option of picard's CollectRnaSeqMetrics.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -325,8 +306,8 @@ task CollectTargetedPcrMetrics { Array[File]+ targetIntervals String basename - String memory = "4G" String javaXmx = "3G" + String memory = "4G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -352,9 +333,9 @@ task CollectTargetedPcrMetrics { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -362,21 +343,15 @@ task CollectTargetedPcrMetrics { inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - ampliconIntervals: {description: "An interval list describinig the coordinates of the amplicons sequenced.", - category: "required"} - targetIntervals: {description: "An interval list describing the coordinates of the targets sequenced.", - category: "required"} + ampliconIntervals: {description: "An interval list describinig the coordinates of the amplicons sequenced.", category: "required"} + targetIntervals: {description: "An interval list describing the coordinates of the targets sequenced.", category: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} - + javaXmx: {description: "The maximum memory 
available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -388,8 +363,8 @@ task CollectVariantCallingMetrics { File inputVCFIndex String basename - String memory = "9G" String javaXmx = "8G" + String memory = "9G" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -410,24 +385,22 @@ task CollectVariantCallingMetrics { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { # inputs dbsnp: {description: "DBSNP vcf file to use with CollectVariantCallingMetrics.", category: "required"} dbsnpIndex: {description: "Index file for the DBSNP VCF.", category: "required"} - inputVCF: {description: "Input VCF file", category: "required"} + inputVCF: {description: "Input VCF file.", category: "required"} inputVCFIndex: {description: "Index file for the input VCF.", category: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -436,8 +409,8 @@ task CreateSequenceDictionary { File inputFile String outputDir - String memory = "3G" String javaXmx = "2G" + String memory = "3G" String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -464,8 +437,8 @@ task CreateSequenceDictionary { # inputs inputFile: {description: "The input fasta file.", category: "required"} outputDir: {description: "Output directory path.", category: "required"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -473,17 +446,19 @@ task CreateSequenceDictionary { } } -# Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs +# Combine multiple recalibrated BAM files from scattered +# ApplyRecalibration runs. 
task GatherBamFiles { input { Array[File]+ inputBams Array[File]+ inputBamsIndex String outputBamPath + Boolean createMd5File = false - Int memoryMb = javaXmxMb + 512 - Int javaXmxMb = 1024 Int? compressionLevel - Boolean createMd5File = false + + Int javaXmxMb = 1024 + Int memoryMb = javaXmxMb + 512 # One minute per input gigabyte. Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" @@ -508,9 +483,9 @@ task GatherBamFiles { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -518,14 +493,12 @@ task GatherBamFiles { inputBams: {description: "The BAM files to be merged together.", category: "required"} inputBamsIndex: {description: "The indexes of the input BAM files.", category: "required"} outputBamPath: {description: "The path where the merged BAM file will be written.", caregory: "required"} - compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} createMd5File: {decription: "Whether to create an md5 file of the output BAM.", category: "advanced"} + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -535,8 +508,8 @@ task GatherVcfs { Array[File]+ inputVcfIndexes String outputVcfPath = "out.vcf.gz" - String memory = "5G" String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -555,9 +528,9 @@ task GatherVcfs { } runtime { - docker: dockerImage memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -565,17 +538,14 @@ task GatherVcfs { inputVcfs: {description: "The VCF files to be merged together.", category: "required"} inputVcfIndexes: {description: "The indexes of the input VCF files.", category: "required"} outputVcfPath: {description: "The path where the merged VCF file will be written.", caregory: "required"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -# Mark duplicate reads to avoid counting non-independent observations +# Mark duplicate reads to avoid counting non-independent observations. task MarkDuplicates { input { Array[File]+ inputBams @@ -583,31 +553,32 @@ task MarkDuplicates { String metricsPath Int compressionLevel = 1 Boolean createMd5File = false - Boolean useJdkInflater = true # Slightly faster than the intel one. - # Better results for compression level 1 (much smaller). Higher compression levels similar to intel deflater. - # NOTE: this might change in the future when the intel deflater is updated! + Boolean useJdkInflater = true # Slightly faster than the intel one. + # Better results for compression level 1 (much smaller). + # Higher compression levels similar to intel deflater. + # NOTE: this might change in the future when the intel + # deflater is updated! Boolean useJdkDeflater = true - # In GATK Best practices pipeline MarkDuplicates is given a 7G VM. + # The program default for READ_NAME_REGEX is appropriate in nearly every case. + # Sometimes we wish to supply "null" in order to turn off optical duplicate detection. + # This can be desirable if you don't mind the estimated library size + # being wrong and optical duplicate detection is taking >7 days and failing. + String? read_name_regex + + # In GATK Best practices pipeline MarkDuplicates is given a 7G VM. # https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L1040 Int javaXmxMb = 6656 # 6.5G String memoryMb = javaXmxMb + 512 Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" - - # The program default for READ_NAME_REGEX is appropriate in nearly every case. 
- # Sometimes we wish to supply "null" in order to turn off optical duplicate detection - # This can be desirable if you don't mind the estimated library size being wrong and - # optical duplicate detection is taking >7 days and failing - String? read_name_regex } # Task is assuming query-sorted input so that the Secondary and Supplementary reads get # marked correctly. This works because the output of BWA is query-grouped and therefore, # so is the output of MergeBamAlignment. While query-grouped isn't actually query-sorted, - # it's good enough for MarkDuplicates with ASSUME_SORT_ORDER="queryname" - + # it's good enough for MarkDuplicates with ASSUME_SORT_ORDER="queryname". command { set -e mkdir -p "$(dirname ~{outputBamPath})" @@ -625,7 +596,7 @@ task MarkDuplicates { ADD_PG_TAG_TO_READS=false \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} \ USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ - USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} } output { @@ -636,9 +607,9 @@ task MarkDuplicates { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -646,42 +617,39 @@ task MarkDuplicates { inputBams: {description: "The BAM files for which the duplicate reads should be marked.", category: "required"} outputBamPath: {description: "The location where the ouptut BAM file should be written.", category: "required"} metricsPath: {description: "The location where the output metrics file should be written.", category: "required"} - read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"} 
useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} - compressionLevel: {description: "The compression level at which the BAM files are written", category: "advanced"} + read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -# Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs +# Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs. task MergeVCFs { input { Array[File]+ inputVCFs Array[File]+ inputVCFsIndexes String outputVcfPath - - String memory = "5G" - String javaXmx = "4G" - Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" Int compressionLevel = 1 - Boolean useJdkInflater = true # Slightly faster than the intel one. - # Better results for compression level 1 (much smaller). 
Higher compression levels similar to intel deflater. + Boolean useJdkInflater = true # Slightly faster than the intel one. + # Better results for compression level 1 (much smaller). + # Higher compression levels similar to intel deflater. # NOTE: this might change in the future when the intel deflater is updated! Boolean useJdkDeflater = true + String javaXmx = "4G" + String memory = "5G" + Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } - # Using MergeVcfs instead of GatherVcfs so we can create indices - # See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket - + # Using MergeVcfs instead of GatherVcfs so we can create indices. + # See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket. command { set -e mkdir -p "$(dirname ~{outputVcfPath})" @@ -691,7 +659,7 @@ task MergeVCFs { OUTPUT=~{outputVcfPath} \ COMPRESSION_LEVEL=~{compressionLevel} \ USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ - USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} } output { @@ -700,9 +668,9 @@ task MergeVCFs { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -710,16 +678,13 @@ task MergeVCFs { inputVCFs: {description: "The VCF files to be merged.", category: "required"} inputVCFsIndexes: {description: "The indexes of the VCF files.", category: "required"} outputVcfPath: {description: "The location the output VCF file should be written to.", category: "required"} - - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} - compressionLevel: {description: "The compression level at which the BAM files are written", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -729,10 +694,12 @@ task SamToFastq { File inputBamIndex Boolean paired = true - String memory = "17G" String javaXmx = "16G" # High memory default to avoid crashes. + String memory = "17G" + Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" - File? NONE + + File? noneFile } String outputRead1 = basename(inputBam, "\.[bs]am") + "_R1.fastq.gz" @@ -751,13 +718,20 @@ task SamToFastq { output { File read1 = outputRead1 - File? read2 = if paired then outputRead2 else NONE - File? unpairedRead = if paired then outputUnpaired else NONE + File? 
read2 = if paired then outputRead2 else noneFile + File? unpairedRead = if paired then outputUnpaired else noneFile } runtime { - docker: dockerImage memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + meta { + WDL_AID: { + exclude: ["noneFile"] + } } } @@ -766,8 +740,8 @@ task ScatterIntervalList { File interval_list Int scatter_count - String memory = "4G" String javaXmx = "3G" + String memory = "4G" String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -790,8 +764,8 @@ task ScatterIntervalList { } runtime { - docker: dockerImage memory: memory + docker: dockerImage } } @@ -804,7 +778,7 @@ task SortSam { Int maxRecordsInRam = 500000 Int compressionLevel = 1 - # Default ram of 4 GB. Using 125001.0 to prevent an answer of + # Default ram of 4 GB. Using 125001.0 to prevent an answer of # 4.000000001 which gets rounded to 5. # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) @@ -840,13 +814,16 @@ task SortSam { } parameter_meta { - inputBam: {description: "The unsorted input BAM file", category: "required"} + # inputs + inputBam: {description: "The unsorted input BAM file.", category: "required"} outputPath: {description: "The location the output BAM file should be written to.", category: "required"} - XmxGb: {description: "The maximum memory available to picard SortSam. 
Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", - category: "advanced"} + sortByName: {description: "Sort the output file by name, default is position.", category: "advanced"} + createMd5File: {description: "Whether to create an MD5 digest for any BAM or FASTQ files created.", category: "advanced"} + maxRecordsInRam: {description: "This will specify the number of records stored in RAM before spilling to disk.", category: "advanced"} + compressionLevel: {description: "Compression level for all compressed files created.", category: "advanced"} + XmxGb: {description: "The maximum memory available to picard SortSam. Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -854,10 +831,11 @@ task SortVcf { input { Array[File]+ vcfFiles String outputVcfPath + File? 
dict - String memory = "9G" String javaXmx = "8G" + String memory = "9G" Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -879,9 +857,9 @@ task SortVcf { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -889,13 +867,10 @@ task SortVcf { vcfFiles: {description: "The VCF files to merge and sort.", category: "required"} outputVcfPath: {description: "The location the sorted VCF files should be written to.", category: "required"} dict: {description: "A sequence dictionary matching the VCF files.", category: "advanced"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -904,8 +879,9 @@ task RenameSample { File inputVcf String outputPath = "./picard/renamed.vcf" String newSampleName - String memory = "9G" + String javaXmx = "8G" + String memory = "9G" Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) String dockerImage = "quay.io/biocontainers/picard:2.19.0--0" } @@ -925,9 +901,9 @@ task RenameSample { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -935,8 +911,8 @@ task RenameSample { inputVcf: {description: "The VCF file to process.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} newSampleName: {description: "A string to replace the old sample name.", category: "required"} - memory: {description: "The memory required to run the programs", category: "advanced"} - javaXmx: {description: "The max. memory allocated for JAVA", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/rtg.wdl b/rtg.wdl index 104a5ef9..bfd32957 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -22,13 +22,14 @@ version 1.0 task Format { input { + Array[File]+ inputFiles String format = "fasta" String outputPath = "seq_data.sdf" - Array[File]+ inputFiles - String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" + String rtgMem = "8G" String memory = "9G" Int timeMinutes = 1 + ceil(size(inputFiles) * 2) + String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" } command { @@ -44,21 +45,20 @@ task Format { } runtime { - docker: dockerImage memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - format: {description: "Format of input. Allowed values are [fasta, fastq, fastq-interleaved, sam-se, sam-pe].", - category: "advanced"} - outputPath: {description: "Where the output should be placed.", category: "advanced"} + # inputs inputFiles: {description: "input sequence files. May be specified 1 or more times.", category: "required"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + format: {description: "Format of input. 
Allowed values are [fasta, fastq, fastq-interleaved, sam-se, sam-pe].", category: "advanced"} + outputPath: {description: "Where the output should be placed.", category: "advanced"} + rtgMem: {description: "The amount of memory rtg will allocate to the JVM.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - rtgMem: {description: "The amount of memory rtg will allocate to the JVM", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -68,18 +68,20 @@ task VcfEval { File baselineIndex File calls File callsIndex - File? evaluationRegions - File? bedRegions + Boolean squashPloidy = false + String outputMode = "split" String outputDir = "output/" File template Boolean allRecords = false Boolean decompose = false Boolean refOverlap = false + + File? evaluationRegions + File? bedRegions String? sample - Boolean squashPloidy = false - String outputMode = "split" - Int threads = 1 # tool default is number of cores in the system 😱 + String rtgMem = "8G" + Int threads = 1 # Tool default is number of cores in the system 😱. 
String memory = "9G" Int timeMinutes = 1 + ceil(size([baseline, calls], "G") * 5) String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" @@ -132,39 +134,32 @@ task VcfEval { } runtime { - docker: dockerImage cpu: threads memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - baseline: {description: "VCF file containing baseline variants", category: "required"} - baselineIndex: {description: "The baseline's VCF index", category: "required"} - calls: {description: "VCF file containing called variants", category: "required"} - callsIndex: {description: "The call's VCF index", category: "required"} - outputDir: {description: "Directory for output", category: "advanced"} - bedRegions: {description: "if set, only read VCF records that overlap the ranges contained in the specified BED file", category: "advanced"} - evaluationRegions: {description: "if set, evaluate within regions contained in the supplied BED file, allowing transborder matches. To be used for truth-set high-confidence regions or other regions of interest where region boundary effects should be minimized", - category: "advanced"} - template: {description: "SDF of the reference genome the variants are called against", category: "required"} - allRecords: {description: "use all records regardless of FILTER status (Default is to only process records where FILTER is \".\" or \"PASS\")", - category: "common"} - decompose: {description: "decompose complex variants into smaller constituents to allow partial credit", category: "common"} - refOverlap: {description: "allow alleles to overlap where bases of either allele are same-as-ref (Default is to only allow VCF anchor base overlap)", - category: "common"} - sample: {description: "the name of the sample to select. Use , to select different sample names for baseline and calls. 
(Required when using multi-sample VCF files)", - category: "common"} - squashPloidy: {description: "treat heterozygous genotypes as homozygous ALT in both baseline and calls, to allow matches that ignore zygosity differences", - category: "common"} - outputMode: {description: "output reporting mode. Allowed values are [split, annotate, combine, ga4gh, roc-only] (Default is split)", - category: "advanced"} - threads: {description: "Number of threads. Default is 1", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - rtgMem: {description: "The amount of memory rtg will allocate to the JVM", category: "advanced"} + # inputs + baseline: {description: "VCF file containing baseline variants.", category: "required"} + baselineIndex: {description: "The baseline's VCF index.", category: "required"} + calls: {description: "VCF file containing called variants.", category: "required"} + callsIndex: {description: "The call's VCF index.", category: "required"} + squashPloidy: {description: "treat heterozygous genotypes as homozygous ALT in both baseline and calls, to allow matches that ignore zygosity differences.", category: "common"} + outputMode: {description: "output reporting mode. 
Allowed values are [split, annotate, combine, ga4gh, roc-only] (Default is split).", category: "advanced"} + outputDir: {description: "Directory for output.", category: "advanced"} + template: {description: "SDF of the reference genome the variants are called against.", category: "required"} + allRecords: {description: "use all records regardless of FILTER status (Default is to only process records where FILTER is \".\" or \"PASS\").", category: "common"} + decompose: {description: "decompose complex variants into smaller constituents to allow partial credit.", category: "common"} + refOverlap: {description: "allow alleles to overlap where bases of either allele are same-as-ref (Default is to only allow VCF anchor base overlap).", category: "common"} + sample: {description: "the name of the sample to select. Use , to select different sample names for baseline and calls. (Required when using multi-sample VCF files).", category: "common"} + bedRegions: {description: "if set, only read VCF records that overlap the ranges contained in the specified BED file.", category: "advanced"} + evaluationRegions: {description: "if set, evaluate within regions contained in the supplied BED file, allowing transborder matches. To be used for truth-set high-confidence regions or other regions of interest where region boundary effects should be minimized.", category: "advanced"} + rtgMem: {description: "The amount of memory rtg will allocate to the JVM.", category: "advanced"} + threads: {description: "Number of threads. Default is 1.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } - diff --git a/sambamba.wdl b/sambamba.wdl index cd8da21e..df5ab4d1 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -20,29 +20,31 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - task Markdup { input { Array[File] inputBams String outputPath - # Sambamba scales like this: 1 thread is fully utilized (1). 2 threads 1.8 utilized. 3 -> 2.4, 4-> 2.7. - # 2 threads reduces wall clock time by more than 40%. - Int threads = 2 Int compressionLevel = 1 - Int? hashTableSize - Int? overFlowListSize - # sortBufferSize and ioBufferSize taken from markdup defaults as of sambamba 0.7.1 + # sortBufferSize and ioBufferSize taken from markdup defaults as of sambamba 0.7.1. Int sortBufferSize = 2048 Int ioBufferSize = 128 - Boolean removeDuplicates = false + Boolean removeDuplicates = false + Int? hashTableSize + Int? overFlowListSize + + # Sambamba scales like this: 1 thread is fully utilized (1). + # 2 threads 1.8 utilized. 3 -> 2.4, 4-> 2.7. + # 2 threads reduces wall clock time by more than 40%. + Int threads = 2 # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size. # Added 1024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize - String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" # Time minute calculation does not work well for higher number of threads. 
Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) / threads + String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } + String bamIndexPath = sub(outputPath, "\.bam$", ".bai") command { @@ -57,7 +59,7 @@ task Markdup { ~{"--sort-buffer-size " + sortBufferSize} \ ~{"--io-buffer-size " + ioBufferSize} \ ~{sep=' ' inputBams} ~{outputPath} - # sambamba creates an index for us + # sambamba creates an index for us. mv ~{outputPath}.bai ~{bamIndexPath} } @@ -67,8 +69,8 @@ task Markdup { } runtime { - memory: "~{memoryMb}M" cpu: threads + memory: "~{memoryMb}M" time_minutes: timeMinutes docker: dockerImage } @@ -78,17 +80,19 @@ task Markdup { inputBams: {description: "The input BAM files.", category: "required"} outputPath: {description: "Output directory path + output file.", category: "required"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} - memoryMb: {description: "The amount of memory available to the job in megabytes.", category: "advanced"} - removeDuplicates: {description: "Whether to remove the duplicates (instead of only marking them).", category: "advanced"} - hashTableSize: {description: "Sets sambamba's hash table size", category: "advanced"} - overFlowListSize: {description: "Sets sambamba's overflow list size", category: "advanced"} - sortBufferSize: {description: "The amount of mb allocated to the sort buffer", category: "advanced"} + sortBufferSize: {description: "The amount of mb allocated to the sort buffer.", category: "advanced"} ioBufferSize: {description: "The amount of mb allocated to each IO buffer. Sambamba uses two IO buffers.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + removeDuplicates: {description: "Whether to remove the duplicates (instead of only marking them).", category: "advanced"} + hashTableSize: {description: "Sets sambamba's hash table size.", category: "advanced"} + overFlowListSize: {description: "Sets sambamba's overflow list size.", category: "advanced"} threads: {description: "The number of threads that will be used for this task.", category: "advanced"} + memoryMb: {description: "The amount of memory available to the job in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + # outputs outputBam: {description: "Sorted BAM file."} + outputBamIndex: {description: "Sorted BAM file index."} } } @@ -98,14 +102,15 @@ task Sort { String outputPath = basename(inputBam, "\.bam") + ".sorted.bam" Boolean sortByName = false Int compressionLevel = 1 - Int threads = 1 + Int memoryPerThreadGb = 4 + Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb - String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) + String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } - # Select first needed as outputPath is optional input. (bug in cromwell) + # Select first needed as outputPath is optional input (bug in cromwell). String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") command { @@ -118,7 +123,7 @@ task Sort { -m ~{memoryPerThreadGb}G \ -o ~{outputPath} \ ~{inputBam} - # sambamba creates an index for us + # sambamba creates an index for us. 
mv ~{outputPath}.bai ~{bamIndexPath} } @@ -140,12 +145,14 @@ task Sort { outputPath: {description: "Output directory path + output file.", category: "required"} sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} - memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} - memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes.", category: "advanced"} threads: {description: "The number of threads that will be used for this task.", category: "advanced"} + memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + # outputs outputBam: {description: "Sorted BAM file."} + outputBamIndex: {description: "Sorted BAM file index."} } -} \ No newline at end of file +} From f81a99e864af4a567a33e0850dfd1f0672d60a96 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 2 Nov 2020 12:16:45 +0100 Subject: [PATCH 133/668] Update layout samtools.wdl. 
--- samtools.wdl | 92 +++++++++++++++++++++++++++------------------------- 1 file changed, 48 insertions(+), 44 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 9e415b0e..496cf233 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -55,8 +55,7 @@ task BgzipAndIndex { outputDir: {description: "The directory in which the output will be placed.", category: "required"} type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -104,11 +103,12 @@ task Fastq { String outputRead1 String? outputRead2 String? outputRead0 + Boolean appendReadNumber = false + Boolean outputQuality = false + Int? includeFilter Int? excludeFilter Int? excludeSpecificFilter - Boolean appendReadNumber = false - Boolean outputQuality = false Int? compressionLevel Int threads = 1 @@ -151,16 +151,16 @@ task Fastq { outputRead1: {description: "The location the reads (first reads for pairs, in case of paired-end sequencing) should be written to.", category: "required"} outputRead2: {description: "The location the second reads from pairs should be written to.", category: "common"} outputRead0: {description: "The location the unpaired reads should be written to (in case of paired-end sequenicng).", category: "advanced"} - includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`", category: "advanced"} - excludeFilter: {description: "Exclude reads with ONE OR MORE of these flags. 
Corresponds to `-F`", category: "advanced"} - excludeSpecificFilter: {description: "Exclude reads with ALL of these flags. Corresponds to `-G`", category: "advanced"} - appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. Corresponds to `-n/N`", category: "advanced"} + appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. Corresponds to `-n/N`.", category: "advanced"} outputQuality: {description: "Equivalent to samtools fastq's `-O` flag.", category: "advanced"} + includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`.", category: "advanced"} + excludeFilter: {description: "Exclude reads with ONE OR MORE of these flags. Corresponds to `-F`.", category: "advanced"} + excludeSpecificFilter: {description: "Exclude reads with ALL of these flags. Corresponds to `-G`.", category: "advanced"} + compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -168,6 +168,7 @@ task FilterShortReadsBam { input { File bamFile String outputPathBam + String memory = "1G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" @@ -196,6 +197,7 @@ task FilterShortReadsBam { } parameter_meta { + # inputs bamFile: {description: "The bam file to process.", category: "required"} outputPathBam: {description: "The filtered bam file.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} @@ -236,15 +238,16 @@ task Flagstat { outputPath: {description: "The location the ouput should be written to.", category: "required"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } task Index { input { File bamFile + String? outputBamPath + String memory = "2G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" @@ -281,12 +284,10 @@ task Index { parameter_meta { # inputs bamFile: {description: "The BAM file for which an index should be made.", category: "required"} - outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", - category: "common"} + outputBamPath: {description: "The location where the BAM file should be written to. 
The index will appear alongside this link to the BAM file.", category: "common"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -319,8 +320,7 @@ task Markdup { inputBam: {description: "The BAM file to be processed.", category: "required"} outputBamPath: {description: "The location of the output BAM file.", category: "required"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -329,12 +329,13 @@ task Merge { Array[File]+ bamFiles String outputBamPath = "merged.bam" Boolean force = true - Int threads = 1 - Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) + Int threads = 1 String memory = "4G" + Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } + String indexPath = sub(outputBamPath, "\.bam$",".bai") # Samtools uses additional threads for merge. 
@@ -355,21 +356,20 @@ task Merge { runtime { cpu: threads - docker: dockerImage memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { # inputs bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} - threads: {description: "Number of threads to use.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + threads: {description: "Number of threads to use.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -379,14 +379,15 @@ task Sort { String outputPath = basename(inputBam, "\.bam") + ".sorted.bam" Boolean sortByName = false Int compressionLevel = 1 - Int threads = 1 + Int memoryPerThreadGb = 4 + Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } - # Select first needed as outputPath is optional input. (bug in cromwell) + # Select first needed as outputPath is optional input (bug in cromwell). 
String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") command { @@ -410,10 +411,10 @@ task Sort { } runtime { - cpu: 1 + cpu: threads memory: "~{memoryGb}G" - docker: dockerImage time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -422,14 +423,15 @@ task Sort { outputPath: {description: "Output directory path + output file.", category: "required"} sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} - memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} - memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes.", category: "advanced"} threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} + memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs outputBam: {description: "Sorted BAM file."} + outputBamIndex: {description: "Sorted BAM file index."} } } @@ -438,10 +440,13 @@ task Tabix { File inputFile String outputFilePath = "indexed.vcf.gz" String type = "vcf" + Int timeMinutes = 1 + ceil(size(inputFile, "G") * 2) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } - # FIXME: It is better to do the indexing on VCF creation. Not in a separate task. With file localization this gets hairy fast. + + # FIXME: It is better to do the indexing on VCF creation. + # Not in a separate task. With file localization this gets hairy fast. command { set -e mkdir -p "$(dirname ~{outputFilePath})" @@ -459,27 +464,26 @@ task Tabix { runtime { time_minutes: timeMinutes - docker: dockerImage + docker: dockerImage } parameter_meta { # inputs inputFile: {description: "The file to be indexed.", category: "required"} - outputFilePath: {description: "The location where the file should be written to. The index will appear alongside this link to the file.", - category: "common"} + outputFilePath: {description: "The location where the file should be written to. The index will appear alongside this link to the file.", category: "common"} type: {description: "The type of file (eg. vcf or bed) to be indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } task View { input { File inFile String outputFileName = "view.bam" Boolean uncompressedBamOutput = false + + File? referenceFasta Int? includeFilter Int? excludeFilter Int? excludeSpecificFilter @@ -490,9 +494,10 @@ task View { Int timeMinutes = 1 + ceil(size(inFile, "G") * 5) String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } + String outputIndexPath = basename(outputFileName) + ".bai" - # Always output to bam and output header + # Always output to bam and output header. command { set -e mkdir -p "$(dirname ~{outputFileName})" @@ -524,9 +529,9 @@ task View { parameter_meta { # inputs inFile: {description: "A BAM, SAM or CRAM file.", category: "required"} - referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} outputFileName: {description: "The location the output BAM file should be written.", category: "common"} uncompressedBamOutput: {description: "Equivalent to samtools view's `-u` flag.", category: "advanced"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} includeFilter: {description: "Equivalent to samtools view's `-f` option.", category: "advanced"} excludeFilter: {description: "Equivalent to samtools view's `-F` option.", category: "advanced"} excludeSpecificFilter: {description: "Equivalent to samtools view's `-G` option.", category: "advanced"} @@ -534,7 +539,6 @@ task View { threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
referenceFasta String outputFileName = "view.bam" Boolean uncompressedBamOutput = false + + File? referenceFasta Int? includeFilter Int? excludeFilter Int? excludeSpecificFilter @@ -490,9 +494,10 @@ task View { Int timeMinutes = 1 + ceil(size(inFile, "G") * 5) String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } + String outputIndexPath = basename(outputFileName) + ".bai" - # Always output to bam and output header + # Always output to bam and output header. command { set -e mkdir -p "$(dirname ~{outputFileName})" @@ -524,9 +529,9 @@ task View { parameter_meta { # inputs inFile: {description: "A BAM, SAM or CRAM file.", category: "required"} - referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} outputFileName: {description: "The location the output BAM file should be written.", category: "common"} uncompressedBamOutput: {description: "Equivalent to samtools view's `-u` flag.", category: "advanced"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} includeFilter: {description: "Equivalent to samtools view's `-f` option.", category: "advanced"} excludeFilter: {description: "Equivalent to samtools view's `-F` option.", category: "advanced"} excludeSpecificFilter: {description: "Equivalent to samtools view's `-G` option.", category: "advanced"} @@ -534,7 +539,6 @@ task View { threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From d101e77cf3211079a7b7ca50c0203ffea811919b Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 2 Nov 2020 15:38:52 +0100 Subject: [PATCH 134/668] Add last set of updates. --- .github/PULL_REQUEST_TEMPLATE.md | 3 +- CHANGELOG.md | 2 + pacbio.wdl | 89 +++++++++++++++++++++++++ samtools.wdl | 2 +- seqtk.wdl | 9 +-- smoove.wdl | 9 +-- somaticseq.wdl | 89 ++++++++++++------------- spades.wdl | 12 ++-- star.wdl | 20 +++--- strelka.wdl | 34 +++++----- stringtie.wdl | 18 ++--- survivor.wdl | 19 +++--- talon.wdl | 10 +-- transcriptclean.wdl | 21 +++--- umi-tools.wdl | 41 ++++++------ unicycler.wdl | 5 +- vardict.wdl | 41 ++++++------ vt.wdl | 20 +++--- whatshap.wdl | 111 ++++++++++++++++--------------- wisestork.wdl | 44 ++++++------ 20 files changed, 351 insertions(+), 248 deletions(-) create mode 100644 pacbio.wdl diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 199344f5..1d52f502 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,4 +1,3 @@ - ### Checklist -- [ ] Pull request details were added to CHANGELOG.md +- [ ] Pull request details were added to CHANGELOG.md. - [ ] `parameter_meta` for each task is up to date. diff --git a/CHANGELOG.md b/CHANGELOG.md index f0dfaf1f..e7d7fed7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Samtools: `Sort` task now has `threads` in runtime instead of `1`. + Picard: Add parameter_meta to `SortSam`. + pbmm2: Add parameter_meta for `sample`. + Centrifuge: Rename output in task `KReport` to `KrakenReport` to resolve @@ -20,6 +21,7 @@ version 5.0.0-dev + Bam2fastx: Add localisation of input files to Bam2Fasta task. + isoseq3: `cores` input has been renamed to `threads` to match tool naming. 
+ CCS: `cores` input has been renamed to `threads` to match tool naming. ++ Add PacBio preprocessing specific tasks `mergePacBio` & `ccsChunks`. + CCS: Update CCS to version 5. + deepvariant: Add task for DeepVariant. + gatk: Make intervals optional for GenotypeGVCFs. diff --git a/pacbio.wdl b/pacbio.wdl new file mode 100644 index 00000000..01f6d4fd --- /dev/null +++ b/pacbio.wdl @@ -0,0 +1,89 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task mergePacBio { + input { + Array[File]+ reports + String mergedReport + + String memory = "4G" + String dockerImage = "lumc/pacbio-merge:0.2" + } + + command { + set -e + mkdir -p $(dirname ~{mergedReport}) + pacbio_merge \ + --reports ~{sep=" " reports} \ + --json-output ~{mergedReport} + } + + runtime { + memory: memory + docker: dockerImage + } + + output { + File MergedReport = mergedReport + } + + parameter_meta { + # inputs + reports: {description: "The PacBio report files to merge.", category: "required"} + mergedReport: {description: "The location the merged PacBio report file should be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + +task ccsChunks { + input { + Int chunkCount + + String memory = "4G" + String dockerImage = "python:3.7-slim" + } + + command { + set -e + python <' "modified_strelka.vcf" > ~{outputVCFName} } @@ -425,10 +424,10 @@ task ModifyStrelka { } parameter_meta { + # inputs strelkaVCF: {description: "A vcf file as produced by strelka.", category: "required"} outputVCFName: {description: "The location the output VCF file should be written to.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/spades.wdl b/spades.wdl index 204dbfea..7cc16d21 100644 --- a/spades.wdl +++ b/spades.wdl @@ -22,10 +22,11 @@ version 1.0 task Spades { input { - String outputDir - String? preCommand File read1 File? read2 + String outputDir + + String? preCommand File? interlacedReads File? sangerReads File? pacbioReads @@ -44,12 +45,13 @@ task Spades { Boolean? disableGzipOutput Boolean? disableRepeatResolution File? dataset - Int threads = 1 - Int memoryGb = 16 File? tmpDir String? k Float? covCutoff Int? phredOffset + + Int threads = 1 + Int memoryGb = 16 } command { @@ -100,4 +102,4 @@ task Spades { cpu: threads memory: "~{memoryGb}G" } -} \ No newline at end of file +} diff --git a/star.wdl b/star.wdl index 3d0e2eb0..68193fcd 100644 --- a/star.wdl +++ b/star.wdl @@ -24,6 +24,7 @@ task GenomeGenerate { input { String genomeDir = "STAR_index" File referenceFasta + File? referenceGtf Int? sjdbOverhang @@ -61,8 +62,10 @@ task GenomeGenerate { File? sjdbListFromGtfOut = "~{genomeDir}/sjdbList.fromGTF.out.tab" File? sjdbListOut = "~{genomeDir}/sjdbList.out.tab" File? 
transcriptInfo = "~{genomeDir}/transcriptInfo.tab" - Array[File] starIndex = select_all([chrLength, chrNameLength, chrName, chrStart, genome, genomeParameters, - sa, saIndex, exonGeTrInfo, exonInfo, geneInfo, sjdbInfo, sjdbListFromGtfOut, + Array[File] starIndex = select_all([chrLength, chrNameLength, chrName, + chrStart, genome, genomeParameters, + sa, saIndex, exonGeTrInfo, exonInfo, + geneInfo, sjdbInfo, sjdbListFromGtfOut, sjdbListOut, transcriptInfo]) } @@ -74,16 +77,15 @@ task GenomeGenerate { } parameter_meta { + # inputs genomeDir: {description:"The directory the STAR index should be written to.", category: "common"} referenceFasta: {description: "The reference Fasta file.", category: "required"} referenceGtf: {description: "The reference GTF file.", category: "common"} sjdbOverhang: {description: "Equivalent to STAR's `--sjdbOverhang` option.", category: "advanced"} - threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -95,6 +97,8 @@ task Star { String outFileNamePrefix String outSAMtype = "BAM SortedByCoordinate" String readFilesCommand = "zcat" + Int outBAMcompression = 1 + Int? outFilterScoreMin Float? outFilterScoreMinOverLread Int? outFilterMatchNmin @@ -103,7 +107,6 @@ task Star { String? twopassMode = "Basic" Array[String]? outSAMattrRGline String? outSAMunmapped = "Within KeepPairs" - Int outBAMcompression = 1 Int? 
limitBAMsortRAM Int runThreadN = 4 @@ -119,7 +122,7 @@ task Star { # So we solve it with an optional memory string and using select_first # in the runtime section. - #TODO Could be extended for all possible output extensions + #TODO: Could be extended for all possible output extensions. Map[String, String] samOutputNames = {"BAM SortedByCoordinate": "sortedByCoord.out.bam"} command { @@ -157,12 +160,14 @@ task Star { } parameter_meta { + # inputs inputR1: {description: "The first-/single-end FastQ files.", category: "required"} inputR2: {description: "The second-end FastQ files (in the same order as the first-end files).", category: "common"} indexFiles: {description: "The star index files.", category: "required"} outFileNamePrefix: {description: "The prefix for the output files. May include directories.", category: "required"} outSAMtype: {description: "The type of alignment file to be produced. Currently only `BAM SortedByCoordinate` is supported.", category: "advanced"} readFilesCommand: {description: "Equivalent to star's `--readFilesCommand` option.", category: "advanced"} + outBAMcompression: {description: "The compression level of the output BAM.", category: "advanced"} outFilterScoreMin: {description: "Equivalent to star's `--outFilterScoreMin` option.", category: "advanced"} outFilterScoreMinOverLread: {description: "Equivalent to star's `--outFilterScoreMinOverLread` option.", category: "advanced"} outFilterMatchNmin: {description: "Equivalent to star's `--outFilterMatchNmin` option.", category: "advanced"} @@ -174,7 +179,6 @@ task Star { limitBAMsortRAM: {description: "Equivalent to star's `--limitBAMsortRAM` option.", category: "advanced"} runThreadN: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - outBAMcompression: {description: "The compression level of the output BAM.", category: "advanced"} timeMinutes: {description: "The maximum amount of 
time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/strelka.wdl b/strelka.wdl index 50c38b55..f4b9888b 100644 --- a/strelka.wdl +++ b/strelka.wdl @@ -29,11 +29,12 @@ task Germline { Array[File]+ indexes File referenceFasta File referenceFastaFai - File? callRegions - File? callRegionsIndex Boolean exome = false Boolean rna = false + File? callRegions + File? callRegionsIndex + Int cores = 1 Int memoryGb = 4 Int timeMinutes = 90 @@ -61,28 +62,27 @@ task Germline { } runtime { - docker: dockerImage cpu: cores - time_minutes: timeMinutes memory: "~{memoryGb}G" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs runDir: {description: "The directory to use as run/output directory.", category: "common"} bams: {description: "The input BAM files.", category: "required"} indexes: {description: "The indexes for the input BAM files.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} - callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} rna: {description: "Whether or not the data is from RNA sequencing.", category: "common"} - + callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} + callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} cores: {description: "The number of cores to use.", 
category: "advanced"} memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -95,11 +95,12 @@ task Somatic { File tumorBamIndex File referenceFasta File referenceFastaFai + Boolean exome = false + File? callRegions File? callRegionsIndex File? indelCandidatesVcf File? indelCandidatesVcfIndex - Boolean exome = false Int cores = 1 Int memoryGb = 4 @@ -133,13 +134,14 @@ task Somatic { } runtime { - docker: dockerImage cpu: cores - time_minutes: timeMinutes memory: "~{memoryGb}G" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs runDir: {description: "The directory to use as run/output directory.", category: "common"} normalBam: {description: "The normal/control sample's BAM file.", category: "required"} normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "required"} @@ -147,17 +149,15 @@ task Somatic { tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} callRegionsIndex: {description: "The index of the bed file which indicates the regions 
to operate on.", category: "common"} indelCandidatesVcf: {description: "An indel candidates VCF file from manta.", category: "advanced"} indelCandidatesVcfIndex: {description: "The index for the indel candidates VCF file.", category: "advanced"} - exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} - cores: {description: "The number of cores to use.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } meta { @@ -165,4 +165,4 @@ task Somatic { exclude: ["doNotDefineThis"] } } -} \ No newline at end of file +} diff --git a/stringtie.wdl b/stringtie.wdl index 5ed62dea..fff4140c 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -24,9 +24,10 @@ task Stringtie { input { File bam File bamIndex - File? referenceGtf Boolean skipNovelTranscripts = false String assembledTranscriptsFile + + File? referenceGtf Boolean? firstStranded Boolean? secondStranded String? 
geneAbundanceFile @@ -64,19 +65,19 @@ task Stringtie { } parameter_meta { + # inputs bam: {description: "The input BAM file.", category: "required"} bamIndex: {description: "The input BAM file's index.", category: "required"} - referenceGtf: {description: "A reference GTF file to be used as guide.", category: "common"} skipNovelTranscripts: {description: "Whether new transcripts should be assembled or not.", category: "common"} assembledTranscriptsFile: {description: "Where the output of the assembly should be written.", category: "required"} + referenceGtf: {description: "A reference GTF file to be used as guide.", category: "common"} firstStranded: {description: "Equivalent to the --rf flag of stringtie.", category: "required"} secondStranded: {description: "Equivalent to the --fr flag of stringtie.", category: "required"} geneAbundanceFile: {description: "Where the abundance file should be written.", category: "common"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -84,13 +85,14 @@ task Merge { input { Array[File]+ gtfFiles String outputGtfPath + Boolean keepMergedTranscriptsWithRetainedIntrons = false + File? guideGtf Int? minimumLength Float? minimumCoverage Float? minimumFPKM Float? minimumTPM Float? minimumIsoformFraction - Boolean keepMergedTranscriptsWithRetainedIntrons = false String? 
label String memory = "10G" @@ -125,19 +127,19 @@ task Merge { } parameter_meta { + # inputs gtfFiles: {description: "The GTF files produced by stringtie.", category: "required"} outputGtfPath: {description: "Where the output should be written.", category: "required"} + keepMergedTranscriptsWithRetainedIntrons: {description: "Equivalent to the -i flag of 'stringtie --merge'.", category: "advanced"} guideGtf: {description: "Equivalent to the -G option of 'stringtie --merge'.", category: "advanced"} minimumLength: {description: "Equivalent to the -m option of 'stringtie --merge'.", category: "advanced"} minimumCoverage: {description: "Equivalent to the -c option of 'stringtie --merge'.", category: "advanced"} minimumFPKM: {description: "Equivalent to the -F option of 'stringtie --merge'.", category: "advanced"} minimumTPM: {description: "Equivalent to the -T option of 'stringtie --merge'.", category: "advanced"} minimumIsoformFraction: {description: "Equivalent to the -f option of 'stringtie --merge'.", category: "advanced"} - keepMergedTranscriptsWithRetainedIntrons: {description: "Equivalent to the -i flag of 'stringtie --merge'.", category: "advanced"} label: {description: "Equivalent to the -l option of 'stringtie --merge'.", category: "advanced"} memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/survivor.wdl b/survivor.wdl index b9583009..c7b31058 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2018 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -32,6 +30,7 @@ task Merge { Boolean distanceBySvSize = false Int minSize = 30 String outputPath = "./survivor/merged.vcf" + String memory = "24G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" @@ -64,15 +63,15 @@ task Merge { parameter_meta { # inputs - filePaths: {description: "An array of VCF files (predictions) to be merged by SURVIVOR", category: "required"} - breakpointDistance: {description: "The distance between pairwise breakpoints between SVs", category: "advanced"} - suppVecs: {description: "The minimum number of SV callers to support the merging", category: "advanced"} - svType: {description: "A boolean to include the type SV to be merged", category: "advanced"} - strandType: {description: "A boolean to include strand type of an SV to be merged", category: "advanced"} - distanceBySvSize: {description: "A boolean to predict the pairwise distance between the SVs based on their size", category: "advanced"} - minSize: {description: "The mimimum size of SV to be merged", category: "advanced"} + filePaths: {description: "An array of VCF files (predictions) to be merged by SURVIVOR.", category: "required"} + breakpointDistance: {description: "The distance between pairwise breakpoints between SVs.", category: "advanced"} + suppVecs: {description: "The minimum number of SV callers to support the merging.", category: "advanced"} + svType: {description: "A boolean to include the type SV to be merged.", category: "advanced"} + strandType: {description: "A boolean to include strand type of an SV to be merged.", category: 
"advanced"} + distanceBySvSize: {description: "A boolean to predict the pairwise distance between the SVs based on their size.", category: "advanced"} + minSize: {description: "The mimimum size of SV to be merged.", category: "advanced"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - memory: {description: "The memory required to run the programs", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/talon.wdl b/talon.wdl index c11ab9e0..61f5eb4a 100644 --- a/talon.wdl +++ b/talon.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2019 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE diff --git a/transcriptclean.wdl b/transcriptclean.wdl index 79661307..efdd95f4 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2019 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -54,10 +54,10 @@ task GetSJsFromGtf { parameter_meta { # inputs - gtfFile: {description: "Input gtf file", category: "required"} - genomeFile: {description: "Reference genome", category: "required"} - minIntronSize: {description: "Minimum size of intron to consider a junction.", category: "advanced"} + gtfFile: {description: "Input gtf file.", category: "required"} + genomeFile: {description: "Reference genome.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + minIntronSize: {description: "Minimum size of intron to consider a junction.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -97,7 +97,7 @@ task GetTranscriptCleanStats { parameter_meta { # inputs - inputSam: {description: "Output sam file from transcriptclean", category: "required"} + inputSam: {description: "Output sam file from transcriptclean.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -189,8 +189,7 @@ task TranscriptClean { cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs fastaFile: {description: "Fasta file containing corrected reads."} diff --git a/umi-tools.wdl b/umi-tools.wdl index c5f3b145..7b0a3991 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2017 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2017 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -26,9 +26,10 @@ task Extract { File? read2 String bcPattern String? bcPattern2 - Boolean threePrime = false String read1Output = "umi_extracted_R1.fastq.gz" String? 
read2Output = "umi_extracted_R2.fastq.gz" + Boolean threePrime = false + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" } @@ -50,21 +51,21 @@ task Extract { } runtime { - docker: dockerImage time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs read1: {description: "The first/single-end fastq file.", category: "required"} read2: {description: "The second-end fastq file.", category: "common"} bcPattern: {description: "The pattern to be used for UMI extraction. See the umi_tools docs for more information.", category: "required"} bcPattern2: {description: "The pattern to be used for UMI extraction in the second-end reads. See the umi_tools docs for more information.", category: "advanced"} - threePrime: {description: "Whether or not the UMI's are at the reads' 3' end. If false the UMIs are extracted from the 5' end.", category: "advanced"} read1Output: {description: "The location to write the first/single-end output fastq file to.", category: "advanced"} read2Output: {description: "The location to write the second-end output fastq file to.", category: "advanced"} + threePrime: {description: "Whether or not the UMI's are at the reads' 3' end. If false the UMIs are extracted from the 5' end.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -72,15 +73,15 @@ task Dedup { input { File inputBam File inputBamIndex - String? umiSeparator String outputBamPath - String? 
statsPrefix Boolean paired = true + String? umiSeparator + String? statsPrefix + String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) - - # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9) + # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9). String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" } @@ -107,21 +108,21 @@ task Dedup { runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBam: {description: "The input BAM file.", categrory: "required"} inputBamIndex: {description: "The index for the ipnut BAM file.", cateogry: "required"} outputBamPath: {description: "The location to write the output BAM file to.", category: "required"} - statsPrefix: {description: "The prefix for the stats files.", category: "advanced"} - umiSeparator: {description: "Seperator used for UMIs in the read names.", category: "advanced"} paired: {description: "Whether or not the data is paired.", category: "common"} + umiSeparator: {description: "Separator used for UMIs in the read names.", category: "advanced"} + statsPrefix: {description: "The prefix for the stats files.", category: "advanced"} memory: {description: "The amount of memory required for the task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/unicycler.wdl b/unicycler.wdl index fc393603..938d0c7e 100644 --- a/unicycler.wdl +++ b/unicycler.wdl @@ -22,12 +22,13 @@ version 1.0 task Unicycler { input { + String out + String? preCommand File? short1 File? short2 File? unpaired File? long - String out Int? verbosity Int? minFastaLength Int? keep @@ -125,4 +126,4 @@ task Unicycler { cpu: threads memory: memory } -} \ No newline at end of file +} diff --git a/vardict.wdl b/vardict.wdl index 92beb32e..fc37c9ef 100644 --- a/vardict.wdl +++ b/vardict.wdl @@ -27,29 +27,28 @@ task VarDict { String tumorSampleName File tumorBam File tumorBamIndex - String? normalSampleName - File? normalBam - File? normalBamIndex File referenceFasta File referenceFastaFai File bedFile String outputVcf - - Int chromosomeColumn = 1 - Int startColumn = 2 - Int endColumn = 3 - Int geneColumn = 4 - Boolean outputCandidateSomaticOnly = true Boolean outputAllVariantsAtSamePosition = true Float mappingQuality = 20 Int minimumTotalDepth = 8 Int minimumVariantDepth = 4 Float minimumAlleleFrequency = 0.02 + Int chromosomeColumn = 1 + Int startColumn = 2 + Int endColumn = 3 + Int geneColumn = 4 + + String? normalSampleName + File? normalBam + File? 
normalBamIndex + String javaXmx = "16G" Int threads = 1 String memory = "18G" - String javaXmx = "16G" Int timeMinutes = 300 String dockerImage = "quay.io/biocontainers/vardict-java:1.5.8--1" } @@ -93,33 +92,31 @@ task VarDict { } parameter_meta { + # inputs tumorSampleName: {description: "The name of the tumor/case sample.", category: "required"} tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"} tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} - normalSampleName: {description: "The name of the normal/control sample.", category: "common"} - normalBam: {description: "The normal/control sample's BAM file.", category: "common"} - normalBamIndex: {description: "The normal/control sample's BAM file.", category: "common"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} bedFile: {description: "A bed file describing the regions to operate on. 
These regions must be below 1e6 bases in size.", category: "required"} outputVcf: {description: "The location to write the output VCF file to.", category: "required"} - chromosomeColumn: {description: "Equivalent to vardict-java's `-c` option.", category: "advanced"} - startColumn: {description: "Equivalent to vardict-java's `-S` option.", category: "advanced"} - endColumn: {description: "Equivalent to vardict-java's `-E` option.", category: "advanced"} - geneColumn: {description: "Equivalent to vardict-java's `-g` option.", category: "advanced"} outputCandidateSomaticOnly: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-M` flag.", category: "advanced"} outputAllVariantsAtSamePosition: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-A` flag.", category: "advanced"} mappingQuality: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-Q` option.", category: "advanced"} minimumTotalDepth: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-d` option.", category: "advanced"} minimumVariantDepth: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-v` option.", category: "advanced"} minimumAlleleFrequency: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-f` option.", category: "advanced"} - + chromosomeColumn: {description: "Equivalent to vardict-java's `-c` option.", category: "advanced"} + startColumn: {description: "Equivalent to vardict-java's `-S` option.", category: "advanced"} + endColumn: {description: "Equivalent to vardict-java's `-E` option.", category: "advanced"} + geneColumn: {description: "Equivalent to vardict-java's `-g` option.", category: "advanced"} + normalSampleName: {description: "The name of the normal/control sample.", category: "common"} + normalBam: {description: "The normal/control sample's BAM file.", category: "common"} + normalBamIndex: {description: "The normal/control sample's BAM file.", category: "common"} + 
javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/vt.wdl b/vt.wdl index 99cc1318..95585ff2 100644 --- a/vt.wdl +++ b/vt.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -28,9 +28,10 @@ task Normalize { File referenceFastaFai Boolean ignoreMaskedRef = false String outputPath = "./vt/normalized_decomposed.vcf" - String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" + String memory = "4G" Int timeMinutes = 30 + String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" } command { @@ -56,13 +57,12 @@ task Normalize { # inputs inputVCF: {description: "The VCF file to process.", category: "required"} inputVCFIndex: {description: "The index of the VCF file to be processed.", category: "required"} - outputPath: {description: "The location the output VCF file should be written.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs", category: "advanced"} - memory: {description: "The memory required to run the programs", category: "advanced"} + ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs.", category: "advanced"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } - diff --git a/whatshap.wdl b/whatshap.wdl index 93624590..5c69400a 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -20,10 +20,14 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - task Phase { input { String outputVCF + File vcf + File vcfIndex + File phaseInput + File phaseInputIndex + File? reference File? referenceIndex String? tag @@ -33,20 +37,15 @@ task Phase { String? chromosome String? threshold String? ped - File vcf - File vcfIndex - File phaseInput - File phaseInputIndex String memory = "4G" Int timeMinutes = 120 - # Whatshap 1.0, tabix 0.2.5 + # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" } command { set -e - whatshap phase \ ~{vcf} \ ~{phaseInput} \ @@ -69,24 +68,27 @@ task Phase { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs outputVCF: {description: "Output VCF file. Add .gz to the file name to get compressed output. If omitted, use standard output.", category: "common"} - reference: {description: "Reference file. Provide this to detect alleles through re-alignment. 
If no index (.fai) exists, it will be created", category: "common"} - tag: {description: "Store phasing information with PS tag (standardized) or HP tag (used by GATK ReadBackedPhasing) (default: {description: PS)", category: "common"} - algorithm: {description: "Phasing algorithm to use (default: {description: whatshap)", category: "advanced"} - indels: {description: "Also phase indels (default: {description: do not phase indels)", category: "common"} + vcf: {description: "VCF or BCF file with variants to be phased (can be gzip-compressed).", category: "required"} + vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased.", category: "required"} + phaseInput: {description: "BAM, CRAM, VCF or BCF file(s) with phase information, either through sequencing reads (BAM, CRAM) or through phased blocks (VCF, BCF).", category: "required"} + phaseInputIndex: {description: "Index of BAM, CRAM, VCF or BCF file(s) with phase information.", category: "required"} + reference: {description: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created.", category: "common"} + referenceIndex: {description: "Index of reference file.", category: "common"} + tag: {description: "Store phasing information with PS tag (standardized) or HP tag (used by GATK ReadBackedPhasing) (default: PS).", category: "common"} + algorithm: {description: "Phasing algorithm to use (default: whatshap).", category: "advanced"} + indels: {description: "Also phase indels (default: do not phase indels).", category: "common"} sample: {description: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times.", category: "common"} chromosome: {description: "Name of chromosome to phase. If not given, all chromosomes in the input VCF are phased. 
Can be used multiple times.", category: "common"} threshold: {description: "The threshold of the ratio between the probabilities that a pair of reads come from the same haplotype and different haplotypes in the read merging model (default: {description: 1000000).", category: "advanced"} ped: {description: "Use pedigree information in PED file to improve phasing (switches to PedMEC algorithm). Columns 2, 3, 4 must refer to child, mother, and father sample names as used in the VCF and BAM/CRAM. Other columns are ignored.", category: "advanced"} - vcf: {description: "VCF or BCF file with variants to be phased (can be gzip-compressed)", category: "required"} - vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased", category: "required"} - phaseInput: {description: "BAM, CRAM, VCF or BCF file(s) with phase information, either through sequencing reads (BAM, CRAM) or through phased blocks (VCF, BCF)", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -95,16 +97,17 @@ task Phase { task Stats { input { + File vcf + String? gtf String? sample String? tsv String? blockList String? chromosome - File vcf String memory = "4G" Int timeMinutes = 120 - # Whatshap 1.0, tabix 0.2.5 + # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" } @@ -125,18 +128,19 @@ task Stats { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - gtf: "Write phased blocks to GTF file." - sample: "Name of the sample to process. 
If not given, use first sample found in VCF." - tsv: "Filename to write statistics to (tab-separated)." - blockList: "Filename to write list of all blocks to (one block per line)." - chromosome: "Name of chromosome to process. If not given, all chromosomes in the input VCF are considered." - vcf: "Phased VCF file" + # inputs + vcf: {description: "Phased VCF file.", category: "required"} + gtf: {description: "Write phased blocks to GTF file.", category: "common"} + sample: {description: "Name of the sample to process. If not given, use first sample found in VCF.", category: "common"} + tsv: {description: "Filename to write statistics to (tab-separated).", category: "common"} + blockList: {description: "Filename to write list of all blocks to (one block per line).", category: "advanced"} + chromosome: {description: "Name of chromosome to process. If not given, all chromosomes in the input VCF are considered.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -145,57 +149,58 @@ task Stats { task Haplotag { input { + File vcf + File vcfIndex + File alignments + File alignmentsIndex String outputFile + File? reference File? referenceFastaIndex String? regions String? sample - File vcf - File vcfIndex - File alignments - File alignmentsIndex String memory = "4G" Int timeMinutes = 120 - # Whatshap 1.0, tabix 0.2.5 + # Whatshap 1.0, tabix 0.2.5. 
String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" } command { set -e - whatshap haplotag \ - ~{vcf} \ - ~{alignments} \ - ~{if defined(outputFile) then ("--output " + '"' + outputFile+ '"') else ""} \ - ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ - ~{if defined(regions) then ("--regions " + '"' + regions + '"') else ""} \ - ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} - - python3 -c "import pysam; pysam.index('~{outputFile}')" + ~{vcf} \ + ~{alignments} \ + ~{if defined(outputFile) then ("--output " + '"' + outputFile+ '"') else ""} \ + ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ + ~{if defined(regions) then ("--regions " + '"' + regions + '"') else ""} \ + ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} + + python3 -c "import pysam; pysam.index('~{outputFile}')" } output { - File bam = outputFile - File bamIndex = outputFile + ".bai" + File bam = outputFile + File bamIndex = outputFile + ".bai" } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - outputFile: "Output file. If omitted, use standard output." - reference: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created." - referenceFastaIndex: "Index for the reference file." - regions: "Specify region(s) of interest to limit the tagging to reads/variants overlapping those regions. You can specify a space-separated list of regions in the form of chrom:start-end, chrom (consider entire chromosome), or chrom:start (consider region from this start to end of chromosome)." - sample: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times." 
- vcf: "VCF file with phased variants (must be gzip-compressed and indexed)." - vcfIndex: "Index for the VCF or BCF file with variants to be phased." - alignments: "File (BAM/CRAM) with read alignments to be tagged by haplotype." - alignmentsIndex: "Index for the alignment file." + # inputs + vcf: {description: "VCF file with phased variants (must be gzip-compressed and indexed).", category: "required"} + vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased.", category: "required"} + alignments: {description: "File (BAM/CRAM) with read alignments to be tagged by haplotype.", category: "required"} + alignmentsIndex: {description: "Index for the alignment file.", category: "required"} + outputFile: {description: "Output file. If omitted, use standard output.", category: "required"} + reference: {description: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created.", category: "common"} + referenceFastaIndex: {description: "Index for the reference file.", category: "common"} + regions: {description: "Specify region(s) of interest to limit the tagging to reads/variants overlapping those regions. You can specify a space-separated list of regions in the form of chrom:start-end, chrom (consider entire chromosome), or chrom:start (consider region from this start to end of chromosome).", category: "advanced"} + sample: {description: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/wisestork.wdl b/wisestork.wdl index 0fd812b1..6be32168 100644 --- a/wisestork.wdl +++ b/wisestork.wdl @@ -22,13 +22,15 @@ version 1.0 task Count { input { - Int? binSize - File reference - File referenceIndex - File? binFile File inputBam File inputBamIndex + File reference + File referenceIndex String outputBed = "output.bed" + + Int? binSize + File? binFile + String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -54,15 +56,17 @@ task Count { task GcCorrect { input { - Int? binSize File reference File referenceIndex - File? binFile File inputBed String outputBed = "output.bed" + + Int? binSize + File? binFile Float? fracN Int? iter Float? fracLowess + String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -91,13 +95,16 @@ task GcCorrect { task Newref { input { - Int? binSize File reference File referenceIndex - File? binFile Array[File]+ inputBeds String outputBed = "output.bed" + + Int? binSize + File? binFile Int? nBins + + Int memory = 2 + ceil(length(inputBeds) * 0.15) String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -106,36 +113,36 @@ task Newref { mkdir -p $(dirname ~{outputBed}) wisestork newref \ ~{"--binsize " + binSize} \ - --reference ~{reference} \ - ~{"--bin-file " + binFile} \ - --output ~{outputBed} \ - -I ~{sep=" -I " inputBeds} \ - ~{"--n-bins " + nBins} + --reference ~{reference} \ + ~{"--bin-file " + binFile} \ + --output ~{outputBed} \ + -I ~{sep=" -I " inputBeds} \ + ~{"--n-bins " + nBins} } output { File bedFile = outputBed } - Int memory = 2 + ceil(length(inputBeds) * 0.15) - runtime { - docker: dockerImage memory: "~{memory}G" + docker: dockerImage } } task Zscore { input { - Int? binSize File reference File referenceIndex - File? 
binFile File inputBed File inputBedIndex File dictionaryFile File dictionaryFileIndex String outputBed = "output.bed" + + Int? binSize + File? binFile + String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -159,4 +166,3 @@ task Zscore { docker: dockerImage } } - From f34613058333fbc3a523ef513fdc6026cdd87378 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 2 Nov 2020 17:21:20 +0100 Subject: [PATCH 135/668] Update lima to match isoseq3 and ccs changes. --- CHANGELOG.md | 1 + lima.wdl | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e7d7fed7..9bff5f3c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ version 5.0.0-dev + Bwa & bwa-mem2: Add parameter_meta for `outputHla`. + Multiqc: Removed WDL_AID excludes of "finished" & "dependencies" inputs. + Bam2fastx: Add localisation of input files to Bam2Fasta task. ++ Lima: `cores` input has been renamed to `threads` to match tool naming. + isoseq3: `cores` input has been renamed to `threads` to match tool naming. + CCS: `cores` input has been renamed to `threads` to match tool naming. + Add PacBio preprocessing specific tasks `mergePacBio` & `ccsChunks`. 
diff --git a/lima.wdl b/lima.wdl index 1da4ef5e..33b2328b 100644 --- a/lima.wdl +++ b/lima.wdl @@ -48,7 +48,7 @@ task Lima { File barcodeFile String outputPrefix - Int cores = 2 + Int threads = 2 String memory = "2G" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/lima:1.11.0--0" @@ -82,7 +82,7 @@ task Lima { --guess-min-count ~{guessMinCount} \ ~{true="--peek-guess" false="" peekGuess} \ --log-level ~{logLevel} \ - --num-threads ~{cores} \ + --num-threads ~{threads} \ ~{"--log-file " + outputPrefix + ".stderr.log"} \ ~{inputBamFile} \ ~{barcodeFile} \ @@ -110,7 +110,7 @@ task Lima { } runtime { - cpu: cores + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -143,7 +143,7 @@ task Lima { inputBamFile: {description: "Bam input file.", category: "required"} barcodeFile: {description: "Barcode/primer fasta file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - cores: {description: "The number of cores to be used.", category: "advanced"} + threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From b96ec320ded2fec077f358460376bba1582337ac Mon Sep 17 00:00:00 2001 From: Jasper Date: Tue, 3 Nov 2020 17:21:20 +0100 Subject: [PATCH 136/668] Update bwa.wdl. Co-authored-by: Davy Cats --- bwa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index 0f09f7a9..ee01957e 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -33,8 +33,8 @@ task Mem { String? readgroup Int? sortThreads + Int? 
memoryGb - Int threads = 4 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10 From adf58a85569ca3335874b5cf55bf86933aacbb8e Mon Sep 17 00:00:00 2001 From: Jasper Date: Tue, 3 Nov 2020 17:22:19 +0100 Subject: [PATCH 137/668] Update bwa-mem2.wdl. Co-authored-by: Davy Cats --- bwa-mem2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index 34cd38a6..89a48fbd 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -33,8 +33,8 @@ task Mem { String? readgroup Int? sortThreads + Int? memoryGb - Int threads = 4 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10 From a5aa0fef74bbadb4ea1562ebf65e860975dc3fbe Mon Sep 17 00:00:00 2001 From: Jasper Date: Tue, 3 Nov 2020 17:22:56 +0100 Subject: [PATCH 138/668] Update bwa-mem2.wdl --- bwa-mem2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index 89a48fbd..b4ca877a 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -37,7 +37,7 @@ task Mem { Int? memoryGb Int threads = 4 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) - # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10 + # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-6a15c99309c82b345497d24489bee67bbb76c2f6:1c9c3227b9bf825a8dc9726a25701aa23c0b1f12-0" } From d35543c91eba6179b6738f9ac3eb412ded0f60a6 Mon Sep 17 00:00:00 2001 From: Jasper Date: Tue, 3 Nov 2020 17:23:12 +0100 Subject: [PATCH 139/668] Update bwa.wdl --- bwa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index ee01957e..f4061729 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -37,7 +37,7 @@ task Mem { Int? 
memoryGb Int threads = 4 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) - # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10 + # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } From 38514b1db4049d1127f03e4c888027c99c2b9bcf Mon Sep 17 00:00:00 2001 From: Jasper Date: Tue, 3 Nov 2020 17:25:29 +0100 Subject: [PATCH 140/668] Update bam2fastx.wdl --- bam2fastx.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 2ad08581..2ae22a57 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -58,7 +58,7 @@ task Bam2Fasta { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{sep=" " bam} + $bamFiles } output { From 8a02fc35c76674d7b2b7e1d4b9addaaaea58e9ff Mon Sep 17 00:00:00 2001 From: Jasper Date: Wed, 4 Nov 2020 08:49:03 +0100 Subject: [PATCH 141/668] Update bwa-mem2.wdl --- bwa-mem2.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index b4ca877a..4566e68c 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -34,8 +34,8 @@ task Mem { String? readgroup Int? sortThreads - Int? memoryGb Int threads = 4 + Int? memoryGb Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10. 
String dockerImage = "quay.io/biocontainers/mulled-v2-6a15c99309c82b345497d24489bee67bbb76c2f6:1c9c3227b9bf825a8dc9726a25701aa23c0b1f12-0" @@ -101,8 +101,8 @@ task Mem { compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} readgroup: {description: "A readgroup identifier.", category: "common"} sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} - memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} threads: {description: "The number of threads to use for alignment.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From c8e043006f744f23155d0fba00ebec962bf5c910 Mon Sep 17 00:00:00 2001 From: Jasper Date: Wed, 4 Nov 2020 08:49:50 +0100 Subject: [PATCH 142/668] Update bwa.wdl --- bwa.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index f4061729..e87fd82a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -33,9 +33,9 @@ task Mem { String? readgroup Int? sortThreads - - Int? memoryGb + Int threads = 4 + Int? memoryGb Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. 
String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" @@ -96,8 +96,8 @@ task Mem { compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} readgroup: {description: "A readgroup identifier.", category: "common"} sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} - memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} threads: {description: "The number of threads to use for alignment.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 4a2f3366cb5f0cd57bfab8da01369c29c6a35063 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 5 Nov 2020 16:19:31 +0100 Subject: [PATCH 143/668] add tasks for amber and cobalt, group tasks from hmftools in one file --- gripss.wdl | 122 ----------------------------------------------------- sage.wdl | 100 ------------------------------------------- 2 files changed, 222 deletions(-) delete mode 100644 gripss.wdl delete mode 100644 sage.wdl diff --git a/gripss.wdl b/gripss.wdl deleted file mode 100644 index c9a8f27d..00000000 --- a/gripss.wdl +++ /dev/null @@ -1,122 +0,0 @@ -version 1.0 - -# Copyright (c) 2020 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the 
Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -task ApplicationKt { - input { - File inputVcf - String outputPath = "gripss.vcf.gz" - File referenceFasta - File referenceFastaFai - File referenceFastaDict - File breakpointHotspot - File breakendPon - File breakpointPon - - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" - } - - command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ - com.hartwig.hmftools.gripss.GripssApplicationKt \ - -ref_genome ~{referenceFasta} \ - -breakpoint_hotspot ~{breakpointHotspot} \ - -breakend_pon ~{breakendPon} \ - -breakpoint_pon ~{breakpointPon} \ - -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - } - - runtime { - memory: memory - time_minutes: timeMinutes # !UnknownRuntimeKey - docker: dockerImage - } - - parameter_meta { - inputVcf: {description: "The input VCF.", category: "required"} - outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The 
sequence dictionary associated with the reference fasta file.", - category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} - breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} - breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} - -task HardFilterApplicationKt { - input { - File inputVcf - String outputPath = "gripss_hard_filter.vcf.gz" - - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" - } - - command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ - com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ - -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - } - - runtime { - memory: memory - time_minutes: timeMinutes # !UnknownRuntimeKey - docker: dockerImage - } - - parameter_meta { - inputVcf: {description: "The input VCF.", category: "required"} - outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} 
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} \ No newline at end of file diff --git a/sage.wdl b/sage.wdl deleted file mode 100644 index ab42bee8..00000000 --- a/sage.wdl +++ /dev/null @@ -1,100 +0,0 @@ -version 1.0 - -# Copyright (c) 2020 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -task Sage { - input { - String tumorName - File tumorBam - File tumorBamIndex - String? normalName - File? normalBam - File? 
normalBamIndex - File referenceFasta - File referenceFastaDict - File referenceFastaFai - File hotspots - File panelBed - File highConfidenceBed - Boolean hg38 = false - String outputPath = "./sage.vcf.gz" - - Int threads = 2 - String javaXmx = "32G" - String memory = "33G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--2" - } - - command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-sage-2.2-2/sage.jar \ - com.hartwig.hmftools.sage.SageApplication \ - -tumor ~{tumorName} \ - -tumor_bam ~{tumorBam} \ - ~{"-reference " + normalName} \ - ~{"-reference_bam " + normalBam} \ - -ref_genome ~{referenceFasta} \ - -hotspots ~{hotspots} \ - -panel_bed ~{panelBed} \ - -high_confidence_bed ~{highConfidenceBed} \ - -assembly ~{true="hg38" false="hg19" hg38} \ - -threads ~{threads} \ - -out ~{outputPath} - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - # There is some plots as well, but in the current container the labels in the plots are just series of `□`s. - # This seems to be a systemic issue with R generated plots in biocontainers... 
- } - - runtime { - time_minutes: timeMinutes # !UnknownRuntimeKey - cpu: threads - docker: dockerImage - memory: memory - } - - parameter_meta { - tumorName: {description: "The name of the tumor sample.", category: "required"} - tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} - tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} - normalName: {description: "The name of the normal/reference sample.", category: "common"} - normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} - panelBed: {description: "A bed file describing coding regions to search for in frame indels.", category: "required"} - highConfidenceBed: {description: "A bed files describing high confidence mapping regions.", category: "required"} - - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} From 333f052f344b331591797bccbe45028c6882b770 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 6 Nov 2020 11:44:09 +0100 Subject: [PATCH 144/668] Update first set of parameter_meta. --- CHANGELOG.md | 1 + TO-DO.md | 13 +++++++ bcftools.wdl | 15 ++++++++ bedtools.wdl | 32 ++++++++++++++++ biopet/bamstats.wdl | 11 +++--- biopet/biopet.wdl | 85 ++++++++++++++++++++++------------------- biopet/sampleconfig.wdl | 21 +++++----- biopet/seqstat.wdl | 9 +++-- biowdl.wdl | 3 ++ bowtie.wdl | 4 ++ chunked-scatter.wdl | 6 +++ 11 files changed, 143 insertions(+), 57 deletions(-) create mode 100644 TO-DO.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bff5f3c..b7a8741f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Complete `parameter_meta` for tasks missing the outputs. + Samtools: `Sort` task now has `threads` in runtime instead of `1`. + Picard: Add parameter_meta to `SortSam`. + pbmm2: Add parameter_meta for `sample`. diff --git a/TO-DO.md b/TO-DO.md new file mode 100644 index 00000000..cc76a5d6 --- /dev/null +++ b/TO-DO.md @@ -0,0 +1,13 @@ +#TO DO +## Requires parameter_meta: +* biopet.wdl: `ExtractAdaptersFastqc`. + +## Duplicate tasks: +* + +## Out of date with new cluster & parameter_meta: +* bamstats.wdl: `Generate`. +* biopet.wdl: `BaseCounter`, `FastqSplitter`, `FastqSync`, + `ValidateAnnotation`, `ValidateFastq`, `ValidateVcf`, `VcfStats`. +* sampleconfig.wdl: `SampleConfig`, `SampleConfigCromwellArrays`, `CaseControl`. +* seqstat.wdl: `Generate`. 
diff --git a/bcftools.wdl b/bcftools.wdl index 41825747..28380dea 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -118,6 +118,10 @@ task Annotate { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Annotated VCF file."} + outputVcfIndex: {description: "Index of the annotated VCF file."} } } @@ -165,6 +169,10 @@ task Sort { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Sorted VCF file."} + outputVcfIndex: {description: "Index of sorted VCF file."} } } @@ -272,6 +280,9 @@ task Stats { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + stats: {description: "Text file stats which is suitable for machine processing and can be plotted using plot-vcfstats."} } } @@ -316,5 +327,9 @@ task View { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "VCF file."} + outputVcfIndex: {description: "Index of VCF file."} } } diff --git a/bedtools.wdl b/bedtools.wdl index b7a03c17..3dbf93cb 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -60,6 +60,9 @@ task Complement { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + complementBed: {description: "All intervals in a genome that are not covered by at least one interval in the input file."} } } @@ -95,6 +98,9 @@ task Merge { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + mergedBed: {description: "Merged bed file."} } } @@ -132,6 +138,9 @@ task MergeBedFiles { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + mergedBed: {description: "Merged bed file."} } } @@ -179,6 +188,26 @@ task Sort { time_minutes: timeMinutes docker: dockerImage } + + parameter_meta { + # inputs + inputBed: {description: "The bed to sort.", category: "required"} + sizeA: {description: "Sort by feature size in ascending order.", category: "common"} + sizeD: {description: "Sort by feature size in descending order.", category: "common"} + chrThenSizeA: {description: "Sort by chromosome (asc), then by feature size (asc).", category: "common"} + chrThenSizeD: {description: "Sort by chromosome (asc), then by feature size (desc).", category: "common"} + chrThenScoreA: {description: "Sort by chromosome (asc), then by score (asc).", category: "common"} + chrThenScoreD: {description: "Sort by chromosome (asc), then by score (desc).", category: "common"} + outputBed: {description: "The path to write the output to.", category: "advanced"} + genome: {description: "Define sort order by order of tab-delimited file with chromosome names in the first column.", category: "advanced"} + faidx: {description: "Define sort order by order of tab-delimited file with chromosome names in the first column. 
Sort by specified chromosome order.", category: "advanced"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + sortedBed: {description: "The sorted bed file."} + } } task Intersect { @@ -226,5 +255,8 @@ task Intersect { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + intersectedBed: {description: "The intersected bed file."} } } diff --git a/biopet/bamstats.wdl b/biopet/bamstats.wdl index d71355d3..d01bc10c 100644 --- a/biopet/bamstats.wdl +++ b/biopet/bamstats.wdl @@ -24,18 +24,19 @@ import "../common.wdl" as common task Generate { input { - String? preCommand - File? toolJar IndexedBamFile bam - File? bedFile Boolean scatterMode = false Boolean onlyUnmapped = false Boolean tsvOutputs = false String outputDir + + String? preCommand + File? toolJar + File? bedFile Reference? reference - String memory = "9G" String javaXmx = "8G" + String memory = "9G" } File referenceFasta = if defined(reference) then select_first([reference]).fasta else "" @@ -66,4 +67,4 @@ task Generate { runtime { memory: memory } -} \ No newline at end of file +} diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index 89319409..07f51e67 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -24,15 +24,16 @@ import "../common.wdl" task BaseCounter { input { - String? preCommand - File? 
toolJar IndexedBamFile bam File refFlat String outputDir String prefix - String memory = "5G" + String? preCommand + File? toolJar + String javaXmx = "4G" + String memory = "5G" } String toolCommand = if defined(toolJar) @@ -98,16 +99,17 @@ task ExtractAdaptersFastqc { String outputDir String adapterOutputFilePath = outputDir + "/adapter.list" String contamsOutputFilePath = outputDir + "/contaminations.list" + Boolean? skipContams File? knownContamFile File? knownAdapterFile Float? adapterCutoff Boolean? outputAsFasta - String memory = "9G" String javaXmx = "8G" - String dockerImage = "quay.io/biocontainers/biopet-extractadaptersfastqc:0.2--1" + String memory = "9G" Int timeMinutes = 5 + String dockerImage = "quay.io/biocontainers/biopet-extractadaptersfastqc:0.2--1" } command { @@ -133,20 +135,21 @@ task ExtractAdaptersFastqc { runtime { memory: memory - docker: dockerImage time_minutes: timeMinutes + docker: dockerImage } } task FastqSplitter { input { - String? preCommand File inputFastq Array[String]+ outputPaths + + String? preCommand File? toolJar - String memory = "5G" String javaXmx = "4G" + String memory = "5G" String dockerImage = "quay.io/biocontainers/biopet-fastqsplitter:0.1--2" } @@ -170,15 +173,16 @@ task FastqSplitter { task FastqSync { input { - String? preCommand FastqPair refFastq FastqPair inputFastq String out1path String out2path + + String? preCommand File? toolJar - String memory = "5G" String javaXmx = "4G" + String memory = "5G" } String toolCommand = if defined(toolJar) @@ -200,8 +204,8 @@ task FastqSync { output { FastqPair out1 = object { - R1: out1path, - R2: out2path + R1: out1path, + R2: out2path } } @@ -215,14 +219,15 @@ task ScatterRegions { File referenceFasta File referenceFastaDict Int scatterSizeMillions = 1000 + Boolean notSplitContigs = false + Int? scatterSize File? regions - Boolean notSplitContigs = false File? bamFile File? 
bamIndex - String memory = "1G" String javaXmx = "500M" + String memory = "1G" Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/biopet-scatterregions:0.2--0" } @@ -264,41 +269,40 @@ task ScatterRegions { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} + notSplitContigs: {description: "Equivalent to biopet scatterregions' `--notSplitContigs` flag.", category: "advanced"} scatterSize: {description: "Overrides scatterSizeMillions with a smaller value if set.", category: "advanced"} regions: {description: "The regions to be scattered.", category: "advanced"} - notSplitContigs: {description: "Equivalent to biopet scatterregions' `--notSplitContigs` flag.", - category: "advanced"} - bamFile: {description: "Equivalent to biopet scatterregions' `--bamfile` option.", - category: "advanced"} + bamFile: {description: "Equivalent to biopet scatterregions' `--bamfile` option.", category: "advanced"} bamIndex: {description: "The index for the bamfile given through bamFile.", category: "advanced"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + scatters: {description: "Smaller scatter regions of equal size."} } } task ValidateAnnotation { input { + Reference reference + File? refRefflat File? gtfFile - Reference reference - String memory = "4G" String javaXmx = "3G" + String memory = "4G" String dockerImage = "quay.io/biocontainers/biopet-validateannotation:0.1--0" } @@ -323,8 +327,9 @@ task ValidateFastq { input { File read1 File? read2 - String memory = "4G" + String javaXmx = "3G" + String memory = "4G" String dockerImage = "quay.io/biocontainers/biopet-validatefastq:0.1.1--1" } @@ -348,8 +353,9 @@ task ValidateVcf { input { IndexedVcfFile vcf Reference reference - String memory = "4G" + String javaXmx = "3G" + String memory = "4G" String dockerImage = "quay.io/biocontainers/biopet-validatevcf:0.1--0" } @@ -374,12 +380,6 @@ task VcfStats { IndexedVcfFile vcf Reference reference String outputDir - File? intervals - Array[String]+? infoTags - Array[String]+? genotypeTags - Int? sampleToSampleMinDepth - Int? binSize - Int? maxContigsInSingleJob Boolean writeBinStats = false Int localThreads = 1 Boolean notWriteContigStats = false @@ -387,13 +387,20 @@ task VcfStats { Boolean skipGenotype = false Boolean skipSampleDistributions = false Boolean skipSampleCompare = false + + File? intervals + Array[String]+? infoTags + Array[String]+? genotypeTags + Int? sampleToSampleMinDepth + Int? binSize + Int? maxContigsInSingleJob String? sparkMaster Int? sparkExecutorMemory Array[String]+? 
sparkConfigValues - String dockerImage = "quay.io/biocontainers/biopet-vcfstats:1.2--0" - String memory = "5G" String javaXmx = "4G" + String memory = "5G" + String dockerImage = "quay.io/biocontainers/biopet-vcfstats:1.2--0" } command { diff --git a/biopet/sampleconfig.wdl b/biopet/sampleconfig.wdl index 2b36952b..f3955658 100644 --- a/biopet/sampleconfig.wdl +++ b/biopet/sampleconfig.wdl @@ -24,18 +24,19 @@ import "../common.wdl" as common task SampleConfig { input { - File? toolJar - String? preCommand Array[File]+ inputFiles String keyFilePath + + File? toolJar + String? preCommand String? sample String? library String? readgroup String? jsonOutputPath String? tsvOutputPath - String memory = "17G" String javaXmx = "16G" + String memory = "17G" } String toolCommand = if defined(toolJar) @@ -69,13 +70,14 @@ task SampleConfig { task SampleConfigCromwellArrays { input { - File? toolJar - String? preCommand Array[File]+ inputFiles String outputPath - String memory = "5G" + File? toolJar + String? preCommand + String javaXmx = "4G" + String memory = "5G" } String toolCommand = if defined(toolJar) @@ -102,16 +104,17 @@ task SampleConfigCromwellArrays { task CaseControl { input { - File? toolJar - String? preCommand Array[File]+ inputFiles Array[File]+ inputIndexFiles Array[File]+ sampleConfigs String outputPath String controlTag = "control" - String memory = "5G" + File? toolJar + String? preCommand + String javaXmx = "4G" + String memory = "5G" } String toolCommand = if defined(toolJar) diff --git a/biopet/seqstat.wdl b/biopet/seqstat.wdl index e3a55ec3..c2eb5866 100644 --- a/biopet/seqstat.wdl +++ b/biopet/seqstat.wdl @@ -24,16 +24,17 @@ import "../common.wdl" as common task Generate { input { - String? preCommand - File? toolJar FastqPair fastq String outputFile String sample String library String readgroup - String memory = "5G" + String? preCommand + File? 
toolJar + String javaXmx = "4G" + String memory = "5G" } String toolCommand = if defined(toolJar) @@ -60,4 +61,4 @@ task Generate { runtime { memory: memory } -} \ No newline at end of file +} diff --git a/biowdl.wdl b/biowdl.wdl index 8a1f9dfd..06b1d756 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -68,5 +68,8 @@ task InputConverter { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + json: {description: "JSON file version of the input sample sheet."} } } diff --git a/bowtie.wdl b/bowtie.wdl index 7fb1b614..87210dcd 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -100,5 +100,9 @@ task Bowtie { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "Output alignment file."} + outputBamIndex: {description: "Index of output alignment file."} } } diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 844d6990..fba1af5a 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -68,6 +68,9 @@ task ChunkedScatter { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + scatters: {description: "Overlapping chunks of a given size in new bed files."} } } @@ -118,5 +121,8 @@ task ScatterRegions { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + scatters: {description: "Bed file where the contigs add up approximately to the given scatter size."} } } From 9d68eb40b045b859cb2619b0f1cec1d2f7437f2e Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 6 Nov 2020 12:08:06 +0100 Subject: [PATCH 145/668] Update more tasks. --- .github/PULL_REQUEST_TEMPLATE.md | 3 ++- TO-DO.md | 19 ++++++++++++++----- clever.wdl | 5 ++++- collect-columns.wdl | 3 +++ common.wdl | 6 ++++++ 5 files changed, 29 insertions(+), 7 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 1d52f502..3b4ec9ac 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,3 +1,4 @@ ### Checklist - [ ] Pull request details were added to CHANGELOG.md. -- [ ] `parameter_meta` for each task is up to date. +- [ ] Documentation was updated (if required). +- [ ] `parameter_meta` was added/updated (if required). diff --git a/TO-DO.md b/TO-DO.md index cc76a5d6..7a18bb33 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -1,13 +1,22 @@ #TO DO ## Requires parameter_meta: -* biopet.wdl: `ExtractAdaptersFastqc`. +* biopet.wdl: `ExtractAdaptersFastqc` ## Duplicate tasks: * ## Out of date with new cluster & parameter_meta: -* bamstats.wdl: `Generate`. 
+* bamstats.wdl: `Generate` * biopet.wdl: `BaseCounter`, `FastqSplitter`, `FastqSync`, - `ValidateAnnotation`, `ValidateFastq`, `ValidateVcf`, `VcfStats`. -* sampleconfig.wdl: `SampleConfig`, `SampleConfigCromwellArrays`, `CaseControl`. -* seqstat.wdl: `Generate`. + `ValidateAnnotation`, `ValidateFastq`, `ValidateVcf`, `VcfStats` +* sampleconfig.wdl: `SampleConfig`, `SampleConfigCromwellArrays`, `CaseControl` +* seqstat.wdl: `Generate` +* common.wdl: `AppendToStringArray`, `CheckFileMD5`, `ConcatenateTextFiles`, + `Copy`, `CreateLink`, `MapMd5`, `StringArrayMd5` + +## Imports other tasks: +* bamstats.wdl +* biopet.wdl +* sampleconfig.wdl +* seqstat.wdl +* clever.wdl diff --git a/clever.wdl b/clever.wdl index 75e889b3..186be514 100644 --- a/clever.wdl +++ b/clever.wdl @@ -80,6 +80,9 @@ task Mateclever { memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + matecleverVcf: {description: "VCF with additional mateclever results."} } } @@ -132,6 +135,6 @@ task Prediction { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - predictions: {description: "The predicted deletions (VCF) from clever.", category: "advanced"} + predictions: {description: "The predicted deletions (VCF) from clever."} } } diff --git a/collect-columns.wdl b/collect-columns.wdl index 67db6179..3d65c7e7 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -83,5 +83,8 @@ task CollectColumns { memoryGb: {description: "The maximum amount of memory the job will need in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputTable: {description: "All input columns combined into one table."} } } diff --git a/common.wdl b/common.wdl index b3878bb6..66bdb99c 100644 --- a/common.wdl +++ b/common.wdl @@ -207,6 +207,9 @@ task TextToFile { outputFile: {description: "The name of the output file.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + out: {description: "File containing input text."} } } @@ -251,6 +254,9 @@ task YamlToJson { memory: {description: "The maximum amount of memory the job will need.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + json: {description: "JSON file version of input YAML."} } } From 3123947972f8a4cb288f96e539e143b40e3e136a Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 6 Nov 2020 13:04:35 +0100 Subject: [PATCH 146/668] Update even more tasks. --- CPAT.wdl | 3 +++ TO-DO.md | 12 ++++++------ biopet/biopet.wdl | 23 +++++++++++++++++++++++ cutadapt.wdl | 14 ++++++++++++++ 4 files changed, 46 insertions(+), 6 deletions(-) diff --git a/CPAT.wdl b/CPAT.wdl index d97031dc..afb67853 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -76,6 +76,9 @@ task CPAT { stopCodons: {description: "Equivalent to CPAT's `--stop` option.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outFile: {description: "CPAT logistic regression model."} } } diff --git a/TO-DO.md b/TO-DO.md index 7a18bb33..be118b70 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -1,10 +1,4 @@ #TO DO -## Requires parameter_meta: -* biopet.wdl: `ExtractAdaptersFastqc` - -## Duplicate tasks: -* - ## Out of date with new cluster & parameter_meta: * bamstats.wdl: `Generate` * biopet.wdl: `BaseCounter`, `FastqSplitter`, `FastqSync`, @@ -20,3 +14,9 @@ * sampleconfig.wdl * seqstat.wdl * clever.wdl + +## Requires parameter_meta: +* + +## Duplicate tasks: +* diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index 07f51e67..e6619e09 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -138,6 +138,29 @@ task ExtractAdaptersFastqc { time_minutes: timeMinutes docker: dockerImage } + + parameter_meta { + # inputs + inputFile: {description: "Input fastq file.", category: "required"} + outputDir: {description: "The path to which the output should be written.", 
category: "required"} + adapterOutputFilePath: {description: "Output file for adapters, if not supplied output will go to stdout.", category: "common"} + contamsOutputFilePath: {description: "Output file for contaminations, if not supplied output will go to stdout.", category: "common"} + skipContams: {description: "If this is set only the adapters block is used, otherwise contaminations is also used.", category: "advanced"} + knownContamFile: {description: "This file should contain the known contaminations from fastqc.", category: "advanced"} + knownAdapterFile: {description: "This file should contain the known adapters from fastqc.", category: "advanced"} + adapterCutoff: {description: "The fraction of the adapters in a read should be above this fraction, default is 0.001.", category: "advanced"} + outputAsFasta: {description: "Output in fasta format, default only sequences.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + adapterOutputFile: {description: "Output file with adapters."} + contamsOutputFile: {description: "Output file with contaminations."} + adapterList: {description: "List of adapters."} + contamsList: {description: "List of contaminations."} + } } task FastqSplitter { diff --git a/cutadapt.wdl b/cutadapt.wdl index 74f57912..8d409c3d 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -235,5 +235,19 @@ task Cutadapt { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + cutRead1: {description: ""} + report: {description: ""} + cutRead2: {description: ""} + tooLongOutput: {description: ""} + tooShortOutput: {description: ""} + untrimmedOutput: {description: ""} + tooLongPairedOutput: {description: ""} + tooShortPairedOutput: {description: ""} + untrimmedPairedOutput: {description: ""} + infoFile: {description: ""} + restFile: {description: ""} + wildcardFile: {description: ""} } } From e76c0c02f410eb8b8f9b09b9cbccb125930d4c35 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 6 Nov 2020 13:50:40 +0100 Subject: [PATCH 147/668] Upload more tasks. --- cutadapt.wdl | 24 ++++++++++++------------ deepvariant.wdl | 7 +++++++ delly.wdl | 3 +++ fastqc.wdl | 12 ++++++++++++ 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index 8d409c3d..b2dbdec0 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -237,17 +237,17 @@ task Cutadapt { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - cutRead1: {description: ""} - report: {description: ""} - cutRead2: {description: ""} - tooLongOutput: {description: ""} - tooShortOutput: {description: ""} - untrimmedOutput: {description: ""} - tooLongPairedOutput: {description: ""} - tooShortPairedOutput: {description: ""} - untrimmedPairedOutput: {description: ""} - infoFile: {description: ""} - restFile: {description: ""} - wildcardFile: {description: ""} + cutRead1: {description: "Trimmed read one."} + report: {description: "Per-adapter statistics file."} + cutRead2: {description: "Trimmed read two in pair."} + tooLongOutput: {description: "Reads that are too long according to -M."} + tooShortOutput: {description: "Reads that are too short according to -m."} + untrimmedOutput: {description: "All reads without adapters (instead of the regular output file)."} + tooLongPairedOutput: {description: "Second reads in pairs that were too long according to -M."} + tooShortPairedOutput: {description: "Second reads in pairs that were too short according to -m."} + untrimmedPairedOutput: {description: "The second reads in a pair that were not trimmed."} + infoFile: {description: "Detailed information about where adapters were found in each read."} + restFile: {description: "The rest file."} + wildcardFile: {description: "The wildcard file."} } } diff --git a/deepvariant.wdl b/deepvariant.wdl index 8b08e111..f71a1c88 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -90,5 +90,12 @@ task RunDeepVariant { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVCF: {description: "Output VCF file."} + outputVCFIndex: {description: "Index of output VCF file."} + outputVCFStatsReport: {description: "Statistics file."} + outputGVCF: {description: "GVCF version of VCF file(s)."} + outputGVCFIndex: {description: "Index of GVCF file(s)."} } } diff --git a/delly.wdl b/delly.wdl index ffe9023a..bf00ed36 100644 --- a/delly.wdl +++ b/delly.wdl @@ -62,5 +62,8 @@ task CallSV { memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + dellyBcf: {description: "File containing structural variants."} } } diff --git a/fastqc.wdl b/fastqc.wdl index feeeaae5..7ca0baa8 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -125,6 +125,13 @@ task Fastqc { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + htmlReport: {description: ""} + reportZip: {description: ""} + summary: {description: ""} + rawReport: {description: ""} + images: {description: ""} } meta { @@ -167,5 +174,10 @@ task GetConfiguration { # inputs timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + adapterList: {description: ""} + contaminantList: {description: ""} + limits: {description: ""} } } From 091c3e313077aff989a43e13052625330646ad47 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 6 Nov 2020 17:12:40 +0100 Subject: [PATCH 148/668] Upload another batch. --- TO-DO.md | 2 ++ fastqc.wdl | 16 ++++++++-------- gatk.wdl | 45 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 8 deletions(-) diff --git a/TO-DO.md b/TO-DO.md index be118b70..8c1723b6 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -7,6 +7,8 @@ * seqstat.wdl: `Generate` * common.wdl: `AppendToStringArray`, `CheckFileMD5`, `ConcatenateTextFiles`, `Copy`, `CreateLink`, `MapMd5`, `StringArrayMd5` +* fastqsplitter.wdl: `Fastqsplitter` +* flash.wdl: `Flash` ## Imports other tasks: * bamstats.wdl diff --git a/fastqc.wdl b/fastqc.wdl index 7ca0baa8..973eeed9 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -127,11 +127,11 @@ task Fastqc { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - htmlReport: {description: ""} - reportZip: {description: ""} - summary: {description: ""} - rawReport: {description: ""} - images: {description: ""} + htmlReport: {description: "HTML report file."} + reportZip: {description: "Source data file."} + summary: {description: "Summary file."} + rawReport: {description: "Raw report file."} + images: {description: "Images in report file."} } meta { @@ -176,8 +176,8 @@ task GetConfiguration { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - adapterList: {description: ""} - contaminantList: {description: ""} - limits: {description: ""} + adapterList: {description: "List of adapters found."} + contaminantList: {description: "List of contaminants found."} + limits: {description: "Limits file."} } } diff --git a/gatk.wdl b/gatk.wdl index 7aa2915c..d13c1175 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -78,6 +78,9 @@ task AnnotateIntervals { javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + annotatedIntervals: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a sequence dictionary, a row specifying the column headers for the contained annotations, and the corresponding entry rows."} } } @@ -145,6 +148,11 @@ task ApplyBQSR { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + recalibratedBam: {description: "A BAM file containing the recalibrated read data."} + recalibratedBamIndex: {description: "Index of recalibrated BAM file."} + recalibratedBamMd5: {description: "MD5 of recalibrated BAM file."} } } @@ -211,6 +219,9 @@ task BaseRecalibrator { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + recalibrationReport: {description: "A GATK Report file with many tables."} } } @@ -255,6 +266,10 @@ task CalculateContamination { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + contaminationTable: {description: "Table with fractions of reads from cross-sample contamination."} + mafTumorSegments: {description: "Tumor segments table."} } } @@ -297,6 +312,10 @@ task CallCopyRatioSegments { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + calledSegments: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a read group sample name, a sequence dictionary, a row specifying the column headers contained in CalledCopyRatioSegmentCollection.CalledCopyRatioSegmentTableColumn, and the corresponding entry rows."} + calledSegmentsIgv: {description: "This is a tab-separated values (TSV) file with CBS-format column headers and the corresponding entry rows that can be plotted using IGV."} } } @@ -353,6 +372,9 @@ task CollectAllelicCounts { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + allelicCounts: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a read group sample name, a sequence dictionary, a row specifying the column headers contained in AllelicCountCollection.AllelicCountTableColumn, and the corresponding entry rows."} } } @@ -410,6 +432,9 @@ task CollectReadCounts { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + counts: {description: "Read counts at specified intervals."} } } @@ -464,6 +489,10 @@ task CombineGVCFs { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "A combined multi-sample gVCF."} + outputVcfIndex: {description: "Index of the output file."} } } @@ -535,6 +564,10 @@ task CombineVariants { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + combinedVcf: {description: "Combined VCF file."} + combinedVcfIndex: {description: "Index of combined VCF file."} } } @@ -580,6 +613,9 @@ task CreateReadCountPanelOfNormals { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + PON: {description: "Panel-of-normals file."} } } @@ -630,6 +666,10 @@ task DenoiseReadCounts { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + standardizedCopyRatios: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a read group sample name, a sequence dictionary, a row specifying the column headers contained in CopyRatioCollection.CopyRatioTableColumn, and the corresponding entry rows."} + denoisedCopyRatios: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a read group sample name, a sequence dictionary, a row specifying the column headers contained in CopyRatioCollection.CopyRatioTableColumn, and the corresponding entry rows."} } } @@ -700,6 +740,11 @@ task FilterMutectCalls { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + filteredVcf: {description: ""} + filteredVcfIndex: {description: ""} + filteringStats: {description: ""} } } From eeff6ce5e37f75aa508fec3bf9ba38bede23dd17 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 9 Nov 2020 11:01:08 +0100 Subject: [PATCH 149/668] Update gatk with outputs. 
--- gatk.wdl | 80 +++++++++++++++++++++++++++++++++++++++++++++++--- gffcompare.wdl | 9 ++++++ 2 files changed, 85 insertions(+), 4 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index d13c1175..82244caa 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -581,7 +581,8 @@ task CreateReadCountPanelOfNormals { String javaXmx = "7G" String memory = "8G" Int timeMinutes = 5 - String dockerImage = "broadinstitute/gatk:4.1.8.0" # The biocontainer causes a spark related error for some reason... + # The biocontainer causes a spark related error for some reason. + String dockerImage = "broadinstitute/gatk:4.1.8.0" } command { @@ -742,9 +743,9 @@ task FilterMutectCalls { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - filteredVcf: {description: ""} - filteredVcfIndex: {description: ""} - filteringStats: {description: ""} + filteredVcf: {description: "VCF file with filtered variants from a Mutect2 VCF callset."} + filteredVcfIndex: {description: "Index of output VCF file."} + filteringStats: {description: "The output filtering stats file."} } } @@ -787,6 +788,9 @@ task GatherBqsrReports { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBQSRreport: {description: "Single file with scattered BQSR recalibration reports gathered into one."} } } @@ -840,6 +844,9 @@ task GenomicsDBImport { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + genomicsDbTarArchive: {description: "Imported VCFs to GenomicsDB file."} } } @@ -907,6 +914,10 @@ task GenotypeGVCFs { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVCF: {description: "A final VCF in which all samples have been jointly genotyped. "} + outputVCFIndex: {description: "Index of final VCF file."} } } @@ -959,6 +970,9 @@ task GetPileupSummaries { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + pileups: {description: "Pileup metrics for inferring contamination."} } } @@ -1047,6 +1061,10 @@ task HaplotypeCaller { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVCF: {description: "Raw, unfiltered SNP and indel calls."} + outputVCFIndex: {description: "Index of output VCF."} } } @@ -1085,6 +1103,9 @@ task LearnReadOrientationModel { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + artifactPriorsTable: {description: "Maximum likelihood estimates of artifact prior probabilities in the orientation bias mixture model filter."} } } @@ -1123,6 +1144,9 @@ task MergeStats { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + mergedStats: {description: "Merged stats from scattered Mutect2 runs."} } } @@ -1190,6 +1214,19 @@ task ModelSegments { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + hetrozygousAllelicCounts: {description: "Allelic-counts file containing the counts at sites genotyped as heterozygous in the case sample."} + copyRatioSegments: {description: "It contains the segments from the .modelFinal.seg file converted to a format suitable for input to CallCopyRatioSegments."} + copyRatioCBS: {description: "The posterior medians of the log2 copy ratio."} + alleleFractionCBS: {description: "Minor-allele fraction."} + unsmoothedModeledSegments: {description: "The initial modeled-segments result before segmentation smoothing."} + unsmoothedCopyRatioParameters: {description: "The initial copy-ratio-model global-parameter result before segmentation smoothing"} + unsmoothedAlleleFractionParameters: {description: "The initial allele-fraction-model global-parameter result before segmentation smoothing."} + modeledSegments: {description: "The final modeled-segments result after segmentation smoothing."} + copyRatioParameters: {description: "The final copy-ratio-model global-parameter result after segmentation smoothing"} + alleleFractionParameters: {description: "The final allele-fraction-model global-parameter result after segmentation smoothing."} + normalHetrozygousAllelicCounts: {description: "Allelic-counts file containing the counts at sites genotyped as heterozygous in the matched-normal sample."} } } @@ -1268,6 +1305,12 @@ task 
MuTect2 { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + vcfFile: {description: "Somatic SNVs and indels called via local assembly of haplotypes."} + vcfFileIndex: {description: "Index for Mutect2 VCF."} + f1r2File: {description: "Contains information that can then be passed to LearnReadOrientationModel, which generate an artifact prior table for each tumor sample for FilterMutectCalls to use."} + stats: {description: "Stats file."} } } @@ -1327,6 +1370,14 @@ task PlotDenoisedCopyRatios { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + denoisedCopyRatiosPlot: {description: "Plot showing the entire range of standardized and denoised copy ratios."} + standardizedMedianAbsoluteDeviation: {description: "Standardized median absolute deviation copy ratios."} + denoisedMedianAbsoluteDeviation: {description: "Denoised median absolute deviation copy ratios."} + deltaMedianAbsoluteDeviation: {description: "The change between `standardizedMedianAbsoluteDeviation` & `denoisedMedianAbsoluteDeviation`."} + deltaScaledMedianAbsoluteDeviation: {description: "The change between `standardizedMedianAbsoluteDeviation` & `denoisedMedianAbsoluteDeviation` scaled by standardized MAD."} + denoisedCopyRatiosLimitedPlot: {description: "Plot showing the standardized and denoised copy ratios limited to ratios within [0, 4]."} } } @@ -1384,6 +1435,9 @@ task PlotModeledSegments { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + modeledSegmentsPlot: {description: "This plot shows the input denoised copy ratios and/or alternate-allele fractions as points, as well as box plots for the available posteriors in each segment."} } } @@ -1443,6 +1497,9 @@ task PreprocessIntervals { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + intervalList: {description: "Preprocessed Picard interval-list file."} } } @@ -1501,6 +1558,10 @@ task SelectVariants { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "A new VCF file containing the selected subset of variants."} + outputVcfIndex: {description: "Index of the new output VCF file."} } } @@ -1555,6 +1616,10 @@ task SplitNCigarReads { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + bam: {description: "BAM file with reads split at N CIGAR elements and CIGAR strings updated."} + bamIndex: {description: "Index of output BAM file."} } } @@ -1639,6 +1704,9 @@ task VariantEval { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + table: {description: "Evaluation tables detailing the results of the eval modules which were applied."} } } @@ -1696,5 +1764,9 @@ task VariantFiltration { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + filteredVcf: {description: "A filtered VCF in which passing variants are annotated as PASS and failing variants are annotated with the name(s) of the filter(s) they failed. "} + filteredVcfIndex: {description: "Index of filtered VCF."} } } diff --git a/gffcompare.wdl b/gffcompare.wdl index 8bd53091..f8f0ade0 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -142,6 +142,15 @@ task GffCompare { namePrefix: {description: "Equivalent to gffcompare's `-p` option.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + annotated: {description: ""} + loci: {description: ""} + stats: {description: ""} + tracking: {description: ""} + allFiles: {description: ""} + redundant: {description: ""} + missedIntrons: {description: ""} } meta { From 47aab9c501eb8c3a80250c4d993d0b2e5614ae16 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 9 Nov 2020 11:33:45 +0100 Subject: [PATCH 150/668] Upload some more tasks. 
--- gffcompare.wdl | 12 ++++++------ gffread.wdl | 6 ++++++ gridss.wdl | 6 ++++++ 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/gffcompare.wdl b/gffcompare.wdl index f8f0ade0..221c4907 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -144,13 +144,13 @@ task GffCompare { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - annotated: {description: ""} + annotated: {description: "Annotated GTF file."} loci: {description: ""} - stats: {description: ""} - tracking: {description: ""} - allFiles: {description: ""} - redundant: {description: ""} - missedIntrons: {description: ""} + stats: {description: "Various statistics related to the “accuracy” (or a measure of agreement) of the input transcripts when compared to reference annotation data."} + tracking: {description: "File matching transcripts up between samples."} + allFiles: {description: "A collection of all outputs files."} + redundant: {description: "File containing "duplicate"/"redundant" transcripts."} + missedIntrons: {description: "File denoting missed introns."} } meta { diff --git a/gffread.wdl b/gffread.wdl index 343011e9..66230989 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -80,5 +80,11 @@ task GffRead { filteredGffPath: {description: "The location the filtered GFF should be written to.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + exonsFasta: {description: "Fasta file containing exons."} + CDSFasta: {description: "Fasta file containing CDS's."} + proteinFasta: {description: "Fasta file containing proteins."} + filteredGff: {description: "Filtered GFF file."} } } diff --git a/gridss.wdl b/gridss.wdl index 9499be5e..65579fd9 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -81,5 +81,11 @@ task GRIDSS { jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling.",category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + vcf: {description: ""} + vcfIndex: {description: ""} + assembly: {description: ""} + assemblyIndex: {description: ""} } } From 68d98441faddc47b8060d42864e11df7907bc0e6 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 9 Nov 2020 14:28:07 +0100 Subject: [PATCH 151/668] Update more tasks. 
--- LICENSE | 6 ++---- README.md | 16 ++++++---------- TO-DO.md | 9 +++++++++ gffcompare.wdl | 2 +- gridss.wdl | 8 ++++---- hisat2.wdl | 4 ++++ htseq.wdl | 3 +++ manta.wdl | 7 +++++++ requirements-test.txt | 11 ++++++++++- 9 files changed, 46 insertions(+), 20 deletions(-) diff --git a/LICENSE b/LICENSE index 37eeade5..b1f2b679 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,3 @@ -MIT License - Copyright (c) 2017 Leiden University Medical Center Permission is hereby granted, free of charge, to any person obtaining a copy @@ -9,8 +7,8 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, diff --git a/README.md b/README.md index 246e3814..4bc58367 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,21 @@ # Tasks - This repository contains the WDL task definitions used in the various [Biowdl](https://github.com/biowdl) workflows and pipelines. - ## Documentation - -Documentation for this workflow can be found +Documentation for this repository can be found [here](https://biowdl.github.io/tasks/). ## About -These tasks are part of [Biowdl](https://github.com/biowdl) -developed by [the SASC team](http://sasc.lumc.nl/). +These workflows are part of [Biowdl](https://github.com/biowdl) +developed by the SASC team at [Leiden University Medical Center](https://www.lumc.nl/). ## Contact -

-For any question related to these tasks, please use the +For any question related to Expression-Quantification, please use the github issue tracker -or contact - the SASC team directly at: +or contact the SASC team directly at: + sasc@lumc.nl.

diff --git a/TO-DO.md b/TO-DO.md index 8c1723b6..69f359bd 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -1,4 +1,12 @@ #TO DO +This file describes WDL files and tasks within those files which need +more specific attention than just adding outputs to the parameter_meta. + +Some tasks have not been updated to match the new SLURM requirements and are +missing a parameter_meta section. + +Some tasks are importing other WDL files. + ## Out of date with new cluster & parameter_meta: * bamstats.wdl: `Generate` * biopet.wdl: `BaseCounter`, `FastqSplitter`, `FastqSync`, @@ -9,6 +17,7 @@ `Copy`, `CreateLink`, `MapMd5`, `StringArrayMd5` * fastqsplitter.wdl: `Fastqsplitter` * flash.wdl: `Flash` +* macs2.wdl: `PeakCalling` ## Imports other tasks: * bamstats.wdl diff --git a/gffcompare.wdl b/gffcompare.wdl index 221c4907..4b0d6d22 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -149,7 +149,7 @@ task GffCompare { stats: {description: "Various statistics related to the “accuracy” (or a measure of agreement) of the input transcripts when compared to reference annotation data."} tracking: {description: "File matching transcripts up between samples."} allFiles: {description: "A collection of all outputs files."} - redundant: {description: "File containing "duplicate"/"redundant" transcripts."} + redundant: {description: "File containing duplicate/redundant transcripts."} missedIntrons: {description: "File denoting missed introns."} } diff --git a/gridss.wdl b/gridss.wdl index 65579fd9..5f48afac 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -83,9 +83,9 @@ task GRIDSS { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - vcf: {description: ""} - vcfIndex: {description: ""} - assembly: {description: ""} - assemblyIndex: {description: ""} + vcf: {description: "VCF file including variant allele fractions."} + vcfIndex: {description: "Index of output VCF."} + assembly: {description: "The GRIDSS assembly BAM."} + assemblyIndex: {description: "Index of output BAM file."} } } diff --git a/hisat2.wdl b/hisat2.wdl index b52bf70f..a2c0777c 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -106,5 +106,9 @@ task Hisat2 { memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + bamFile: {description: "Output BAM file."} + summaryFile: {description: "Alignment summary file."} } } diff --git a/htseq.wdl b/htseq.wdl index cf527535..dfa3fcf2 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -78,5 +78,8 @@ task HTSeqCount { memory: {description: "The amount of memory the job requires in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + counts: {description: "Count table based on input BAM file."} } } diff --git a/manta.wdl b/manta.wdl index a7b7cf38..4b7ea264 100644 --- a/manta.wdl +++ b/manta.wdl @@ -79,6 +79,10 @@ task Germline { memoryGb: {description: "The memory required to run the manta", category: "required"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + mantaVCF: {description: ""} + mantaVCFindex: {description: ""} } } @@ -155,5 +159,8 @@ task Somatic { memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + } } diff --git a/requirements-test.txt b/requirements-test.txt index f074413b..0b01d193 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,2 +1,11 @@ +# These are the programs used for testing these biowdl tasks. +# These requirements can be installed with conda with the bioconda channel +# activated. +# For more information on how to set up conda with bioconda channel see: +# http://bioconda.github.io/#install-conda +# This file can be installed with "conda install --file requirements-test.txt". + cromwell -miniwdl \ No newline at end of file +womtool +miniwdl +wdl-aid From 20ee22a6b9b2063cd900426b54549ba98d9f60d3 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 9 Nov 2020 14:32:17 +0100 Subject: [PATCH 152/668] Update README. 
--- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4bc58367..9d682de7 100644 --- a/README.md +++ b/README.md @@ -7,13 +7,13 @@ Documentation for this repository can be found [here](https://biowdl.github.io/tasks/). ## About -These workflows are part of [Biowdl](https://github.com/biowdl) +These tasks are part of [Biowdl](https://github.com/biowdl) developed by the SASC team at [Leiden University Medical Center](https://www.lumc.nl/). ## Contact

-For any question related to Expression-Quantification, please use the +For any question related to these tasks, please use the github issue tracker or contact the SASC team directly at: From c9e62bf9a8b562ddee8ecd2ae12aa8784ed1d4ce Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 9 Nov 2020 17:32:07 +0100 Subject: [PATCH 153/668] Update tasks. --- manta.wdl | 13 ++++++++++--- multiqc.wdl | 4 ++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/manta.wdl b/manta.wdl index 4b7ea264..1c949af2 100644 --- a/manta.wdl +++ b/manta.wdl @@ -81,8 +81,8 @@ task Germline { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - mantaVCF: {description: ""} - mantaVCFindex: {description: ""} + mantaVCF: {description: "SVs and indels scored and genotyped under a diploid model for the set of samples in a joint diploid sample analysis or for the normal sample in a tumor/normal subtraction analysis."} + mantaVCFindex: {description: "Index of output mantaVCF."} } } @@ -161,6 +161,13 @@ task Somatic { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - + candidateSmallIndelsVcf: {description: "Subset of the candidateSV.vcf.gz file containing only simple insertion and deletion variants less than the minimum scored variant size."} + candidateSmallIndelsVcfIndex: {description: "Index of output VCF file candidateSmallIndelsVcf."} + candidateSVVcf: {description: "Unscored SV and indel candidates."} + candidatSVVcfIndex: {description: "Index of output VCF file candidateSVVcf."} + tumorSVVcf: {description: "Subset of the candidateSV.vcf.gz file after removing redundant candidates and small indels less than the minimum scored variant size."} + tumorSVVcfIndex: {description: "Index of output VCF file tumorSVVcf."} + diploidSV: {description: "SVs and indels scored and genotyped under a diploid model for the set of samples in a joint diploid sample analysis or for the normal sample in a tumor/normal subtraction analysis."} + diploidSVindex: {description: "Index of output VCF file diploidSV."} } } diff --git a/multiqc.wdl b/multiqc.wdl index 647394e9..1d248dd6 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -177,5 +177,9 @@ task MultiQC { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + multiqcReport: {description: ""} + multiqcDataDirZip: {description: ""} } } From 100d8add0f092f9396be00cce03491cf0fffa654 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 10 Nov 2020 09:52:53 +0100 Subject: [PATCH 154/668] Add optional gvcf index input --- deepvariant.wdl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index 8b08e111..6a2b70b3 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -28,11 +28,11 @@ task RunDeepVariant { File inputBamIndex String modelType String outputVcf - String? postprocessVariantsExtraArgs File? customizedModel Int? numShards String? outputGVcf + String? outputGVcfIndex File? regions String? sampleName Boolean? VCFStatsReport = true @@ -44,6 +44,7 @@ task RunDeepVariant { command { set -e + /opt/deepvariant/bin/run_deepvariant \ --ref ~{referenceFasta} \ --reads ~{inputBam} \ @@ -66,10 +67,10 @@ task RunDeepVariant { output { File outputVCF = outputVcf - File outputVCFIndex = outputVCF + ".tbi" + File outputVCFIndex = outputVcf + ".tbi" Array[File] outputVCFStatsReport = glob("*.visual_report.html") File? outputGVCF = outputGVcf - File? outputGVCFIndex = outputGVcf + ".tbi" + File? outputGVCFIndex = outputGVcfIndex } parameter_meta { @@ -84,6 +85,7 @@ task RunDeepVariant { customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. If not set, the default for each --model_type will be used.", category: "advanced"} numShards: {description: "Number of shards for make_examples step.", category: "common"} outputGVcf: {description: "Path where we should write gVCF file.", category: "common"} + outputGVcfIndex: {description: "Path to where the gVCF index file will be written. 
This is needed as a workaround, set it to outputGVcf+.tbi.", category: "common"} regions: {description: "List of regions we want to process, in BED/BEDPE format.", category: "advanced"} sampleName: {description: "Sample name to use instead of the sample name from the input reads BAM (SM tag in the header).", category: "common"} VCFStatsReport: {description: "Output a visual report (HTML) of statistics about the output VCF.", category: "common"} From 4836726ee8677c83d7cc1a1175be85435ab695bc Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 10 Nov 2020 09:53:28 +0100 Subject: [PATCH 155/668] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bff5f3c..48d98036 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ DeepVariant: Add an optional input for the gvcf index. + Samtools: `Sort` task now has `threads` in runtime instead of `1`. + Picard: Add parameter_meta to `SortSam`. + pbmm2: Add parameter_meta for `sample`. From aae72ec8459f36a3beb813c824553b56b1a820f5 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 10 Nov 2020 13:09:12 +0100 Subject: [PATCH 156/668] Add more updates. --- TO-DO.md | 2 ++ deepvariant.wdl | 3 +-- multiqc.wdl | 4 ++-- pacbio.wdl | 8 +++++++- pbbam.wdl | 4 ++++ picard.wdl | 9 +++++++++ 6 files changed, 25 insertions(+), 5 deletions(-) diff --git a/TO-DO.md b/TO-DO.md index 69f359bd..7368005a 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -18,6 +18,8 @@ Some tasks are importing other WDL files. 
* fastqsplitter.wdl: `Fastqsplitter` * flash.wdl: `Flash` * macs2.wdl: `PeakCalling` +* ncbi.wdl: `GenomeDownload`, `DownloadNtFasta`, `DownloadAccessionToTaxId` +* ## Imports other tasks: * bamstats.wdl diff --git a/deepvariant.wdl b/deepvariant.wdl index 618200aa..28aee813 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -44,7 +44,6 @@ task RunDeepVariant { command { set -e - /opt/deepvariant/bin/run_deepvariant \ --ref ~{referenceFasta} \ --reads ~{inputBam} \ @@ -85,7 +84,7 @@ task RunDeepVariant { customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. If not set, the default for each --model_type will be used.", category: "advanced"} numShards: {description: "Number of shards for make_examples step.", category: "common"} outputGVcf: {description: "Path where we should write gVCF file.", category: "common"} - outputGVcfIndex: {description: "Path to where the gVCF index file will be written. This is needed as a workaround, set it to outputGVcf+.tbi.", category: "common"} + outputGVcfIndex: {description: "Path to where the gVCF index file will be written. This is needed as a workaround, set it to `outputGVcf + '.tbi.'`", category: "common"} regions: {description: "List of regions we want to process, in BED/BEDPE format.", category: "advanced"} sampleName: {description: "Sample name to use instead of the sample name from the input reads BAM (SM tag in the header).", category: "common"} VCFStatsReport: {description: "Output a visual report (HTML) of statistics about the output VCF.", category: "common"} diff --git a/multiqc.wdl b/multiqc.wdl index 1d248dd6..405c0a0b 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -179,7 +179,7 @@ task MultiQC { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - multiqcReport: {description: ""} - multiqcDataDirZip: {description: ""} + multiqcReport: {description: "Results from bioinformatics analyses across many samples in a single report."} + multiqcDataDirZip: {description: "The parsed data directory compressed with zip."} } } diff --git a/pacbio.wdl b/pacbio.wdl index 01f6d4fd..df0343d9 100644 --- a/pacbio.wdl +++ b/pacbio.wdl @@ -43,7 +43,7 @@ task mergePacBio { } output { - File MergedReport = mergedReport + File outputMergedReport = mergedReport } parameter_meta { @@ -52,6 +52,9 @@ task mergePacBio { mergedReport: {description: "The location the merged PacBio report file should be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputMergedReport: {description: "The PacBio reports merged into one."} } } @@ -85,5 +88,8 @@ task ccsChunks { chunkCount: {description: "The number of chunks to create.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + chunks: {description: "The chunks created based on `chunkCount`."} } } diff --git a/pbbam.wdl b/pbbam.wdl index d271a11a..d893e64d 100644 --- a/pbbam.wdl +++ b/pbbam.wdl @@ -66,5 +66,9 @@ task Index { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + indexedBam: {description: "The original input BAM file."} + index: {description: "The index of the input BAM file."} } } diff --git a/picard.wdl b/picard.wdl index f1876f7b..d288f0e5 100644 --- a/picard.wdl +++ b/picard.wdl @@ -61,6 +61,9 @@ task BedToIntervalList { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + intervalList: {description: "Picard Interval List from a BED file."} } } @@ -125,6 +128,9 @@ task CollectHsMetrics { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + HsMetrics: {description: "Hybrid-selection (HS) metrics for the input BAM file."} } } @@ -240,6 +246,9 @@ task CollectMultipleMetrics { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + alignmentSummary: {description: ""} } } From f111c363b74ec64ee7ba06db7a7ad2b3f3ada05a Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 10 Nov 2020 17:00:31 +0100 Subject: [PATCH 157/668] Update style. --- CHANGELOG.md | 13 +++++-------- README.md | 16 ++++++++-------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 836af4ac..0b668ab1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,6 @@ Changelog ========== -For any question related to these tasks, please use the -github issue tracker +For any question related to Tasks, please use the +github issue tracker or contact the SASC team directly at: - + sasc@lumc.nl.

From b633bd790ee89de61a1673092b9d98cb4006d91e Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 10 Nov 2020 17:06:03 +0100 Subject: [PATCH 158/668] Update more tasks. --- picard.wdl | 31 +++++++++++++++++++++++++++++++ rtg.wdl | 6 ++++++ 2 files changed, 37 insertions(+) diff --git a/picard.wdl b/picard.wdl index d288f0e5..fd278958 100644 --- a/picard.wdl +++ b/picard.wdl @@ -249,6 +249,24 @@ task CollectMultipleMetrics { # outputs alignmentSummary: {description: ""} + baitBiasDetail: {description: ""} + baitBiasSummary: {description: ""} + baseDistributionByCycle: {description: ""} + baseDistributionByCyclePdf: {description: ""} + errorSummary: {description: ""} + gcBiasDetail: {description: ""} + gcBiasPdf: {description: ""} + gcBiasSummary: {description: ""} + insertSizeHistogramPdf: {description: ""} + insertSize: {description: ""} + preAdapterDetail: {description: ""} + preAdapterSummary: {description: ""} + qualityByCycle: {description: ""} + qualityByCyclePdf: {description: ""} + qualityDistribution: {description: ""} + qualityDistributionPdf: {description: ""} + qualityYield: {description: ""} + allStats: {description: ""} } } @@ -301,6 +319,10 @@ task CollectRnaSeqMetrics { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + metrics: {description: ""} + chart: {description: ""} } } @@ -361,6 +383,11 @@ task CollectTargetedPcrMetrics { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + perTargetCoverage: {description: ""} + perBaseCoverage: {description: ""} + metrics: {description: ""} } } @@ -410,6 +437,10 @@ task CollectVariantCallingMetrics { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + details: {description: ""} + summary: {description: ""} } } diff --git a/rtg.wdl b/rtg.wdl index bfd32957..a6f8da52 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -59,6 +59,9 @@ task Format { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + sdf: {description: ""} } } @@ -161,5 +164,8 @@ task VcfEval { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + } } From 8c42b1e1de607623fa00863472c9570158e6e495 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 10 Nov 2020 17:49:13 +0100 Subject: [PATCH 159/668] Update more tasks. 
--- TO-DO.md | 2 +- rtg.wdl | 18 +++++++++++++++--- smoove.wdl | 3 +++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/TO-DO.md b/TO-DO.md index 7368005a..e9824dfb 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -19,7 +19,7 @@ Some tasks are importing other WDL files. * flash.wdl: `Flash` * macs2.wdl: `PeakCalling` * ncbi.wdl: `GenomeDownload`, `DownloadNtFasta`, `DownloadAccessionToTaxId` -* +* seqtk.wdl: `Sample` ## Imports other tasks: * bamstats.wdl diff --git a/rtg.wdl b/rtg.wdl index a6f8da52..0e86ce3f 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -52,7 +52,7 @@ task Format { parameter_meta { # inputs - inputFiles: {description: "input sequence files. May be specified 1 or more times.", category: "required"} + inputFiles: {description: "Input sequence files. May be specified 1 or more times.", category: "required"} format: {description: "Format of input. Allowed values are [fasta, fastq, fastq-interleaved, sam-se, sam-pe].", category: "advanced"} outputPath: {description: "Where the output should be placed.", category: "advanced"} rtgMem: {description: "The amount of memory rtg will allocate to the JVM.", category: "advanced"} @@ -61,7 +61,7 @@ task Format { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - sdf: {description: ""} + sdf: {description: "RTGSequence Data File (SDF) format version of the input file(s)."} } } @@ -166,6 +166,18 @@ task VcfEval { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - + falseNegativesVcf: {description: "Variants from thebaselineVCF which were not correctly called."} + falseNegativesVcfIndex: {description: "Index of the output VCF file `falseNegativesVcf`."} + falsePositivesVcf: {description: "Variants from thecallsVCF which do not agree with baseline variants."} + falsePositivesVcfIndex: {description: "Index of the output VCF file `falsePositivesVcf`."} + summary: {description: "Summary statistic file."} + truePositivesBaselineVcf: {description: "Variants from thebaselineVCF which agree with variants in thecalls VCF."} + truePositivesBaselineVcfIndex: {description: "Index of the output VCF file `truePositivesBaselineVcf`."} + truePositivesVcf: {description: "Variants from thecallsVCF which agree with variants in the baseline VCF."} + truePositivesVcfIndex: {description: "Index of the output VCF file `truePositivesVcf`."} + nonSnpRoc: {description: "ROC data derived from those variants which were not represented asSNPs."} + phasing: {description: "Phasing file."} + weightedRoc: {description: "ROC data derived from all analyzed call variants, regardless of their representation."} + allStats: {description: "All output files combined in a array."} } } diff --git a/smoove.wdl b/smoove.wdl index 244c2cac..cafc6b08 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -65,5 +65,8 @@ task Call { memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + smooveVcf: {description: "Calls and genotyping of structural variants in VCF file." 
} } From 3eeef3f777e4863f9da50e51f9ed0699578d28c2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 10:01:21 +0100 Subject: [PATCH 160/668] Update 2 new tasks. --- samtools.wdl | 35 +++++++++++++++++++++++++++++++++++ somaticseq.wdl | 6 ++++++ 2 files changed, 41 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index e274cf58..fcd996c7 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -56,6 +56,10 @@ task BgzipAndIndex { type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + compressed: {description: ""} + index: {description: ""} } } @@ -161,6 +165,11 @@ task Fastq { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + read1: {description: ""} + read2: {description: ""} + read0: {description: ""} } } @@ -203,6 +212,10 @@ task FilterShortReadsBam { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + filteredBam: {description: ""} + filteredBamIndex: {description: ""} } } @@ -239,6 +252,9 @@ task Flagstat { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + flagstat: {description: ""} } } @@ -288,6 +304,10 @@ task Index { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + indexedBam: {description: ""} + index: {description: ""} } } @@ -321,6 +341,9 @@ task Markdup { outputBamPath: {description: "The location of the output BAM file.", category: "required"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: ""} } } @@ -370,6 +393,10 @@ task Merge { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: ""} + outputBamIndex: {description: ""} } } @@ -474,6 +501,10 @@ task Tabix { type: {description: "The type of file (eg. vcf or bed) to be indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + indexedFile: {description: ""} + index: {description: ""} } } @@ -540,5 +571,9 @@ task View { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: ""} + outputBamIndex: {description: ""} } } diff --git a/somaticseq.wdl b/somaticseq.wdl index 1c73fc58..0cd944c6 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -123,6 +123,12 @@ task ParallelPaired { threads: {description: "The number of threads to use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + indels: {description: ""} + snvs: {description: ""} + ensembleIndels: {description: ""} + ensembleSNV: {description: ""} } } From e2461ff107d8d070c063ea47a782929e95bebb80 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 10:27:49 +0100 Subject: [PATCH 161/668] Fix travis error. 
--- smoove.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index cafc6b08..82079b2f 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -67,6 +67,6 @@ task Call { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - smooveVcf: {description: "Calls and genotyping of structural variants in VCF file." + smooveVcf: {description: "Calls and genotyping of structural variants in VCF file."} } } From 7dff854e906a14db3f69647b5f35596a9687d703 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 13:45:43 +0100 Subject: [PATCH 162/668] Update even more outputs. --- TO-DO.md | 11 ++++++----- somaticseq.wdl | 25 +++++++++++++++++++++++++ star.wdl | 22 ++++++++++++++++++++++ strelka.wdl | 10 ++++++++++ stringtie.wdl | 7 +++++++ survivor.wdl | 3 +++ umi-tools.wdl | 11 +++++++++++ vardict.wdl | 3 +++ vt.wdl | 3 +++ 9 files changed, 90 insertions(+), 5 deletions(-) diff --git a/TO-DO.md b/TO-DO.md index e9824dfb..b54d995c 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -20,6 +20,9 @@ Some tasks are importing other WDL files. * macs2.wdl: `PeakCalling` * ncbi.wdl: `GenomeDownload`, `DownloadNtFasta`, `DownloadAccessionToTaxId` * seqtk.wdl: `Sample` +* spades.wdl: `Spades` +* unicycler.wdl: `Unicycler` + ## Imports other tasks: * bamstats.wdl @@ -27,9 +30,7 @@ Some tasks are importing other WDL files. 
* sampleconfig.wdl * seqstat.wdl * clever.wdl +* strelka.wdl -## Requires parameter_meta: -* - -## Duplicate tasks: -* +## Requires input from others: +* somaticseq.wdl diff --git a/somaticseq.wdl b/somaticseq.wdl index 0cd944c6..07103ef9 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -234,6 +234,14 @@ task ParallelPairedTrain { threads: {description: "The number of threads to use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + consensusIndels: {description: ""} + consensusSNV: {description: ""} + ensembleIndels: {description: ""} + ensembleSNV: {description: ""} + ensembleIndelsClassifier: {description: ""} + ensembleSNVClassifier: {description: ""} } } @@ -317,6 +325,12 @@ task ParallelSingle { threads: {description: "The number of threads to use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + indels: {description: ""} + snvs: {description: ""} + ensembleIndels: {description: ""} + ensembleSNV: {description: ""} } } @@ -399,6 +413,14 @@ task ParallelSingleTrain { threads: {description: "The number of threads to use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + consensusIndels: {description: ""} + consensusSNV: {description: ""} + ensembleIndels: {description: ""} + ensembleSNV: {description: ""} + ensembleIndelsClassifier: {description: ""} + ensembleSNVClassifier: {description: ""} } } @@ -435,5 +457,8 @@ task ModifyStrelka { outputVCFName: {description: "The location the output VCF file should be written to.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: ""} } } diff --git a/star.wdl b/star.wdl index 68193fcd..aa1fd608 100644 --- a/star.wdl +++ b/star.wdl @@ -86,6 +86,24 @@ task GenomeGenerate { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + chrLength: {description: "Text chromosome lengths file."} + chrNameLength: {description: "Text chromosome name lengths file."} + chrName: {description: "Text chromosome names file."} + chrStart: {description: "Chromosome start sites file."} + genome: {description: "Binary genome sequence file."} + genomeParameters: {description: "Genome parameters file."} + sa: {description: "Suffix arrays file."} + saIndex: {description: "Index file of suffix arrays."} + exonGeTrInfo: {description: "Exon, gene and transcript information file."} + exonInfo: {description: "Exon information file."} + geneInfo: {description: "Gene information file."} + sjdbInfo: {description: "Splice junctions coordinates file."} + sjdbListFromGtfOut: {description: "Splice junctions from input GTF file."} + sjdbListOut: {description: "Splice junction list file."} + transcriptInfo: {description: "Transcripts information file."} + starIndex: {description: "A collection of all STAR index files."} } } @@ -181,6 +199,10 @@ task Star { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + bamFile: {description: "Alignment file."} + logFinalOut: {description: "Log information file."} } } diff --git a/strelka.wdl b/strelka.wdl index f4b9888b..be08e386 100644 --- a/strelka.wdl +++ b/strelka.wdl @@ -83,6 +83,10 @@ task Germline { memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + variants: {description: "Output VCF file."} + variantsIndex: {description: "Index of output VCF file."} } } @@ -158,6 +162,12 @@ task Somatic { memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + indelsVcf: {description: "VCF containing INDELS."} + indelsIndex: {description: "Index of output `indelsVcf`."} + variants: {description: "VCF containing variants."} + variantsIndex: {description: "Index of output `variants`."} } meta { diff --git a/stringtie.wdl b/stringtie.wdl index fff4140c..05df05c6 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -78,6 +78,10 @@ task Stringtie { memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + assembledTranscripts: {description: "GTF file containing the assembled transcripts."} + geneAbundance: {description: "Gene abundances in tab-delimited format."} } } @@ -141,5 +145,8 @@ task Merge { memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + mergedGtfFile: {description: "A merged GTF file from a set of GTF files."} } } diff --git a/survivor.wdl b/survivor.wdl index c7b31058..8b0360d8 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -74,5 +74,8 @@ task Merge { memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + mergedVcf: {description: "All the vcf files specified in fileList merged."} } } diff --git a/umi-tools.wdl b/umi-tools.wdl index 7b0a3991..6524d656 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -66,6 +66,10 @@ task Extract { threePrime: {description: "Whether or not the UMI's are at the reads' 3' end. If false the UMIs are extracted from the 5' end.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + extractedRead1: {description: "First read with UMI extracted to read name."} + extractedRead2: {description: "Second read with UMI extracted to read name."} } } @@ -124,5 +128,12 @@ task Dedup { memory: {description: "The amount of memory required for the task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + deduppedBam: {description: "Deduplicated BAM file."} + deduppedBamIndex: {description: "Index of the deduplicated BAM file."} + editDistance: {description: "Report of the (binned) average edit distance between the UMIs at each position."} + umiStats: {description: "UMI-level summary statistics."} + positionStats: {description: "The counts for unique combinations of UMI and position."} } } diff --git a/vardict.wdl b/vardict.wdl index fc37c9ef..1c20e51c 100644 --- a/vardict.wdl +++ b/vardict.wdl @@ -118,5 +118,8 @@ task VarDict { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + vcfFile: {description: "Output VCF file."} } } diff --git a/vt.wdl b/vt.wdl index 95585ff2..94414050 100644 --- a/vt.wdl +++ b/vt.wdl @@ -64,5 +64,8 @@ task Normalize { memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Output VCF file."} } } From 7d76ed6c3e0bfa5ab679deb54ef24da0955d1ed0 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 14:45:04 +0100 Subject: [PATCH 163/668] Update TO-DO. --- TO-DO.md | 2 +- whatshap.wdl | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/TO-DO.md b/TO-DO.md index b54d995c..9a7db355 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -22,7 +22,7 @@ Some tasks are importing other WDL files. * seqtk.wdl: `Sample` * spades.wdl: `Spades` * unicycler.wdl: `Unicycler` - +* wisestork.wdl: `Count`, `GcCorrect`, `Newref`, `Zscore` ## Imports other tasks: * bamstats.wdl diff --git a/whatshap.wdl b/whatshap.wdl index 5c69400a..7307ce7c 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -92,6 +92,10 @@ task Phase { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + phasedVCF: {description: "VCF file containing phased variants."} + phasedVCFIndex: {description: "Index of phased VCF file."} } } @@ -144,6 +148,11 @@ task Stats { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + phasedGTF: {description: "Phasing statistics for a single VCF file."} + phasedTSV: {description: "Statistics in a tab-separated value format."} + phasedBlockList: {description: "List of the total number of phase sets/blocks."} } } @@ -204,5 +213,9 @@ task Haplotag { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + bam: {description: "BAM file containing tagged reads for haplotype."} + bamIndex: {description: "Index of the tagged BAM file."} } } From 7dab07f86c611fdc26fe5863ae6eb5d155be430f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 16:46:21 +0100 Subject: [PATCH 164/668] Update picard. --- TO-DO.md | 1 + picard.wdl | 54 +++++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/TO-DO.md b/TO-DO.md index 9a7db355..9216bc0c 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -23,6 +23,7 @@ Some tasks are importing other WDL files. 
* spades.wdl: `Spades` * unicycler.wdl: `Unicycler` * wisestork.wdl: `Count`, `GcCorrect`, `Newref`, `Zscore` +* picard.wdl: `ScatterIntervalList` ## Imports other tasks: * bamstats.wdl diff --git a/picard.wdl b/picard.wdl index fd278958..2005fe28 100644 --- a/picard.wdl +++ b/picard.wdl @@ -321,8 +321,8 @@ task CollectRnaSeqMetrics { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - metrics: {description: ""} - chart: {description: ""} + metrics: {description: "Metrics describing the distribution of bases within the transcripts."} + chart: {description: "Plot of normalized position vs. coverage."} } } @@ -385,9 +385,9 @@ task CollectTargetedPcrMetrics { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - perTargetCoverage: {description: ""} - perBaseCoverage: {description: ""} - metrics: {description: ""} + perTargetCoverage: {description: "Per target coverage information."} + perBaseCoverage: {description: "Per base coverage information to."} + metrics: {description: "File containing metrics."} } } @@ -539,6 +539,11 @@ task GatherBamFiles { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: ""} + outputBamIndex: {description: ""} + outputBamMd5: {description: ""} } } @@ -582,6 +587,9 @@ task GatherVcfs { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: ""} } } @@ -666,6 +674,12 @@ task MarkDuplicates { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: ""} + outputBamIndex: {description: ""} + outputBamMd5: {description: ""} + metricsFile: {description: ""} } } @@ -725,6 +739,10 @@ task MergeVCFs { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Multiple variant files combined into a single variant file."} + outputVcfIndex: {description: "Index of `outputVcf`."} } } @@ -768,6 +786,21 @@ task SamToFastq { docker: dockerImage } + parameter_meta { + # inputs + inputBam: {description: "Input BAM file to extract reads from.", category: "required"} + inputBamIndex: {description: "Input BAM index file.", category: "required"} + paired: {description: "Set to false when input data is single-end.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + read1: {description: "Fastq file containing reads from the first pair."} + read2: {description: "Fastq file containing reads from the second pair."} + unpairedRead: {description: "Fastq file containing unpaired reads."} + meta { WDL_AID: { exclude: ["noneFile"] @@ -864,6 +897,10 @@ task SortSam { XmxGb: {description: "The maximum memory available to picard SortSam. Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "Sorted BAM file."} + outputBamIndex: {description: "Index of sorted BAM file."} } } @@ -911,6 +948,10 @@ task SortVcf { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Sorted VCF file(s)."} + outputVcfIndex: {description: "Index(es) of sort(ed) VCF file(s)."} } } @@ -955,5 +996,8 @@ task RenameSample { memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + renamedVcf: {description: "New VCF with renamed sample."} } } From b22cc9f9ba8e20e4685005bede66fe0dc129ccd8 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 17:02:49 +0100 Subject: [PATCH 165/668] Fix travis error. 
--- picard.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/picard.wdl b/picard.wdl index 2005fe28..cc2634f0 100644 --- a/picard.wdl +++ b/picard.wdl @@ -800,6 +800,7 @@ task SamToFastq { read1: {description: "Fastq file containing reads from the first pair."} read2: {description: "Fastq file containing reads from the second pair."} unpairedRead: {description: "Fastq file containing unpaired reads."} + } meta { WDL_AID: { From ccc38727ddf49a3cebb566fadf7145675b0eafa2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 17:19:50 +0100 Subject: [PATCH 166/668] Update samtools. --- samtools.wdl | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index fcd996c7..0aecf4ee 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -58,8 +58,8 @@ task BgzipAndIndex { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - compressed: {description: ""} - index: {description: ""} + compressed: {description: "Compressed input file."} + index: {description: "Index of the compressed input file."} } } @@ -167,9 +167,9 @@ task Fastq { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - read1: {description: ""} - read2: {description: ""} - read0: {description: ""} + read1: {description: "Reads with the READ1 FLAG set."} + read2: {description: "Reads with the READ2 FLAG set."} + read0: {description: "Reads with either READ1 FLAG or READ2 flag set."} } } @@ -214,8 +214,8 @@ task FilterShortReadsBam { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - filteredBam: {description: ""} - filteredBamIndex: {description: ""} + filteredBam: {description: "BAM file filtered for short reads."} + filteredBamIndex: {description: "Index of filtered BAM file."} } } @@ -254,7 +254,7 @@ task Flagstat { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - flagstat: {description: ""} + flagstat: {description: "The number of alignments for each FLAG type."} } } @@ -306,8 +306,8 @@ task Index { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - indexedBam: {description: ""} - index: {description: ""} + indexedBam: {description: "BAM file that was indexed."} + index: {description: "Index of the input BAM file."} } } @@ -343,7 +343,7 @@ task Markdup { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputBam: {description: ""} + outputBam: {description: "BAM file with duplicate alignments marked."} } } @@ -395,8 +395,8 @@ task Merge { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputBam: {description: ""} - outputBamIndex: {description: ""} + outputBam: {description: "Multiple BAM files merged into one."} + outputBamIndex: {description: "Index of the merged BAM file."} } } @@ -503,8 +503,8 @@ task Tabix { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - indexedFile: {description: ""} - index: {description: ""} + indexedFile: {description: "Indexed input file."} + index: {description: "Index of the input file."} } } @@ -573,7 +573,7 @@ task View { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputBam: {description: ""} - outputBamIndex: {description: ""} + outputBam: {description: "Processed input file."} + outputBamIndex: {description: "Index of the processed input file."} } } From 4c56f143264390a79319c7c85e3dcca7732fb0f2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 17:24:47 +0100 Subject: [PATCH 167/668] Update picard. --- picard.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/picard.wdl b/picard.wdl index cc2634f0..20fd1f95 100644 --- a/picard.wdl +++ b/picard.wdl @@ -541,9 +541,9 @@ task GatherBamFiles { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputBam: {description: ""} - outputBamIndex: {description: ""} - outputBamMd5: {description: ""} + outputBam: {description: "Concatenated BAM files."} + outputBamIndex: {description: "Index of the output `outputBam`."} + outputBamMd5: {description: "MD5 of the output `outputBam`."} } } @@ -589,7 +589,7 @@ task GatherVcfs { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputVcf: {description: ""} + outputVcf: {description: "Multiple VCF files gathered into one file."} } } From 19610fe328fbfee31e922684663d9a190e631194 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Thu, 12 Nov 2020 12:03:18 +0100 Subject: [PATCH 168/668] Update smoove.wdl --- smoove.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index 82079b2f..e5c5348f 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -67,6 +67,6 @@ task Call { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - smooveVcf: {description: "Calls and genotyping of structural variants in VCF file."} + smooveVcf: {description: "Calls of structural variants in VCF file."} } } From 7aea19d5feeab4aa5ff5a035216157d375dad116 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Thu, 12 Nov 2020 12:11:18 +0100 Subject: [PATCH 169/668] Update vt.wdl --- vt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vt.wdl b/vt.wdl index 94414050..85077dae 100644 --- a/vt.wdl +++ b/vt.wdl @@ -66,6 +66,6 @@ task Normalize { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputVcf: {description: "Output VCF file."} + outputVcf: {description: "Normalized & decomposed VCF file."} } } From c3255755087999b129670fda036bfbe4fe6771d6 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 12 Nov 2020 12:13:56 +0100 Subject: [PATCH 170/668] Update gffcompare. 
--- biopet/bamstats.wdl | 70 ----- biopet/biopet.wdl | 552 ---------------------------------------- biopet/sampleconfig.wdl | 143 ----------- biopet/seqstat.wdl | 64 ----- gffcompare.wdl | 6 +- 5 files changed, 3 insertions(+), 832 deletions(-) delete mode 100644 biopet/bamstats.wdl delete mode 100644 biopet/biopet.wdl delete mode 100644 biopet/sampleconfig.wdl delete mode 100644 biopet/seqstat.wdl diff --git a/biopet/bamstats.wdl b/biopet/bamstats.wdl deleted file mode 100644 index d01bc10c..00000000 --- a/biopet/bamstats.wdl +++ /dev/null @@ -1,70 +0,0 @@ -version 1.0 - -# Copyright (c) 2017 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import "../common.wdl" as common - -task Generate { - input { - IndexedBamFile bam - Boolean scatterMode = false - Boolean onlyUnmapped = false - Boolean tsvOutputs = false - String outputDir - - String? preCommand - File? toolJar - File? bedFile - Reference? 
reference - - String javaXmx = "8G" - String memory = "9G" - } - - File referenceFasta = if defined(reference) then select_first([reference]).fasta else "" - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-bamstats -Xmx~{javaXmx}" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p ~{outputDir} - ~{toolCommand} Generate \ - --bam ~{bam.file} \ - ~{"--bedFile " + bedFile} \ - ~{true="--reference" false="" defined(reference)} ~{referenceFasta} \ - ~{true="--onlyUnmapped" false="" onlyUnmapped} \ - ~{true="--scatterMode" false="" scatterMode} \ - ~{true="--tsvOutputs" false="" tsvOutputs} \ - --outputDir ~{outputDir} - } - - output { - File json = outputDir + "/bamstats.json" - File summaryJson = outputDir + "/bamstats.summary.json" - } - - runtime { - memory: memory - } -} diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl deleted file mode 100644 index e6619e09..00000000 --- a/biopet/biopet.wdl +++ /dev/null @@ -1,552 +0,0 @@ -version 1.0 - -# Copyright (c) 2017 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import "../common.wdl" - -task BaseCounter { - input { - IndexedBamFile bam - File refFlat - String outputDir - String prefix - - String? preCommand - File? toolJar - - String javaXmx = "4G" - String memory = "5G" - } - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-basecounter -Xmx~{javaXmx}" - - command { - set -e -o pipefail - mkdir -p ~{outputDir} - ~{preCommand} - ~{toolCommand} \ - -b ~{bam.file} \ - -r ~{refFlat} \ - -o ~{outputDir} \ - -p ~{prefix} - } - - output { - File exonAntisense = outputDir + "/" + prefix + ".base.exon.antisense.counts" - File exon = outputDir + "/" + prefix + ".base.exon.counts" - File exonMergeAntisense = outputDir + "/" + prefix + ".base.exon.merge.antisense.counts" - File exonMerge = outputDir + "/" + prefix + ".base.exon.merge.counts" - File exonMergeSense = outputDir + "/" + prefix + ".base.exon.merge.sense.counts" - File exonSense = outputDir + "/" + prefix + ".base.exon.sense.counts" - File geneAntisense = outputDir + "/" + prefix + ".base.gene.antisense.counts" - File gene = outputDir + "/" + prefix + ".base.gene.counts" - File geneExonicAntisense = outputDir + "/" + prefix + ".base.gene.exonic.antisense.counts" - File geneExonic = outputDir + "/" + prefix + ".base.gene.exonic.counts" - File geneExonicSense = outputDir + "/" + prefix + ".base.gene.exonic.sense.counts" - File geneIntronicAntisense = outputDir + "/" + prefix + ".base.gene.intronic.antisense.counts" - File geneIntronic = outputDir + "/" + prefix + ".base.gene.intronic.counts" - File geneIntronicSense = outputDir + "/" + prefix + ".base.gene.intronic.sense.counts" - File geneSense = outputDir + "/" + prefix + ".base.gene.sense.counts" - 
File intronAntisense = outputDir + "/" + prefix + ".base.intron.antisense.counts" - File intron = outputDir + "/" + prefix + ".base.intron.counts" - File intronMergeAntisense = outputDir + "/" + prefix + ".base.intron.merge.antisense.counts" - File intronMerge = outputDir + "/" + prefix + ".base.intron.merge.counts" - File intronMergeSense = outputDir + "/" + prefix + ".base.intron.merge.sense.counts" - File intronSense = outputDir + "/" + prefix + ".base.intron.sense.counts" - File metaExonsNonStranded = outputDir + "/" + prefix + ".base.metaexons.non_stranded.counts" - File metaExonsStrandedAntisense = outputDir + "/" + prefix + ".base.metaexons.stranded.antisense.counts" - File metaExonsStranded = outputDir + "/" + prefix + ".base.metaexons.stranded.counts" - File metaExonsStrandedSense = outputDir + "/" + prefix + ".base.metaexons.stranded.sense.counts" - File transcriptAntisense = outputDir + "/" + prefix + ".base.transcript.antisense.counts" - File transcript = outputDir + "/" + prefix + ".base.transcript.counts" - File transcriptExonicAntisense = outputDir + "/" + prefix + ".base.transcript.exonic.antisense.counts" - File transcriptExonic = outputDir + "/" + prefix + ".base.transcript.exonic.counts" - File transcriptExonicSense = outputDir + "/" + prefix + ".base.transcript.exonic.sense.counts" - File transcriptIntronicAntisense = outputDir + "/" + prefix + ".base.transcript.intronic.antisense.counts" - File transcriptIntronic = outputDir + "/" + prefix + ".base.transcript.intronic.counts" - File transcriptIntronicSense = outputDir + "/" + prefix + ".base.transcript.intronic.sense.counts" - File transcriptSense = outputDir + "/" + prefix + ".base.transcript.sense.counts" - } - - runtime { - memory: memory - } -} - -task ExtractAdaptersFastqc { - input { - File inputFile - String outputDir - String adapterOutputFilePath = outputDir + "/adapter.list" - String contamsOutputFilePath = outputDir + "/contaminations.list" - - Boolean? skipContams - File? 
knownContamFile - File? knownAdapterFile - Float? adapterCutoff - Boolean? outputAsFasta - - String javaXmx = "8G" - String memory = "9G" - Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/biopet-extractadaptersfastqc:0.2--1" - } - - command { - set -e - mkdir -p ~{outputDir} - biopet-extractadaptersfastqc -Xmx~{javaXmx} \ - --inputFile ~{inputFile} \ - ~{"--adapterOutputFile " + adapterOutputFilePath } \ - ~{"--contamsOutputFile " + contamsOutputFilePath } \ - ~{"--knownContamFile " + knownContamFile} \ - ~{"--knownAdapterFile " + knownAdapterFile} \ - ~{"--adapterCutoff " + adapterCutoff} \ - ~{true="--skipContams" false="" skipContams} \ - ~{true="--outputAsFasta" false="" outputAsFasta} - } - - output { - File adapterOutputFile = adapterOutputFilePath - File contamsOutputFile = contamsOutputFilePath - Array[String] adapterList = read_lines(adapterOutputFile) - Array[String] contamsList = read_lines(contamsOutputFile) - } - - runtime { - memory: memory - time_minutes: timeMinutes - docker: dockerImage - } - - parameter_meta { - # inputs - inputFile: {description: "Input fastq file.", category: "required"} - outputDir: {description: "The path to which the output should be written.", category: "required"} - adapterOutputFilePath: {description: "Output file for adapters, if not supplied output will go to stdout.", category: "common"} - contamsOutputFilePath: {description: "Output file for contaminations, if not supplied output will go to stdout.", category: "common"} - skipContams: {description: "If this is set only the adapters block is used, other wise contaminations is also used.", category: "advanced"} - knownContamFile: {description: "This file should contain the known contaminations from fastqc.", category: ""advanced} - knownAdapterFile: {description: "This file should contain the known adapters from fastqc.", category: "advanced"} - adapterCutoff: {description: "The fraction of the adapters in a read should be above this fraction, default 
is 0.001.", category: "advanced"} - outputAsFasta: {description: "Output in fasta format, default only sequences.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - - # outputs - adapterOutputFile: {description: "Output file with adapters."} - contamsOutputFile: {description: "Output file with contaminations."} - adapterList: {description: "List of adapters."} - contamsList: {description: "List of contaminations."} - } -} - -task FastqSplitter { - input { - File inputFastq - Array[String]+ outputPaths - - String? preCommand - File? toolJar - - String javaXmx = "4G" - String memory = "5G" - String dockerImage = "quay.io/biocontainers/biopet-fastqsplitter:0.1--2" - } - - command { - set -e - mkdir -p $(dirname ~{sep=') $(dirname ' outputPaths}) - biopet-fastqsplitter -Xmx~{javaXmx} \ - -I ~{inputFastq} \ - -o ~{sep=' -o ' outputPaths} - } - - output { - Array[File] chunks = outputPaths - } - - runtime { - memory: memory - docker: dockerImage - } -} - -task FastqSync { - input { - FastqPair refFastq - FastqPair inputFastq - String out1path - String out2path - - String? preCommand - File? 
toolJar - - String javaXmx = "4G" - String memory = "5G" - } - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-fastqsync -Xmx~{javaXmx}" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p $(dirname ~{out1path}) $(dirname ~{out2path}) - ~{toolCommand} \ - --in1 ~{inputFastq.R1} \ - --in2 ~{inputFastq.R2} \ - --ref1 ~{refFastq.R1} \ - --ref2 ~{refFastq.R2} \ - --out1 ~{out1path} \ - --out2 ~{out2path} - } - - output { - FastqPair out1 = object { - R1: out1path, - R2: out2path - } - } - - runtime { - memory: memory - } -} - -task ScatterRegions { - input { - File referenceFasta - File referenceFastaDict - Int scatterSizeMillions = 1000 - Boolean notSplitContigs = false - - Int? scatterSize - File? regions - File? bamFile - File? bamIndex - - String javaXmx = "500M" - String memory = "1G" - Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/biopet-scatterregions:0.2--0" - } - - # OutDirPath must be defined here because the glob process relies on - # linking. This path must be in the containers filesystem, otherwise the - # linking does not work. - String outputDirPath = "scatters" - String finalSize = if defined(scatterSize) then "~{scatterSize}" else "~{scatterSizeMillions}000000" - - command <<< - set -e -o pipefail - mkdir -p ~{outputDirPath} - biopet-scatterregions -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -R ~{referenceFasta} \ - -o ~{outputDirPath} \ - ~{"-s " + finalSize} \ - ~{"-L " + regions} \ - ~{"--bamFile " + bamFile} \ - ~{true="--notSplitContigs" false="" notSplitContigs} - - # Glob messes with order of scatters (10 comes before 1), which causes - # problems at gatherGvcfs - # Therefore we reorder the scatters with python. 
- python << CODE - import os - scatters = os.listdir("~{outputDirPath}") - splitext = [ x.split(".") for x in scatters] - splitnum = [x.split("-") + [y] for x,y in splitext] - ordered = sorted(splitnum, key=lambda x: int(x[1])) - merged = ["~{outputDirPath}/{}-{}.{}".format(x[0],x[1],x[2]) for x in ordered] - for x in merged: - print(x) - CODE - >>> - - output { - Array[File] scatters = read_lines(stdout()) - } - - runtime { - memory: memory - time_minutes: timeMinutes - docker: dockerImage - } - - parameter_meta { - # inputs - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} - scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} - notSplitContigs: {description: "Equivalent to biopet scatterregions' `--notSplitContigs` flag.", category: "advanced"} - scatterSize: {description: "Overrides scatterSizeMillions with a smaller value if set.", category: "advanced"} - regions: {description: "The regions to be scattered.", category: "advanced"} - bamFile: {description: "Equivalent to biopet scatterregions' `--bamfile` option.", category: "advanced"} - bamIndex: {description: "The index for the bamfile given through bamFile.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - - # outputs - scatters: {description: "Smaller scatter regions of equal size."} - } -} - -task ValidateAnnotation { - input { - Reference reference - - File? refRefflat - File? gtfFile - - String javaXmx = "3G" - String memory = "4G" - String dockerImage = "quay.io/biocontainers/biopet-validateannotation:0.1--0" - } - - command { - biopet-validateannotation -Xmx~{javaXmx} \ - ~{"-r " + refRefflat} \ - ~{"-g " + gtfFile} \ - -R ~{reference.fasta} - } - - output { - File stderr = stderr() - } - - runtime { - memory: memory - docker: dockerImage - } -} - -task ValidateFastq { - input { - File read1 - File? read2 - - String javaXmx = "3G" - String memory = "4G" - String dockerImage = "quay.io/biocontainers/biopet-validatefastq:0.1.1--1" - } - - command { - biopet-validatefastq -Xmx~{javaXmx} \ - --fastq1 ~{read1} \ - ~{"--fastq2 " + read2} - } - - output { - File stderr = stderr() - } - - runtime { - memory: memory - docker: dockerImage - } -} - -task ValidateVcf { - input { - IndexedVcfFile vcf - Reference reference - - String javaXmx = "3G" - String memory = "4G" - String dockerImage = "quay.io/biocontainers/biopet-validatevcf:0.1--0" - } - - command { - biopet-validatevcf -Xmx~{javaXmx} \ - -i ~{vcf.file} \ - -R ~{reference.fasta} - } - - output { - File stderr = stderr() - } - - runtime { - memory: memory - docker: dockerImage - } -} - -task VcfStats { - input { - IndexedVcfFile vcf - Reference reference - String outputDir - Boolean writeBinStats = false - Int localThreads = 1 - Boolean notWriteContigStats = false - Boolean skipGeneral = false - Boolean skipGenotype = false - Boolean skipSampleDistributions = false - Boolean skipSampleCompare = false - - File? intervals - Array[String]+? infoTags - Array[String]+? genotypeTags - Int? sampleToSampleMinDepth - Int? binSize - Int? maxContigsInSingleJob - String? sparkMaster - Int? 
sparkExecutorMemory - Array[String]+? sparkConfigValues - - String javaXmx = "4G" - String memory = "5G" - String dockerImage = "quay.io/biocontainers/biopet-vcfstats:1.2--0" - } - - command { - set -e - mkdir -p ~{outputDir} - biopet-vcfstats -Xmx~{javaXmx} \ - -I ~{vcf.file} \ - -R ~{reference.fasta} \ - -o ~{outputDir} \ - -t ~{localThreads} \ - ~{"--intervals " + intervals} \ - ~{true="--infoTag" false="" defined(infoTags)} ~{sep=" --infoTag " infoTags} \ - ~{true="--genotypeTag" false="" defined(genotypeTags)} ~{sep=" --genotypeTag " - genotypeTags} \ - ~{"--sampleToSampleMinDepth " + sampleToSampleMinDepth} \ - ~{"--binSize " + binSize} \ - ~{"--maxContigsInSingleJob " + maxContigsInSingleJob} \ - ~{true="--writeBinStats" false="" writeBinStats} \ - ~{true="--notWriteContigStats" false="" notWriteContigStats} \ - ~{true="--skipGeneral" false="" skipGeneral} \ - ~{true="--skipGenotype" false="" skipGenotype} \ - ~{true="--skipSampleDistributions" false="" skipSampleDistributions} \ - ~{true="--skipSampleCompare" false="" skipSampleCompare} \ - ~{"--sparkMaster " + sparkMaster} \ - ~{"--sparkExecutorMemory " + sparkExecutorMemory} \ - ~{true="--sparkConfigValue" false="" defined(sparkConfigValues)} ~{ - sep=" --sparkConfigValue" sparkConfigValues} - } - - output { - File? general = outputDir + "/general.tsv" - File? genotype = outputDir + "/genotype.tsv" - File? sampleDistributionAvailableAggregate = outputDir + - "/sample_distributions/Available.aggregate.tsv" - File? sampleDistributionAvailable = outputDir + "/sample_distributions/Available.tsv" - File? sampleDistributionCalledAggregate = outputDir + - "/sample_distributions/Called.aggregate.tsv" - File? sampleDistributionCalled = outputDir + "/sample_distributions/Called.tsv" - File? sampleDistributionFilteredAggregate = outputDir + - "/sample_distributions/Filtered.aggregate.tsv" - File? sampleDistributionFiltered = outputDir + "/sample_distributions/Filtered.tsv" - File? 
sampleDistributionHetAggregate = outputDir + "/sample_distributions/Het.aggregate.tsv" - File? sampleDistributionHetNoNRefAggregate = outputDir + - "/sample_distributions/HetNonRef.aggregate.tsv" - File? sampleDistributionHetNonRef = outputDir + "/sample_distributions/HetNonRef.tsv" - File? sampleDistributionHet = outputDir + "/sample_distributions/Het.tsv" - File? sampleDistributionHomAggregate = outputDir + "/sample_distributions/Hom.aggregate.tsv" - File? sampleDistributionHomRefAggregate = outputDir + - "/sample_distributions/HomRef.aggregate.tsv" - File? sampleDistributionHomRef = outputDir + "/sample_distributions/HomRef.tsv" - File? sampleDistributionHom = outputDir + "/sample_distributions/Hom.tsv" - File? sampleDistributionHomVarAggregate = outputDir + - "/sample_distributions/HomVar.aggregate.tsv" - File? sampleDistributionHomVar = outputDir + "/sample_distributions/HomVar.tsv" - File? sampleDistributionMixedAggregate = outputDir + - "/sample_distributions/Mixed.aggregate.tsv" - File? sampleDistributionMixed = outputDir + "/sample_distributions/Mixed.tsv" - File? sampleDistributionNoCallAggregate = outputDir + - "/sample_distributions/NoCall.aggregate.tsv" - File? sampleDistributionNoCall = outputDir + "/sample_distributions/NoCall.tsv" - File? sampleDistributionNonInformativeAggregate = outputDir + - "/sample_distributions/NonInformative.aggregate.tsv" - File? sampleDistributionNonInformative = outputDir + - "/sample_distributions/NonInformative.tsv" - File? sampleDistributionToalAggregate = outputDir + - "/sample_distributions/Total.aggregate.tsv" - File? sampleDistributionTotal = outputDir + "/sample_distributions/Total.tsv" - File? sampleDistributionVariantAggregate = outputDir + - "/sample_distributions/Variant.aggregate.tsv" - File? sampleDistributionVariant = outputDir + "/sample_distributions/Variant.tsv" - File? sampleCompareAlleleAbs = outputDir + "/sample_compare/allele.abs.tsv" - File? 
sampleCompareAlleleNonRefAbs = outputDir + "/sample_compare/allele.non_ref.abs.tsv" - File? sampleCompareAlleleRefAbs = outputDir + "/sample_compare/allele.ref.abs.tsv" - File? sampleCompareAlleleRel = outputDir + "/sample_compare/allele.rel.tsv" - File? sampleCompareGenotypeAbs = outputDir + "/sample_compare/genotype.abs.tsv" - File? sampleCompareGenotypeNonRefAbs = outputDir + - "/sample_compare/genotype.non_ref.abs.tsv" - File? sampleCompareGenotypeRefAbs = outputDir + "/sample_compare/genotype.ref.abs.tsv" - File? sampleCompareGenotypeRel = outputDir + "/sample_compare/genotype.rel.tsv" - # A glob is easier, but duplicates all the outputs - Array[File] allStats = select_all([ - general, - genotype, - sampleDistributionAvailableAggregate, - sampleDistributionAvailable, - sampleDistributionCalledAggregate, - sampleDistributionCalled, - sampleDistributionFilteredAggregate, - sampleDistributionFiltered, - sampleDistributionHetAggregate, - sampleDistributionHetNoNRefAggregate, - sampleDistributionHetNonRef, - sampleDistributionHet, - sampleDistributionHomAggregate, - sampleDistributionHomRefAggregate, - sampleDistributionHomRef, - sampleDistributionHom, - sampleDistributionHomVarAggregate, - sampleDistributionHomVar, - sampleDistributionMixedAggregate, - sampleDistributionMixed, - sampleDistributionNoCallAggregate, - sampleDistributionNoCall, - sampleDistributionNonInformativeAggregate, - sampleDistributionNonInformative, - sampleDistributionToalAggregate, - sampleDistributionTotal, - sampleDistributionVariantAggregate, - sampleDistributionVariant, - sampleCompareAlleleAbs, - sampleCompareAlleleNonRefAbs, - sampleCompareAlleleRefAbs, - sampleCompareAlleleRel, - sampleCompareGenotypeAbs, - sampleCompareGenotypeNonRefAbs, - sampleCompareGenotypeRefAbs, - sampleCompareGenotypeRel - ]) - } - - runtime { - cpu: localThreads - memory: memory - docker: dockerImage - } -} diff --git a/biopet/sampleconfig.wdl b/biopet/sampleconfig.wdl deleted file mode 100644 index 
f3955658..00000000 --- a/biopet/sampleconfig.wdl +++ /dev/null @@ -1,143 +0,0 @@ -version 1.0 - -# Copyright (c) 2017 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import "../common.wdl" as common - -task SampleConfig { - input { - Array[File]+ inputFiles - String keyFilePath - - File? toolJar - String? preCommand - String? sample - String? library - String? readgroup - String? jsonOutputPath - String? tsvOutputPath - - String javaXmx = "16G" - String memory = "17G" - } - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-sampleconfig -Xmx~{javaXmx}" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p . 
~{"$(dirname " + jsonOutputPath + ")"} ~{"$(dirname " + tsvOutputPath + ")"} - ~{toolCommand} \ - -i ~{sep="-i " inputFiles} \ - ~{"--sample " + sample} \ - ~{"--library " + library} \ - ~{"--readgroup " + readgroup} \ - ~{"--jsonOutput " + jsonOutputPath} \ - ~{"--tsvOutput " + tsvOutputPath} \ - > ~{keyFilePath} - } - - output { - File keysFile = keyFilePath - File? jsonOutput = jsonOutputPath - File? tsvOutput = tsvOutputPath - } - - runtime { - memory: memory - } -} - -task SampleConfigCromwellArrays { - input { - Array[File]+ inputFiles - String outputPath - - File? toolJar - String? preCommand - - String javaXmx = "4G" - String memory = "5G" - } - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-sampleconfig -Xmx~{javaXmx}" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p $(dirname ~{outputPath}) - ~{toolCommand} CromwellArrays \ - -i ~{sep="-i " inputFiles} \ - ~{"-o " + outputPath} - } - - output { - File outputFile = outputPath - } - - runtime { - memory: memory - } -} - -task CaseControl { - input { - Array[File]+ inputFiles - Array[File]+ inputIndexFiles - Array[File]+ sampleConfigs - String outputPath - String controlTag = "control" - - File? toolJar - String? 
preCommand - - String javaXmx = "4G" - String memory = "5G" - } - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-sampleconfig -Xmx~{javaXmx}" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p $(dirname ~{outputPath}) - ~{toolCommand} CaseControl \ - -i ~{sep=" -i " inputFiles} \ - -s ~{sep=" -s " sampleConfigs} \ - ~{"-o " + outputPath} \ - ~{"--controlTag " + controlTag} - } - - output { - File outputFile = outputPath - CaseControls caseControls = read_json(outputFile) - } - - runtime { - memory: memory - } -} diff --git a/biopet/seqstat.wdl b/biopet/seqstat.wdl deleted file mode 100644 index c2eb5866..00000000 --- a/biopet/seqstat.wdl +++ /dev/null @@ -1,64 +0,0 @@ -version 1.0 - -# Copyright (c) 2017 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -import "../common.wdl" as common - -task Generate { - input { - FastqPair fastq - String outputFile - String sample - String library - String readgroup - - String? preCommand - File? toolJar - - String javaXmx = "4G" - String memory = "5G" - } - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-seqstat -Xmx~{javaXmx}" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p $(dirname ~{outputFile}) - ~{toolCommand} Generate \ - --fastqR1 ~{fastq.R1} \ - ~{"--fastqR2 " + fastq.R2} \ - --output ~{outputFile} \ - ~{"--sample " + sample} \ - ~{"--library " + library } \ - ~{"--readgroup " + readgroup } - } - - output { - File json = outputFile - } - - runtime { - memory: memory - } -} diff --git a/gffcompare.wdl b/gffcompare.wdl index 4b0d6d22..50cab8a6 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -145,10 +145,10 @@ task GffCompare { # outputs annotated: {description: "Annotated GTF file."} - loci: {description: ""} + loci: {description: "File describing the processed loci."} stats: {description: "Various statistics related to the “accuracy” (or a measure of agreement) of the input transcripts when compared to reference annotation data."} - tracking: {description: "File matching transcripts up between samples."} - allFiles: {description: "A collection of all outputs files."} + tracking: {description: "File matching up transcripts between samples."} + allFiles: {description: "A collection of all output files."} redundant: {description: "File containing duplicate/redundant transcripts."} missedIntrons: {description: "File denoting missed introns."} } From 33166e5795cc175bf8384d8b34ff2bcb4776fbfb Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 12 Nov 2020 12:57:36 +0100 Subject: [PATCH 171/668] Update TO-DO.md. 
--- TO-DO.md | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/TO-DO.md b/TO-DO.md index 9216bc0c..be125abe 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -1,4 +1,4 @@ -#TO DO +#TO-DO This file describes WDL files and tasks within those files which need more specific attention than just adding outputs to the parameter_meta. @@ -8,11 +8,6 @@ missing a parameter_meta section. Some tasks are importing other WDL files. ## Out of date with new cluster & parameter_meta: -* bamstats.wdl: `Generate` -* biopet.wdl: `BaseCounter`, `FastqSplitter`, `FastqSync`, - `ValidateAnnotation`, `ValidateFastq`, `ValidateVcf`, `VcfStats` -* sampleconfig.wdl: `SampleConfig`, `SampleConfigCromwellArrays`, `CaseControl` -* seqstat.wdl: `Generate` * common.wdl: `AppendToStringArray`, `CheckFileMD5`, `ConcatenateTextFiles`, `Copy`, `CreateLink`, `MapMd5`, `StringArrayMd5` * fastqsplitter.wdl: `Fastqsplitter` @@ -25,13 +20,8 @@ Some tasks are importing other WDL files. * wisestork.wdl: `Count`, `GcCorrect`, `Newref`, `Zscore` * picard.wdl: `ScatterIntervalList` -## Imports other tasks: -* bamstats.wdl -* biopet.wdl -* sampleconfig.wdl -* seqstat.wdl -* clever.wdl -* strelka.wdl - ## Requires input from others: +These tasks below are still missing descriptions `outputs` in +the `parameter_meta`. * somaticseq.wdl +* picard.wdl From 78951778ad81d402d21db421cc6f7284a24c1941 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 12 Nov 2020 14:44:32 +0100 Subject: [PATCH 172/668] Fix syntax. 
--- gatk.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 82244caa..5cf7c673 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1221,10 +1221,10 @@ task ModelSegments { copyRatioCBS: {description: "The posterior medians of the log2 copy ratio."} alleleFractionCBS: {description: "Minor-allele fraction."} unsmoothedModeledSegments: {description: "The initial modeled-segments result before segmentation smoothing."} - unsmoothedCopyRatioParameters: {description: "The initial copy-ratio-model global-parameter result before segmentation smoothing"} + unsmoothedCopyRatioParameters: {description: "The initial copy-ratio-model global-parameter result before segmentation smoothing."} unsmoothedAlleleFractionParameters: {description: "The initial allele-fraction-model global-parameter result before segmentation smoothing."} modeledSegments: {description: "The final modeled-segments result after segmentation smoothing."} - copyRatioParameters: {description: "The final copy-ratio-model global-parameter result after segmentation smoothing"} + copyRatioParameters: {description: "The final copy-ratio-model global-parameter result after segmentation smoothing."} alleleFractionParameters: {description: "The final allele-fraction-model global-parameter result after segmentation smoothing."} normalHetrozygousAllelicCounts: {description: "Allelic-counts file containing the counts at sites genotyped as heterozygous in the matched-normal sample."} } @@ -1766,7 +1766,7 @@ task VariantFiltration { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - filteredVcf: {description: "A filtered VCF in which passing variants are annotated as PASS and failing variants are annotated with the name(s) of the filter(s) they failed. 
"} + filteredVcf: {description: "A filtered VCF in which passing variants are annotated as PASS and failing variants are annotated with the name(s) of the filter(s) they failed."} filteredVcfIndex: {description: "Index of filtered VCF."} } } From d5863eecf95da8f78d4d06af2bd6b91bc036a4f0 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 17 Nov 2020 14:45:02 +0100 Subject: [PATCH 173/668] Add parameter meta --- picard.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/picard.wdl b/picard.wdl index 20fd1f95..029f0899 100644 --- a/picard.wdl +++ b/picard.wdl @@ -122,8 +122,10 @@ task CollectHsMetrics { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} targets: {description: "Picard interval file of the capture targets.", category: "required"} + targetsFile: {description: "Picard interval file of the capture targets, the same as targets.", category: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} baits: {description: "Picard interval file of the capture bait set.", category: "advanced"} + baitsFile: {description: "Picard interval file of the bait set. Uses targets as a fallback when baits is not set", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 8cc4c073e40ac70f3398eda3bd047aa42d801d26 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 17 Nov 2020 14:46:39 +0100 Subject: [PATCH 174/668] Add period to end of sentence --- picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index 029f0899..17930e3c 100644 --- a/picard.wdl +++ b/picard.wdl @@ -125,7 +125,7 @@ task CollectHsMetrics { targetsFile: {description: "Picard interval file of the capture targets, the same as targets.", category: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} baits: {description: "Picard interval file of the capture bait set.", category: "advanced"} - baitsFile: {description: "Picard interval file of the bait set. Uses targets as a fallback when baits is not set", category: "advanced"} + baitsFile: {description: "Picard interval file of the bait set. Uses targets as a fallback when baits is not set.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 3b0874c0ed573307c2de1926d6df41c808be149b Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 17 Nov 2020 15:59:10 +0100 Subject: [PATCH 175/668] Update picard.wdl parameter meta Co-authored-by: Davy Cats --- picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index 17930e3c..d5601ad0 100644 --- a/picard.wdl +++ b/picard.wdl @@ -122,7 +122,7 @@ task CollectHsMetrics { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} targets: {description: "Picard interval file of the capture targets.", category: "required"} - targetsFile: {description: "Picard interval file of the capture targets, the same as targets.", category: "required"} + targetsFile: {description: "Picard interval file of the capture targets, the same as targets.", category: "advanced"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} baits: {description: "Picard interval file of the capture bait set.", category: "advanced"} baitsFile: {description: "Picard interval file of the bait set. Uses targets as a fallback when baits is not set.", category: "advanced"} From 67116dfe6c9021a011b97889ee08f99f25d5e7b8 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 17 Nov 2020 16:56:08 +0100 Subject: [PATCH 176/668] Update version of tools. 
--- CHANGELOG.md | 2 ++ TO-DO.md | 27 --------------------------- cutadapt.wdl | 2 +- stringtie.wdl | 4 ++-- 4 files changed, 5 insertions(+), 30 deletions(-) delete mode 100644 TO-DO.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b668ab1..27d4aa71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Update CutAdapt to version 3.0.0. ++ Update StringTie to version 2.1.4. + Complete `parameter_meta` for tasks missing the outputs. + DeepVariant: Add an optional input for the gvcf index. + Samtools: `Sort` task now has `threads` in runtime instead of `1`. diff --git a/TO-DO.md b/TO-DO.md deleted file mode 100644 index be125abe..00000000 --- a/TO-DO.md +++ /dev/null @@ -1,27 +0,0 @@ -#TO-DO -This file describes WDL files and tasks within those files which need -more specific attention than just adding outputs to the parameter_meta. - -Some tasks have not been updated to match the new SLURM requirements and are -missing a parameter_meta section. - -Some tasks are importing other WDL files. - -## Out of date with new cluster & parameter_meta: -* common.wdl: `AppendToStringArray`, `CheckFileMD5`, `ConcatenateTextFiles`, - `Copy`, `CreateLink`, `MapMd5`, `StringArrayMd5` -* fastqsplitter.wdl: `Fastqsplitter` -* flash.wdl: `Flash` -* macs2.wdl: `PeakCalling` -* ncbi.wdl: `GenomeDownload`, `DownloadNtFasta`, `DownloadAccessionToTaxId` -* seqtk.wdl: `Sample` -* spades.wdl: `Spades` -* unicycler.wdl: `Unicycler` -* wisestork.wdl: `Count`, `GcCorrect`, `Newref`, `Zscore` -* picard.wdl: `ScatterIntervalList` - -## Requires input from others: -These tasks below are still missing descriptions `outputs` in -the `parameter_meta`. 
-* somaticseq.wdl -* picard.wdl diff --git a/cutadapt.wdl b/cutadapt.wdl index b2dbdec0..b9f5a649 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -85,7 +85,7 @@ task Cutadapt { Int cores = 4 String memory = "~{300 + 100 * cores}M" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) - String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" + String dockerImage = "quay.io/biocontainers/cutadapt:3.0--py37hf01694f_0" } String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) diff --git a/stringtie.wdl b/stringtie.wdl index 05df05c6..81d96132 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -35,7 +35,7 @@ task Stringtie { Int threads = 1 String memory = "2G" Int timeMinutes = 1 + ceil(size(bam, "G") * 60 / threads) - String dockerImage = "quay.io/biocontainers/stringtie:1.3.4--py35_0" + String dockerImage = "quay.io/biocontainers/stringtie:2.1.4--h7e0af3c_0" } command { @@ -101,7 +101,7 @@ task Merge { String memory = "10G" Int timeMinutes = 1 + ceil(size(gtfFiles, "G") * 20) - String dockerImage = "quay.io/biocontainers/stringtie:1.3.4--py35_0" + String dockerImage = "quay.io/biocontainers/stringtie:2.1.4--h7e0af3c_0" } command { From 826cbaf4c0b3eae2b5fb3db8439211c1d9f8fdab Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 17 Nov 2020 17:05:22 +0100 Subject: [PATCH 177/668] Update versions. --- CHANGELOG.md | 2 ++ minimap2.wdl | 4 ++-- multiqc.wdl | 2 +- nanopack.wdl | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 27d4aa71..3da95305 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Update NanoPlot to version 1.32.1. ++ Update MultiQC to version 1.9. + Update CutAdapt to version 3.0.0. + Update StringTie to version 2.1.4. + Complete `parameter_meta` for tasks missing the outputs. 
diff --git a/minimap2.wdl b/minimap2.wdl index 1b719da6..d2e69905 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -33,7 +33,7 @@ task Indexing { Int cores = 1 String memory = "4G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/minimap2:2.17--h84994c4_0" + String dockerImage = "quay.io/biocontainers/minimap2:2.17--hed695b0_3" } command { @@ -100,7 +100,7 @@ task Mapping { Int cores = 4 String memory = "30G" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) - String dockerImage = "quay.io/biocontainers/minimap2:2.17--h84994c4_0" + String dockerImage = "quay.io/biocontainers/minimap2:2.17--hed695b0_3" } command { diff --git a/multiqc.wdl b/multiqc.wdl index 405c0a0b..2571463a 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -58,7 +58,7 @@ task MultiQC { String? memory Int timeMinutes = 2 + ceil(size(reports, "G") * 8) - String dockerImage = "quay.io/biocontainers/multiqc:1.7--py_1" + String dockerImage = "quay.io/biocontainers/multiqc:1.9--py_1" } Int memoryGb = 2 + ceil(size(reports, "G")) diff --git a/nanopack.wdl b/nanopack.wdl index f238ce7b..f86641b0 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -42,7 +42,7 @@ task NanoPlot { Int threads = 2 String memory = "2G" Int timeMinutes = 15 - String dockerImage = "quay.io/biocontainers/nanoplot:1.32.0--py_0" + String dockerImage = "quay.io/biocontainers/nanoplot:1.32.1--py_0" } Map[String, String] fileTypeOptions = {"fastq": "--fastq ", "fasta": "--fasta ", "fastq_rich": "--fastq_rich ", "fastq_minimal": "--fastq_minimal ", "summary": "--summary ", "bam": "--bam ", "ubam": "--ubam ", "cram": "--cram ", "pickle": "--pickle ", "feather": "--feather "} From 70b3484461c1b887f558bb2a5a327ce98ac4f388 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 19 Nov 2020 12:53:26 +0100 Subject: [PATCH 178/668] Update versions. 
--- CHANGELOG.md | 4 ++++ isoseq3.wdl | 2 +- lima.wdl | 2 +- picard.wdl | 32 ++++++++++++++++---------------- samtools.wdl | 18 +++++++++--------- scripts | 2 +- 6 files changed, 32 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3da95305..dae3f185 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,10 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Update Lima to version 2.0.0. ++ Update IsoSeq3 to version 3.4.0. ++ Update samtools to version 1.11. ++ Update Picard to version 2.23.8. + Update NanoPlot to version 1.32.1. + Update MultiQC to version 1.9. + Update CutAdapt to version 3.0.0. diff --git a/isoseq3.wdl b/isoseq3.wdl index c1c4397c..aacbfc60 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -34,7 +34,7 @@ task Refine { Int threads = 2 String memory = "2G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" + String dockerImage = "quay.io/biocontainers/isoseq3:3.4.0--0" } command { diff --git a/lima.wdl b/lima.wdl index 33b2328b..119db3f4 100644 --- a/lima.wdl +++ b/lima.wdl @@ -51,7 +51,7 @@ task Lima { Int threads = 2 String memory = "2G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/lima:1.11.0--0" + String dockerImage = "quay.io/biocontainers/lima:2.0.0--0" } Map[String, String] libraryDesignOptions = {"same": "--same", "different": "--different", "neighbors": "--neighbors"} diff --git a/picard.wdl b/picard.wdl index d5601ad0..f75fdc32 100644 --- a/picard.wdl +++ b/picard.wdl @@ -29,7 +29,7 @@ task BedToIntervalList { String javaXmx = "3G" String memory = "4G" Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -89,7 +89,7 @@ task CollectHsMetrics { # Additional * 2 because picard multiple metrics reads the # reference fasta twice. 
Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -158,7 +158,7 @@ task CollectMultipleMetrics { Int memoryMb = javaXmxMb + 512 # Additional * 2 because picard multiple metrics reads the reference fasta twice. Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -284,7 +284,7 @@ task CollectRnaSeqMetrics { String memory = "9G" # With 6 minutes per G there were several timeouts. Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -342,7 +342,7 @@ task CollectTargetedPcrMetrics { String javaXmx = "3G" String memory = "4G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -404,7 +404,7 @@ task CollectVariantCallingMetrics { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -453,7 +453,7 @@ task CreateSequenceDictionary { String javaXmx = "2G" String memory = "3G" - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -503,7 +503,7 @@ task GatherBamFiles { Int memoryMb = javaXmxMb + 512 # One minute per input gigabyte. 
Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -558,7 +558,7 @@ task GatherVcfs { String javaXmx = "4G" String memory = "5G" Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -622,7 +622,7 @@ task MarkDuplicates { String memoryMb = javaXmxMb + 512 Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } # Task is assuming query-sorted input so that the Secondary and Supplementary reads get @@ -701,7 +701,7 @@ task MergeVCFs { String javaXmx = "4G" String memory = "5G" Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } # Using MergeVcfs instead of GatherVcfs so we can create indices. @@ -757,7 +757,7 @@ task SamToFastq { String javaXmx = "16G" # High memory default to avoid crashes. String memory = "17G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" File? 
noneFile } @@ -818,7 +818,7 @@ task ScatterIntervalList { String javaXmx = "3G" String memory = "4G" - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -859,7 +859,7 @@ task SortSam { # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -917,7 +917,7 @@ task SortVcf { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } @@ -967,7 +967,7 @@ task RenameSample { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.19.0--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { diff --git a/samtools.wdl b/samtools.wdl index 0aecf4ee..9042a0df 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -69,7 +69,7 @@ task Faidx { String outputDir String memory = "2G" - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } command { @@ -118,7 +118,7 @@ task Fastq { Int threads = 1 String memory = "1G" Int timeMinutes = 1 + ceil(size(inputBam) * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } command { @@ -180,7 +180,7 @@ task FilterShortReadsBam { String memory = "1G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) - String dockerImage = 
"quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") @@ -226,7 +226,7 @@ task Flagstat { String memory = "256M" # Only 40.5 MiB used for 150G bam file. Int timeMinutes = 1 + ceil(size(inputBam, "G")) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } command { @@ -266,7 +266,7 @@ task Index { String memory = "2G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } # Select_first is needed, otherwise womtool validate fails. @@ -317,7 +317,7 @@ task Markdup { String outputBamPath Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } command { @@ -356,7 +356,7 @@ task Merge { Int threads = 1 String memory = "4G" Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } String indexPath = sub(outputBamPath, "\.bam$",".bai") @@ -411,7 +411,7 @@ task Sort { Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } # Select first needed as outputPath is optional input (bug in cromwell). 
@@ -523,7 +523,7 @@ task View { Int threads = 1 String memory = "1G" Int timeMinutes = 1 + ceil(size(inFile, "G") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } String outputIndexPath = basename(outputFileName) + ".bai" diff --git a/scripts b/scripts index 0cca0f40..85e2ec54 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 0cca0f40a8e9121e8dcc9e76838f85835a0d8e94 +Subproject commit 85e2ec542b65be5f2a25c22db05c28700fbe6db5 From b5558be1a1706b2ad96f947e61db78985c747cd7 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 19 Nov 2020 13:29:00 +0100 Subject: [PATCH 179/668] Revert update CutAdapt. --- .travis.yml | 3 ++- CHANGELOG.md | 1 - VERSION | 2 +- cutadapt.wdl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 396b998f..3cf0681f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,4 +19,5 @@ before_install: install: - conda install --file requirements-test.txt -script: bash scripts/biowdl_lint.sh +script: + - bash scripts/biowdl_lint.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index dae3f185..a6cc9bff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,6 @@ version 5.0.0-dev + Update Picard to version 2.23.8. + Update NanoPlot to version 1.32.1. + Update MultiQC to version 1.9. -+ Update CutAdapt to version 3.0.0. + Update StringTie to version 2.1.4. + Complete `parameter_meta` for tasks missing the outputs. + DeepVariant: Add an optional input for the gvcf index. 
diff --git a/VERSION b/VERSION index ee74734a..0062ac97 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.1.0 +5.0.0 diff --git a/cutadapt.wdl b/cutadapt.wdl index b9f5a649..b2dbdec0 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -85,7 +85,7 @@ task Cutadapt { Int cores = 4 String memory = "~{300 + 100 * cores}M" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) - String dockerImage = "quay.io/biocontainers/cutadapt:3.0--py37hf01694f_0" + String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" } String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) From 9b8d8a9844ea41ad4f1f630ed6b816be5596f8c9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 14:04:28 +0100 Subject: [PATCH 180/668] add hmftools.wdl --- hmftools.wdl | 433 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 433 insertions(+) create mode 100644 hmftools.wdl diff --git a/hmftools.wdl b/hmftools.wdl new file mode 100644 index 00000000..73c3e318 --- /dev/null +++ b/hmftools.wdl @@ -0,0 +1,433 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Amber { + input { + String normalName + File normalBam + File normalBamIndex + String tumorName + File tumorBam + File tumorBamIndex + String outputDir = "./amber" + File loci + File referenceFasta + File referenceFastaFai + File referenceFastaDict + + Int threads = 2 + String memory = = "33G" + String javaXmx = "32G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" + } + + command { + AMBER -Xmx~{javaXmx} \ + -reference ~{normalName} \ + -reference_bam ~{normalBam} \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + -output_dir ~{outputDir} \ + -threads ~{threads} \ + -ref_genome ~{referenceFasta} \ + -loci ~{loci} + } + + output { + File version = "amber.version" + File tumorBafPcf = "~{outputDir}/~{tumorName}.amber.baf.pcf" + File tumorBafTsv = "~{outputDir}/~{tumorName}.amber.baf.tsv" + File tumorBafVcf = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz" + File tumorBafVcfIndex = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz.tbi" + File tumorContaminationVcf = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz" + File tumorContaminationVcfIndex = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz.tbi" + File tumorContaminationTsv = "~{outputDir}/~{tumorName}.amber.contamination.tsv" + File tumorQc = "~{outputDir}/~{tumorName}.amber.qc" + File normalSnpVcf = "~{outputDir}/~{normalName}.amber.snp.vcf.gz" + File normalSnpVcfIndex = "~{outputDir}/~{normalName}.amber.snp.vcf.gz.tbi" + Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex, + tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, + normalSnpVcf, normalSnpVcfIndex] + } + + runtime { + memory: memory + time_minutes: 
timeMinutes # !UnknownRuntimeKey + docker: dockerImage + cpu: threads + } + + parameter_meta { + normalName: {description: "the name of the normal sample.", category: "required"} + normalBam: {description: "The normal BAM file.", category: "required"} + normalBamIndex: {description: "The index for the normal BAM file.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The tumor BAM file.", category: "required"} + tumorBamIndex: {description: "The index for the tumor BAM file.", category: "required"} + outputDir: {description: "The path to the output directory.", category: "common"} + loci: {description: "A VCF file containing likely heterozygous sites.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + threads: {description: "The number of threads the program will use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task Cobalt { + input { + String normalName + File normalBam + File normalBamIndex + String tumorName + File tumorBam + File tumorBamIndex + String outputDir = "./cobalt" + File gcProfile + + Int threads = 1 + String memory = = "9G" + String javaXmx = "8G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.10--0" + } + + command { + COBALT -Xmx~{javaXmx} \ + -reference ~{normalName} \ + -reference_bam ~{normalBam} \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + -output_dir ~{outputDir}\ + -threads ~{threads} \ + -gc_profile ~{gcProfile} + } + + output { + File version = "cobalt.version" + File normalGcMedianTsv = "~{outputDir}/~{normalName}.cobalt.gc.median.tsv" + File normalRationMedianTsv = "~{outputDir}/~{normalName}.cobalt.ratio.median.tsv" + File normalRationPcf = "~{outputDir}/~{normalName}.cobalt.ratio.pcf" + File tumorGcMedianTsv = "~{outputDir}/~{tumorName}.cobalt.gc.median.tsv" + File tumorRatioPcf = "~{outputDir}/~{tumorName}.cobalt.ratio.pcf" + File tumorRatioTsv = "~{outputDir}/~{tumorName}.cobalt.ratio.tsv" + File tumorChrLen = "~{outputDir}/~{tumorName}.chr.len" + Array[File] outputs = [version, normalGcMedianTsv, normalRationMedianTsv, + normalRationPcf, tumorGcMedianTsv, tumorRatioPcf, tumorRatioTsv, tumorChrLen] + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + cpu: threads + } + + parameter_meta { + normalName: {description: "the name of the normal sample.", category: "required"} + normalBam: {description: "The normal BAM file.", category: "required"} + normalBamIndex: {description: "The index for the normal BAM file.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The tumor BAM file.", category: "required"} + tumorBamIndex: {description: "The 
index for the tumor BAM file.", category: "required"} + outputDir: {description: "The path to the output directory.", category: "common"} + gcProfile: {description: "A file describing the GC profile of the reference genome.", category: "required"} + threads: {description: "The number of threads the program will use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task GripssApplicationKt { + input { + File inputVcf + String outputPath = "gripss.vcf.gz" + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File breakpointHotspot + File breakendPon + File breakpointPon + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + com.hartwig.hmftools.gripss.GripssApplicationKt \ + -ref_genome ~{referenceFasta} \ + -breakpoint_hotspot ~{breakpointHotspot} \ + -breakend_pon ~{breakendPon} \ + -breakpoint_pon ~{breakpointPon} \ + -input_vcf ~{inputVcf} \ + -output_vcf ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF.", category: "required"} + outputPath: {description: "The path where th eoutput VCF will be 
written.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} + breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} + breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task GripssHardFilterApplicationKt { + input { + File inputVcf + String outputPath = "gripss_hard_filter.vcf.gz" + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ + -input_vcf ~{inputVcf} \ + -output_vcf ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF.", category: "required"} + outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task Purple { + input { + String normalName + String tumorName + String outputDir = "./purple" + Array[File]+ amberOutput + Array[File]+ cobaltOutput + File gcProfile + File somaticVcf + File filteredSvVcf + File fullSvVcf + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File hotspots + + Int threads = 1 + Int time_minutes = 60 + String memory = "13G" + String javaXmx = "12G" + String docker = "quay.io/biocontainers/hmftools-purple:2.51--1" + } + + command { + PURPLE -Xmx~{javaXmx} \ + -reference ~{normalName} \ + -tumor ~{tumorName} \ + -output_dir ~{outputDir} \ + -amber ~{sub(amberOutput, basename(amberOutput[0]), "")} \ + -cobalt ~{sub(cobaltOutput, basename(cobaltOutput[0]), "")} \ + -gc_profile ~{gcProfile} \ + -somatic_vcf ~{somaticVcf} \ + -structural_vcf ~{filteredSvVcf} \ + -sv_recovery_vcf ~{fullSvVcf} \ + -circos /usr/local/bin/circos \ + -ref_genome ~{referenceFasta} \ + -driver_catalog \ + -hotspots ~{hotspots} \ + -threads ~{threads} + + # TODO if shallow also the following: + #-highly_diploid_percentage 0.88 \ + #-somatic_min_total 100 \ + #-somatic_min_purity_spread 0.1 + } + + output { + #TODO + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + normalName: {description: "the name of the normal sample.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + outputDir: {description: "The path to the output directory.", category: "common"} + amberOutput: {description: "The output files of hmftools amber.", category: "required"} + cobaltOutput: {description: "The output files of hmftools cobalt", category: "required"} + gcProfile: {description: "A file describing the GC profile of the reference genome.", category: "required"} + somaticVcf: {description: "The 
somatic variant calling results.", category: "required"} + filteredSvVcf: {description: "The filtered structural variant calling results.", category: "required"} + fullSvVcf: {description: "The unfiltered structural variant calling results.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + + threads: {description: "The number of threads the program will use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task Sage { + input { + String tumorName + File tumorBam + File tumorBamIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai + File hotspots + File panelBed + File highConfidenceBed + Boolean hg38 = false + String outputPath = "./sage.vcf.gz" + + String? normalName + File? normalBam + File? 
normalBamIndex + + Int threads = 2 + String javaXmx = "32G" + String memory = "33G" + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--2" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-sage-2.2-2/sage.jar \ + com.hartwig.hmftools.sage.SageApplication \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + ~{"-reference " + normalName} \ + ~{"-reference_bam " + normalBam} \ + -ref_genome ~{referenceFasta} \ + -hotspots ~{hotspots} \ + -panel_bed ~{panelBed} \ + -high_confidence_bed ~{highConfidenceBed} \ + -assembly ~{true="hg38" false="hg19" hg38} \ + -threads ~{threads} \ + -out ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + # There is some plots as well, but in the current container the labels in the plots are just series of `□`s. + # This seems to be a systemic issue with R generated plots in biocontainers... 
+ } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} + tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} + normalName: {description: "The name of the normal/reference sample.", category: "common"} + normalBam: {description: "The BAM file for the normal sample.", category: "common"} + normalBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + panelBed: {description: "A bed file describing coding regions to search for in frame indels.", category: "required"} + highConfidenceBed: {description: "A bed files describing high confidence mapping regions.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From 90fd344b8f41fb6b1d632a8412ec2b416c5c7715 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 14:11:19 +0100 Subject: [PATCH 181/668] fix some typos --- hmftools.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 73c3e318..3757cade 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,7 +35,7 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = = "33G" + String memory = "33G" String javaXmx = "32G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" @@ -112,7 +112,7 @@ task Cobalt { File gcProfile Int threads = 1 - String memory = = "9G" + String memory = "9G" String javaXmx = "8G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.10--0" @@ -286,10 +286,10 @@ task Purple { File hotspots Int threads = 1 - Int time_minutes = 60 + Int timeMinutes = 60 String memory = "13G" String javaXmx = "12G" - String docker = "quay.io/biocontainers/hmftools-purple:2.51--1" + String dockerImage = "quay.io/biocontainers/hmftools-purple:2.51--1" } command { @@ -297,8 +297,8 @@ task Purple { -reference ~{normalName} \ -tumor ~{tumorName} \ -output_dir ~{outputDir} \ - -amber ~{sub(amberOutput, basename(amberOutput[0]), "")} \ - -cobalt ~{sub(cobaltOutput, basename(cobaltOutput[0]), "")} \ + -amber ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ + -cobalt ~{sub(cobaltOutput[0], basename(cobaltOutput[0]), "")} \ -gc_profile ~{gcProfile} \ -somatic_vcf ~{somaticVcf} \ -structural_vcf ~{filteredSvVcf} \ From 764f188c73d8c1b57f0d50b148a30d0e84309c42 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 14:39:52 +0100 Subject: [PATCH 182/668] fix outputs amber/cobalt --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 
3757cade..09af79c9 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -54,7 +54,7 @@ task Amber { } output { - File version = "amber.version" + File version = "~{outputDir}/amber.version" File tumorBafPcf = "~{outputDir}/~{tumorName}.amber.baf.pcf" File tumorBafTsv = "~{outputDir}/~{tumorName}.amber.baf.tsv" File tumorBafVcf = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz" @@ -130,7 +130,7 @@ task Cobalt { } output { - File version = "cobalt.version" + File version = "~{outputDir}/cobalt.version" File normalGcMedianTsv = "~{outputDir}/~{normalName}.cobalt.gc.median.tsv" File normalRationMedianTsv = "~{outputDir}/~{normalName}.cobalt.ratio.median.tsv" File normalRationPcf = "~{outputDir}/~{normalName}.cobalt.ratio.pcf" From 857da21ef4b61276d3beb5ddbe56d0895cd96c32 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 14:54:06 +0100 Subject: [PATCH 183/668] typo --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 09af79c9..ed2914bf 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -124,7 +124,7 @@ task Cobalt { -reference_bam ~{normalBam} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ - -output_dir ~{outputDir}\ + -output_dir ~{outputDir} \ -threads ~{threads} \ -gc_profile ~{gcProfile} } From 54ac9d0c41f74c578f2418bc76483d1081695369 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 15:18:41 +0100 Subject: [PATCH 184/668] add missed argument in purple --- hmftools.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index ed2914bf..fc56ecd9 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -283,6 +283,7 @@ task Purple { File referenceFasta File referenceFastaFai File referenceFastaDict + File driverGenePanel File hotspots Int threads = 1 @@ -306,6 +307,7 @@ task Purple { -circos /usr/local/bin/circos \ -ref_genome ~{referenceFasta} \ -driver_catalog \ + -driver_gene_panel ~{driverGenePanel} \ -hotspots ~{hotspots} \ -threads ~{threads} @@ -340,6 +342,7 @@ 
task Purple { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + driverGenePanel: {description: "A bed file describing the driver gene panel.", category: "required"} hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} From 243c1dbfc834d2e52876e826bf2f852fe51cb2fb Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 26 Nov 2020 09:05:09 +0100 Subject: [PATCH 185/668] enable genotyping --- smoove.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index e5c5348f..d1011f6c 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -41,11 +41,13 @@ task Call { --outdir ~{outputDir} \ --name ~{sample} \ --fasta ~{referenceFasta} \ + --removepr \ + --genotype \ ~{bamFile} } output { - File smooveVcf = outputDir + "/" + sample + "-smoove.vcf.gz" + File smooveVcf = outputDir + "/" + sample + "-smoove.genotyped.vcf.gz" } runtime { From 86f26caf9fa94c5aa2b2e917bc608e1ef8173966 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 26 Nov 2020 09:31:40 +0100 Subject: [PATCH 186/668] Update PacBio tasks. --- lima.wdl | 7 +++++-- pacbio.wdl | 10 +++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/lima.wdl b/lima.wdl index 119db3f4..c06a9a73 100644 --- a/lima.wdl +++ b/lima.wdl @@ -88,9 +88,12 @@ task Lima { ~{barcodeFile} \ ~{outputPrefix + ".bam"} - # copy the files with the default filename to the folder specified in + # Copy the files with the default filename to the folder specified in # outputPrefix. - if [ "~{basename(outputPrefix)}.json" != "~{outputPrefix}.json" ]; then + if [[ -f "~{outputPrefix}.json" ]] + then + echo "Log files already at output location." 
+ else cp "~{basename(outputPrefix)}.json" "~{outputPrefix}.json" cp "~{basename(outputPrefix)}.lima.counts" "~{outputPrefix}.lima.counts" cp "~{basename(outputPrefix)}.lima.report" "~{outputPrefix}.lima.report" diff --git a/pacbio.wdl b/pacbio.wdl index df0343d9..7c0113fd 100644 --- a/pacbio.wdl +++ b/pacbio.wdl @@ -23,7 +23,7 @@ version 1.0 task mergePacBio { input { Array[File]+ reports - String mergedReport + String outputPathMergedReport String memory = "4G" String dockerImage = "lumc/pacbio-merge:0.2" @@ -31,10 +31,10 @@ task mergePacBio { command { set -e - mkdir -p $(dirname ~{mergedReport}) + mkdir -p $(dirname ~{outputPathMergedReport}) pacbio_merge \ --reports ~{sep=" " reports} \ - --json-output ~{mergedReport} + --json-output ~{outputPathMergedReport} } runtime { @@ -43,13 +43,13 @@ task mergePacBio { } output { - File outputMergedReport = mergedReport + File outputMergedReport = outputPathMergedReport } parameter_meta { # inputs reports: {description: "The PacBio report files to merge.", category: "required"} - mergedReport: {description: "The location the merged PacBio report file should be written to.", category: "common"} + outputPathMergedReport: {description: "The location the merged PacBio report file should be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 02f71e1708a92c7128165ab2919b3c9f4fb117dc Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 27 Nov 2020 14:44:01 +0100 Subject: [PATCH 187/668] Upload another fix. 
--- pbbam.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/pbbam.wdl b/pbbam.wdl index d893e64d..ae64b87c 100644 --- a/pbbam.wdl +++ b/pbbam.wdl @@ -36,7 +36,6 @@ task Index { String bamIndexPath = outputPath + ".pbi" command { - bash -c ' set -e # Make sure outputBamPath does not exist. if [ ! -f ~{outputPath} ] @@ -45,7 +44,6 @@ task Index { ln ~{bamFile} ~{outputPath} fi pbindex ~{outputPath} ~{bamIndexPath} - ' } output { From a34711e264482507e73669190b456d4de499f164 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 30 Nov 2020 10:37:23 +0100 Subject: [PATCH 188/668] downgrade stringtie and fix size call in gffread --- CHANGELOG.md | 5 ++++- gffread.wdl | 2 +- stringtie.wdl | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a6cc9bff..216fdd67 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,13 +10,16 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Fixed the `size` call in the default for gffread's timeMinutes, to retrieve + GBs instead of bytes. ++ Update stringtie to version 1.3.6. + Update Lima to version 2.0.0. + Update IsoSeq3 to version 3.4.0. + Update samtools to version 1.11. + Update Picard to version 2.23.8. + Update NanoPlot to version 1.32.1. + Update MultiQC to version 1.9. -+ Update StringTie to version 2.1.4. ++ ~Update StringTie to version 2.1.4.~ + Complete `parameter_meta` for tasks missing the outputs. + DeepVariant: Add an optional input for the gvcf index. + Samtools: `Sort` task now has `threads` in runtime instead of `1`. diff --git a/gffread.wdl b/gffread.wdl index 66230989..967dd5c9 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -32,7 +32,7 @@ task GffRead { String? proteinFastaPath String? 
filteredGffPath - Int timeMinutes = 1 + ceil(size(inputGff) * 10) + Int timeMinutes = 1 + ceil(size(inputGff, "G") * 10) String dockerImage = "quay.io/biocontainers/gffread:0.9.12--0" } diff --git a/stringtie.wdl b/stringtie.wdl index 81d96132..d3a6f73d 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -35,7 +35,7 @@ task Stringtie { Int threads = 1 String memory = "2G" Int timeMinutes = 1 + ceil(size(bam, "G") * 60 / threads) - String dockerImage = "quay.io/biocontainers/stringtie:2.1.4--h7e0af3c_0" + String dockerImage = "quay.io/biocontainers/stringtie:1.3.6--h92e31bf_0" } command { From ff47f07c0657f717fbf2311b56cdd3ad3b23a7c2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 30 Nov 2020 17:22:38 +0100 Subject: [PATCH 189/668] Update lima. --- CHANGELOG.md | 2 ++ lima.wdl | 39 ++++++++++++++++----------------------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a6cc9bff..01303723 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. ++ Lima: Fix copy commands. + Update Lima to version 2.0.0. + Update IsoSeq3 to version 3.4.0. + Update samtools to version 1.11. 
diff --git a/lima.wdl b/lima.wdl index c06a9a73..90cd6986 100644 --- a/lima.wdl +++ b/lima.wdl @@ -56,7 +56,7 @@ task Lima { Map[String, String] libraryDesignOptions = {"same": "--same", "different": "--different", "neighbors": "--neighbors"} - command { + command <<< set -e mkdir -p "$(dirname ~{outputPrefix})" lima \ @@ -83,33 +83,26 @@ task Lima { ~{true="--peek-guess" false="" peekGuess} \ --log-level ~{logLevel} \ --num-threads ~{threads} \ - ~{"--log-file " + outputPrefix + ".stderr.log"} \ + ~{"--log-file " + outputPrefix + ".fl.stderr.log"} \ ~{inputBamFile} \ ~{barcodeFile} \ - ~{outputPrefix + ".bam"} + ~{outputPrefix + ".fl.bam"} - # Copy the files with the default filename to the folder specified in - # outputPrefix. - if [[ -f "~{outputPrefix}.json" ]] - then - echo "Log files already at output location." - else - cp "~{basename(outputPrefix)}.json" "~{outputPrefix}.json" - cp "~{basename(outputPrefix)}.lima.counts" "~{outputPrefix}.lima.counts" - cp "~{basename(outputPrefix)}.lima.report" "~{outputPrefix}.lima.report" - cp "~{basename(outputPrefix)}.lima.summary" "~{outputPrefix}.lima.summary" - fi - } + dirName="$(dirname ~{outputPrefix})" + find "$(cd ${dirName}; pwd)" -name "*.fl.*.bam" > bamFiles.txt + find "$(cd ${dirName}; pwd)" -name "*.fl.*.bam.pbi" > bamIndexes.txt + find "$(cd ${dirName}; pwd)" -name "*.fl.*.subreadset.xml" > subreadsets.txt + >>> output { - Array[File] limaBam = glob("*.bam") - Array[File] limaBamIndex = glob("*.bam.pbi") - Array[File] limaXml = glob("*.subreadset.xml") - File limaStderr = outputPrefix + ".stderr.log" - File limaJson = outputPrefix + ".json" - File limaCounts = outputPrefix + ".lima.counts" - File limaReport = outputPrefix + ".lima.report" - File limaSummary = outputPrefix + ".lima.summary" + Array[File] limaBam = read_lines("bamFiles.txt") + Array[File] limaBamIndex = read_lines("bamIndexes.txt") + Array[File] limaXml = read_lines("subreadsets.txt") + File limaStderr = outputPrefix + ".fl.stderr.log" + 
File limaJson = outputPrefix + ".fl.json" + File limaCounts = outputPrefix + ".fl.lima.counts" + File limaReport = outputPrefix + ".fl.lima.report" + File limaSummary = outputPrefix + ".fl.lima.summary" } runtime { From 3de3fcc809734b3a43080a75e9ad683bb0ee055f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 30 Nov 2020 17:24:07 +0100 Subject: [PATCH 190/668] Update CHANGELOG. --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01303723..0d6c0bc8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. -+ Lima: Fix copy commands. ++ Lima: Fix copy commands & return to `fl` naming. + Update Lima to version 2.0.0. + Update IsoSeq3 to version 3.4.0. + Update samtools to version 1.11. From 0df52e802caa2e7f3793ec37f6378d8929bb6411 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 1 Dec 2020 12:21:37 +0100 Subject: [PATCH 191/668] Remove naming. 
--- lima.wdl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lima.wdl b/lima.wdl index 90cd6986..2455aaac 100644 --- a/lima.wdl +++ b/lima.wdl @@ -83,26 +83,26 @@ task Lima { ~{true="--peek-guess" false="" peekGuess} \ --log-level ~{logLevel} \ --num-threads ~{threads} \ - ~{"--log-file " + outputPrefix + ".fl.stderr.log"} \ + ~{"--log-file " + outputPrefix + ".lima.stderr.log"} \ ~{inputBamFile} \ ~{barcodeFile} \ - ~{outputPrefix + ".fl.bam"} + ~{outputPrefix + ".bam"} dirName="$(dirname ~{outputPrefix})" - find "$(cd ${dirName}; pwd)" -name "*.fl.*.bam" > bamFiles.txt - find "$(cd ${dirName}; pwd)" -name "*.fl.*.bam.pbi" > bamIndexes.txt - find "$(cd ${dirName}; pwd)" -name "*.fl.*.subreadset.xml" > subreadsets.txt + find "$(cd ${dirName}; pwd)" -name "*.bam" > bamFiles.txt + find "$(cd ${dirName}; pwd)" -name "*.bam.pbi" > bamIndexes.txt + find "$(cd ${dirName}; pwd)" -name "*.subreadset.xml" > subreadsets.txt >>> output { Array[File] limaBam = read_lines("bamFiles.txt") Array[File] limaBamIndex = read_lines("bamIndexes.txt") Array[File] limaXml = read_lines("subreadsets.txt") - File limaStderr = outputPrefix + ".fl.stderr.log" - File limaJson = outputPrefix + ".fl.json" - File limaCounts = outputPrefix + ".fl.lima.counts" - File limaReport = outputPrefix + ".fl.lima.report" - File limaSummary = outputPrefix + ".fl.lima.summary" + File limaStderr = outputPrefix + ".lima.stderr.log" + File limaJson = outputPrefix + ".json" + File limaCounts = outputPrefix + ".lima.counts" + File limaReport = outputPrefix + ".lima.report" + File limaSummary = outputPrefix + ".lima.summary" } runtime { From fec33b447644769d5c1602d7a0fee0c6ee19b3b9 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 1 Dec 2020 12:22:25 +0100 Subject: [PATCH 192/668] Update changelog. 
--- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 77cf803b..22f41826 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. -+ Lima: Fix copy commands & return to `fl` naming. ++ Lima: Fix copy commands. + Fixed the `size` call in the default for gffread's timeMinutes, to retrieve GBs instead of bytes. + Update stringtie to version 1.3.6. From e87052a739ba2d2ac29cf0dad1cb5ace642f6e8c Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 9 Dec 2020 13:26:24 +0100 Subject: [PATCH 193/668] add duphold paramater in smoove --- smoove.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/smoove.wdl b/smoove.wdl index d1011f6c..7a7e4305 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -43,6 +43,7 @@ task Call { --fasta ~{referenceFasta} \ --removepr \ --genotype \ + --duphold \ ~{bamFile} } From 19b79d9c2617212deb1d2dca1e6ca93c2115d847 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Dec 2020 15:59:19 +0100 Subject: [PATCH 194/668] Use github actions CI --- .github/workflows/ci.yml | 32 ++++++++++++++++++++++++++++++++ .travis.yml | 23 ----------------------- requirements-test.txt | 11 ----------- 3 files changed, 32 insertions(+), 34 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 .travis.yml delete mode 100644 requirements-test.txt diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..97d329ad --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,32 @@ +name: Continuous integration + +on: + pull_request: + paths: + - "**.wdl" # Workflow files and task + - "**.yml" # Ci configuration, tests and docker images + - "!docs/**" + +defaults: + run: + # This is needed for miniconda, see: + # https://github.com/marketplace/actions/setup-miniconda#important 
+ shell: bash -l {0} + +jobs: + lint: + runs-on: ubuntu-latest + name: Womtool validate and submodule up to date. + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + - name: install miniconda + uses: conda-incubator/setup-miniconda@v2.0.1 + with: + channels: conda-forge,bioconda,defaults + # Conda-incubator uses 'test' environment by default. + - name: install requirements + run: conda install -n test cromwell miniwdl wdl-aid + - name: run linting + run: bash scripts/biowdl_lint.sh \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 3cf0681f..00000000 --- a/.travis.yml +++ /dev/null @@ -1,23 +0,0 @@ -# We use conda to install cromwell. - -language: python - -python: - - 3.6 - -before_install: - # Install conda - - export MINICONDA=${HOME}/miniconda - - export PATH=${MINICONDA}/bin:${PATH} - - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh - - bash miniconda.sh -b -f -p ${MINICONDA} - - conda config --set always_yes yes - - conda config --add channels defaults - - conda config --add channels bioconda - - conda config --add channels conda-forge - -install: - - conda install --file requirements-test.txt - -script: - - bash scripts/biowdl_lint.sh diff --git a/requirements-test.txt b/requirements-test.txt deleted file mode 100644 index 0b01d193..00000000 --- a/requirements-test.txt +++ /dev/null @@ -1,11 +0,0 @@ -# These are the programs used for testing these biowdl tasks. -# These requirements can be installed with conda with the bioconda channel -# activated. -# For more information on how to set up conda with bioconda channel see: -# http://bioconda.github.io/#install-conda -# This file can be installed with "conda install --file requirements-test.txt". 
- -cromwell -womtool -miniwdl -wdl-aid From 52b7c02f4ed1e7bee376af192747efa75cf55004 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 10:04:43 +0100 Subject: [PATCH 195/668] bcftools: rm memory parameter meta --- bcftools.wdl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 28380dea..0cbfdefd 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -43,7 +43,7 @@ task Annotate { File? regionsFile File? renameChrs File? samplesFile - + Int threads = 0 String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) @@ -53,7 +53,7 @@ task Annotate { Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") command { - set -e + set -e mkdir -p "$(dirname ~{outputPath})" bcftools annotate \ -o ~{outputPath} \ @@ -154,7 +154,7 @@ task Sort { File outputVcf = outputPath File? outputVcfIndex = outputPath + ".tbi" } - + runtime { memory: memory time_minutes: timeMinutes @@ -291,6 +291,8 @@ task View { File inputFile String outputPath = "output.vcf" + String? exclude + String? 
include String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" @@ -302,6 +304,8 @@ task View { set -e mkdir -p "$(dirname ~{outputPath})" bcftools view \ + ~{"--include " + include} \ + ~{"--exclude " + exclude} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ ~{inputFile} @@ -324,7 +328,8 @@ task View { # inputs inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} + include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} + exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 4cf91963c64c48478c8009e65aa20678ad423eb9 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 10:44:10 +0100 Subject: [PATCH 196/668] add duphold --- duphold.sh | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 duphold.sh diff --git a/duphold.sh b/duphold.sh new file mode 100644 index 00000000..6e65ee5c --- /dev/null +++ b/duphold.sh @@ -0,0 +1,76 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task Duphold { + input { + File inputVcf + File bamFile + File bamIndex + File referenceFasta + File referenceFastaFai + String sample + String outputPath = "./duphold.vcf" + + String memory = "15G" + Int timeMinutes = 1440 + String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" + } + + String outputVCF = outputDir + basename(inputVcf, ".vcf") + "-duphold.vcf" + + command { + set -e + mkdir -p ~{outputDir} + export DUPHOLD_SAMPLE_NAME=~{sample} + duphold \ + -v ~{inputVcf} \ + -b ~{bamFile} \ + -f ~{referenceFasta} \ + -o ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + bamFile: {description: "The bam file to process.", category: "required"} + bamIndex: {description: "The index of the bam file.", category: "required"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} + referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } + sample: {description: "The name of the sample.", category: "required"} + outputDir: {description: "The location the output VCF file should be written.", category: "common"} + memory: {description: "The memory required to run the programs.", category: "advanced"} + timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + smooveVcf: {description: "Calls of structural variants in VCF file."} + } +} From fb65bfe1ab5e627cb23812264ab651748e844b89 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 11:13:40 +0100 Subject: [PATCH 197/668] add duphold.wdl --- duphold.sh => duphold.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) rename duphold.sh => duphold.wdl (92%) diff --git a/duphold.sh b/duphold.wdl similarity index 92% rename from duphold.sh rename to duphold.wdl index 6e65ee5c..9c7255ff 100644 --- a/duphold.sh +++ b/duphold.wdl @@ -32,7 +32,7 @@ task Duphold { String memory = "15G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" + String dockerImage = "quay.io/biocontainers/duphold:0.2.1--h516909a_1" } String outputVCF = outputDir + basename(inputVcf, ".vcf") + "-duphold.vcf" @@ -60,6 +60,7 @@ task Duphold { parameter_meta { # inputs + inputVcf: {description: "The VCF file to process.", category: "required"} bamFile: {description: "The bam file to process.", category: "required"} bamIndex: {description: "The index of the bam file.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} @@ -71,6 +72,6 @@ task Duphold { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - smooveVcf: {description: "Calls of structural variants in VCF file."} + outputVcf: {description: "Duphold annotated VCF file."} } } From fca78c3d28d57b5ebfe802deccc52b86ae00c651 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 12:51:23 +0100 Subject: [PATCH 198/668] fix outputpath --- duphold.wdl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/duphold.wdl b/duphold.wdl index 9c7255ff..80fe31d2 100644 --- a/duphold.wdl +++ b/duphold.wdl @@ -35,11 +35,9 @@ task Duphold { String dockerImage = "quay.io/biocontainers/duphold:0.2.1--h516909a_1" } - String outputVCF = outputDir + basename(inputVcf, ".vcf") + "-duphold.vcf" - command { set -e - mkdir -p ~{outputDir} + mkdir -p "$(dirname ~{outputPath})" export DUPHOLD_SAMPLE_NAME=~{sample} duphold \ -v ~{inputVcf} \ @@ -66,7 +64,7 @@ task Duphold { referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } sample: {description: "The name of the sample.", category: "required"} - outputDir: {description: "The location the output VCF file should be written.", category: "common"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 80566da7e582afa0d445547fb3555a8f9cccae07 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 12:51:39 +0100 Subject: [PATCH 199/668] remove duphold parameter --- smoove.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index 7a7e4305..d1011f6c 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -43,7 +43,6 @@ task Call { --fasta ~{referenceFasta} \ --removepr \ --genotype \ - --duphold \ ~{bamFile} } From 0232cf8e79dc6975eecc9a7d2336f45f2d191f05 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Dec 2020 16:19:20 +0100 Subject: [PATCH 200/668] add some taks --- hmftools.wdl | 47 ++++++++++++++++++++++++++++++++++++++ picard.wdl | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index fc56ecd9..f9a606e7 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -269,6 +269,53 @@ task GripssHardFilterApplicationKt { } } +task HealthChecker { + input { + String normalName + String tumorName + + String javaXmx = "10G" + } + + command { + java -Xmx10G \ + -jar /opt/tools/health-checker/3.1/health-checker.jar \ + -reference ~{normalName} \ + -tumor ~{tumorName} \ + -metrics_dir ~{metricsPath} \ + -amber_dir ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ + -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ + -output_dir ~{outputDir} + } + + # super("health-checker", + # Versions.HEALTH_CHECKER, + # "health-checker.jar", + # "10G", + # Lists.newArrayList("-reference", + # referenceSampleName, + # "-tumor", + # tumorSampleName, + # "-ref_wgs_metrics_file", + # referenceMetricsPath, + # "-tum_wgs_metrics_file", + # tumorMetricsPath, + # "-ref_flagstat_file", + # referenceFlagstatPath, + # "-tum_flagstat_file", + # tumorFlagstatPath, + # "-purple_dir", + # purplePath, + # "-output_dir", + # outputPath)); + + output { + + } + + +} + task 
Purple { input { String normalName diff --git a/picard.wdl b/picard.wdl index 1afa5ea7..88ddd313 100644 --- a/picard.wdl +++ b/picard.wdl @@ -315,6 +315,70 @@ task CollectTargetedPcrMetrics { } } +task CollectWgsMetrics { + input { + File inputBam + File inputBamIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai + String outputPath = "./wgs_metrics.txt" + + Int? minimumMappingQuality + Int? minimumBaseQuality + Int? coverageCap + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + CollectWgsMetrics \ + REFERENCE_SEQUENCE=~{referenceFasta} \ + INPUT=~{inputBam} \ + OUTPUT=~{outputPath} \ + ~{"MINIMUM_MAPPING_QUALITY=" + minimumMappingQuality} \ + ~{"MINIMUM_BASE_QUALITY=" + minimumBaseQuality} \ + ~{"OVERAGE_CAP=" + coverageCap} + } + + output { + File metrics = outputPath + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} + inputBamIndex: {description: "The index of the input BAM file.", category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + outputPath: {description: "The path picard CollectWgsMetrics' output should be written to.", category: "common"} + minimumMappingQuality: {description: "Equivalent to picard CollectWgsMetrics' MINIMUM_MAPPING_QUALITY option.", category: "advanced"} + minimumBaseQuality: {description: 
"Equivalent to picard CollectWgsMetrics' MINIMUM_BASE_QUALITY option.", category: "advanced"} + coverageCap: {description: "Equivalent to picard CollectWgsMetrics' OVERAGE_CAP option.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task CreateSequenceDictionary { input { File inputFile From 9896f4fcaba3d5ee9b070a03a21bc23484037fb1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 16 Dec 2020 14:08:56 +0100 Subject: [PATCH 201/668] add purple outputs --- bcftools.wdl | 2 +- bwa.wdl | 2 +- gridss.wdl | 2 +- hmftools.wdl | 39 ++++++++++++++++++++++++++++++++++----- sambamba.wdl | 2 +- 5 files changed, 38 insertions(+), 9 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 1dba7611..c91460bb 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -50,7 +50,7 @@ task Annotate { Int threads = 0 String memory = "256M" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) + Int timeMinutes = 10 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } diff --git a/bwa.wdl b/bwa.wdl index fdeb870f..44cfc9fe 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -34,7 +34,7 @@ task Mem { Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 Int? 
memoryGb - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 260 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10 String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } diff --git a/gridss.wdl b/gridss.wdl index c444c854..88655442 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -35,7 +35,7 @@ task GRIDSS { Int jvmHeapSizeGb = 30 Int threads = 2 - Int timeMinutes = ceil(1440 / threads) + 10 + Int timeMinutes = ceil(2880 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } diff --git a/hmftools.wdl b/hmftools.wdl index f9a606e7..86d90332 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -37,7 +37,7 @@ task Amber { Int threads = 2 String memory = "33G" String javaXmx = "32G" - Int timeMinutes = 60 + Int timeMinutes = 1200 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } @@ -114,7 +114,7 @@ task Cobalt { Int threads = 1 String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 60 + Int timeMinutes = 1200 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.10--0" } @@ -312,8 +312,6 @@ task HealthChecker { output { } - - } task Purple { @@ -327,6 +325,7 @@ task Purple { File somaticVcf File filteredSvVcf File fullSvVcf + File fullSvVcfIndex File referenceFasta File referenceFastaFai File referenceFastaDict @@ -365,7 +364,37 @@ task Purple { } output { - #TODO + File driverCatalogTsv = "~{outputDir}/~{tumorName}.driver.catalog.tsv" + File purpleCnvGeneTsv = "~{outputDir}/~{tumorName}.purple.cnv.gene.tsv" + File purpleCnvGermlineTsv = "~{outputDir}/~{tumorName}.purple.cnv.germline.tsv" + File purpleCnvSomaticTsv = "~{outputDir}/~{tumorName}.purple.cnv.somatic.tsv" + File purplePurityRangeTsv = "~{outputDir}/~{tumorName}.purple.purity.range.tsv" + File purplePurityTsv = "~{outputDir}/~{tumorName}.purple.purity.tsv" + File purpleQc = 
"~{outputDir}/~{tumorName}.purple.qc" + File purpleSegmentTsv = "~{outputDir}/~{tumorName}.purple.segment.tsv" + File purpleSomaticClonalityTsv = "~{outputDir}/~{tumorName}.purple.somatic.clonality.tsv" + File purpleSomaticHistTsv = "~{outputDir}/~{tumorName}.purple.somatic.hist.tsv" + File purpleSomaticVcf = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz" + File purpleSomaticVcfIndex = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz.tbi" + File purpleSvVcf = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz" + File purpleSvVcfIndex = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz.tbi" + File circosPlot = "~{outputDir}/plot/~{tumorName}.circos.png" + File copynumberPlot = "~{outputDir}/plot/~{tumorName}.copynumber.png" + File inputPlot = "~{outputDir}/plot/~{tumorName}.input.png" + File mapPlot = "~{outputDir}/plot/~{tumorName}.map.png" + File purityRangePlot = "~{outputDir}/plot/~{tumorName}.purity.range.png" + File segmentPlot = "~{outputDir}/plot/~{tumorName}.segment.png" + File somaticClonalityPlot = "~{outputDir}/plot/~{tumorName}.somatic.clonality.png" + File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" + File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" + File purpleVersion = "~{outputDir}/purple.version" + Array[File] outputs = [driverCatalogTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, + purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, + purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, + purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, + purpleVersion] + Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, + segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] } runtime { diff --git a/sambamba.wdl b/sambamba.wdl index cd8da21e..3fc57c65 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -41,7 +41,7 @@ task Markdup { Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize String dockerImage = 
"quay.io/biocontainers/sambamba:0.7.1--h148d290_2" # Time minute calculation does not work well for higher number of threads. - Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 16) / threads } String bamIndexPath = sub(outputPath, "\.bam$", ".bai") From df51100b8ffd6cb2dee27859b46ef94d901f4715 Mon Sep 17 00:00:00 2001 From: tomkuipers1402 Date: Tue, 22 Dec 2020 13:41:30 +0100 Subject: [PATCH 202/668] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22f41826..424dc764 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Add new wdl file prepareShiny.wdl for creating input files for shiny app. + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. + Lima: Fix copy commands. + Fixed the `size` call in the default for gffread's timeMinutes, to retrieve From f60a018191e1b96a5abdfae8b68d4ae4d3ee3b06 Mon Sep 17 00:00:00 2001 From: tomkuipers1402 Date: Tue, 22 Dec 2020 13:42:05 +0100 Subject: [PATCH 203/668] add tasks to create input files for DGE analysis --- prepareShiny.wdl | 108 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 prepareShiny.wdl diff --git a/prepareShiny.wdl b/prepareShiny.wdl new file mode 100644 index 00000000..d304798d --- /dev/null +++ b/prepareShiny.wdl @@ -0,0 +1,108 @@ +version 1.0 + +# Copyright (c) 2017 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons 
to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task CreateDesignMatrix { + input { + File countTable + String shinyDir = "." + + Int threads = 1 + String memory = "5G" + Int timeMinutes = 30 + String dockerImage = "quay.io/biocontainers/predex:0.9.1--pyh3252c3a_0" + } + + command { + set -e + mkdir -p ${shinyDir} + predex design \ + -i ${countTable} \ + -o ${shinyDir} + } + + output { + File dgeDesign = shinyDir + "/design_matrix.tsv" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + countTable: {description: "The created count table from HTseq.", category: "required"} + shinyDir: {description: "The directory to write the output to.", category: "required"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task CreateAnnotation { + input { + File referenceFasta + File referenceGtfFile + String shinyDir = "." 
+ + Int threads = 1 + String memory = "10G" + Int timeMinutes = 90 + String dockerImage = "quay.io/biocontainers/predex:0.9.1--pyh3252c3a_0" + } + + command { + set -e + mkdir -p ${shinyDir} + predex annotation \ + -f ${referenceFasta} \ + -g ${referenceGtfFile} \ + -o ${shinyDir} + } + + output { + File dgeAnnotation = shinyDir + "/annotation.tsv" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + referenceFasta: {description: "The reference Fasta file.", category: "required"} + referenceGtfFile: {description: "The reference GTF file.", category: "required"} + shinyDir: {description: "The directory to write the output to.", category: "required"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From fcd32243e1aaa62a842435e5cc2671843d8afc54 Mon Sep 17 00:00:00 2001 From: tomkuipers1402 Date: Thu, 24 Dec 2020 13:12:23 +0100 Subject: [PATCH 204/668] style update --- prepareShiny.wdl | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/prepareShiny.wdl b/prepareShiny.wdl index d304798d..81354a16 100644 --- a/prepareShiny.wdl +++ b/prepareShiny.wdl @@ -33,10 +33,10 @@ task CreateDesignMatrix { command { set -e - mkdir -p ${shinyDir} + mkdir -p ~{shinyDir} predex design \ - -i ${countTable} \ - -o ${shinyDir} + -i ~{countTable} \ + -o ~{shinyDir} } output { @@ -51,14 +51,16 @@ task CreateDesignMatrix { } parameter_meta { + # inputs countTable: {description: "The created count table from HTseq.", category: "required"} - shinyDir: {description: "The directory to write the output to.", category: "required"} - + shinyDir: {description: "The directory to write the output to.", category: "required"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + dgeDesign: {description: "Design matrix template to add sample information for DGE analysis."} } } @@ -76,11 +78,11 @@ task CreateAnnotation { command { set -e - mkdir -p ${shinyDir} + mkdir -p ~{shinyDir} predex annotation \ - -f ${referenceFasta} \ - -g ${referenceGtfFile} \ - -o ${shinyDir} + -f ~{referenceFasta} \ + -g ~{referenceGtfFile} \ + -o ~{shinyDir} } output { @@ -95,14 +97,16 @@ task CreateAnnotation { } parameter_meta { + # inputs referenceFasta: {description: "The reference Fasta file.", category: "required"} referenceGtfFile: {description: "The reference GTF file.", category: "required"} - shinyDir: {description: "The directory to write the output to.", category: "required"} - + shinyDir: {description: "The directory to write the output to.", category: "required"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + dgeAnnotation: {description: "Annotation file for DGE analysis."} } } From ca452303add0b2afeabb6595e09c7a036df58fc3 Mon Sep 17 00:00:00 2001 From: tomkuipers1402 Date: Mon, 28 Dec 2020 10:31:34 +0100 Subject: [PATCH 205/668] annotation update --- prepareShiny.wdl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/prepareShiny.wdl b/prepareShiny.wdl index 81354a16..13cd0b1c 100644 --- a/prepareShiny.wdl +++ b/prepareShiny.wdl @@ -28,15 +28,15 @@ task CreateDesignMatrix { Int threads = 1 String memory = "5G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/predex:0.9.1--pyh3252c3a_0" + String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" } command { set -e mkdir -p ~{shinyDir} predex design \ - -i ~{countTable} \ - -o ~{shinyDir} + --input ~{countTable} \ + --output ~{shinyDir} } output { @@ -70,19 +70,19 @@ task CreateAnnotation { File referenceGtfFile String shinyDir = "." 
- Int threads = 1 - String memory = "10G" - Int timeMinutes = 90 - String dockerImage = "quay.io/biocontainers/predex:0.9.1--pyh3252c3a_0" + Int threads = 2 + String memory = "5G" + Int timeMinutes = 30 + String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" } command { set -e mkdir -p ~{shinyDir} predex annotation \ - -f ~{referenceFasta} \ - -g ~{referenceGtfFile} \ - -o ~{shinyDir} + --fasta ~{referenceFasta} \ + --gtf ~{referenceGtfFile} \ + --output ~{shinyDir} } output { From 48d468d7c97e4b9e3ee892ff49b3fdda4fee9de9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 6 Jan 2021 11:11:41 +0100 Subject: [PATCH 206/668] add note to HealthChecker --- hmftools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/hmftools.wdl b/hmftools.wdl index 86d90332..760fb63f 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -270,6 +270,7 @@ task GripssHardFilterApplicationKt { } task HealthChecker { + # WIP input { String normalName String tumorName From c482e833fa60a8a138b8045dc3f044be0655599c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 6 Jan 2021 11:31:52 +0100 Subject: [PATCH 207/668] comment out healthchecker task and remove duplicate input in bcftools annotate --- bcftools.wdl | 4 +-- hmftools.wdl | 90 ++++++++++++++++++++++++++-------------------------- 2 files changed, 46 insertions(+), 48 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 8721540a..14889dff 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -44,9 +44,7 @@ task Annotate { String? regions File? regionsFile File? renameChrs - File? samplesFile - Boolean singleOverlaps = false - + File? 
samplesFile Int threads = 0 String memory = "256M" diff --git a/hmftools.wdl b/hmftools.wdl index 760fb63f..16313fca 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -269,51 +269,51 @@ task GripssHardFilterApplicationKt { } } -task HealthChecker { - # WIP - input { - String normalName - String tumorName - - String javaXmx = "10G" - } - - command { - java -Xmx10G \ - -jar /opt/tools/health-checker/3.1/health-checker.jar \ - -reference ~{normalName} \ - -tumor ~{tumorName} \ - -metrics_dir ~{metricsPath} \ - -amber_dir ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ - -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ - -output_dir ~{outputDir} - } - - # super("health-checker", - # Versions.HEALTH_CHECKER, - # "health-checker.jar", - # "10G", - # Lists.newArrayList("-reference", - # referenceSampleName, - # "-tumor", - # tumorSampleName, - # "-ref_wgs_metrics_file", - # referenceMetricsPath, - # "-tum_wgs_metrics_file", - # tumorMetricsPath, - # "-ref_flagstat_file", - # referenceFlagstatPath, - # "-tum_flagstat_file", - # tumorFlagstatPath, - # "-purple_dir", - # purplePath, - # "-output_dir", - # outputPath)); - - output { - - } -} +# task HealthChecker { +# # WIP +# input { +# String normalName +# String tumorName +# +# String javaXmx = "10G" +# } +# +# command { +# java -Xmx10G \ +# -jar /opt/tools/health-checker/3.1/health-checker.jar \ +# -reference ~{normalName} \ +# -tumor ~{tumorName} \ +# -metrics_dir ~{metricsPath} \ +# -amber_dir ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ +# -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ +# -output_dir ~{outputDir} +# } +# +# # super("health-checker", +# # Versions.HEALTH_CHECKER, +# # "health-checker.jar", +# # "10G", +# # Lists.newArrayList("-reference", +# # referenceSampleName, +# # "-tumor", +# # tumorSampleName, +# # "-ref_wgs_metrics_file", +# # referenceMetricsPath, +# # "-tum_wgs_metrics_file", +# # tumorMetricsPath, +# # "-ref_flagstat_file", +# # 
referenceFlagstatPath, +# # "-tum_flagstat_file", +# # tumorFlagstatPath, +# # "-purple_dir", +# # purplePath, +# # "-output_dir", +# # outputPath)); +# +# output { +# +# } +# } task Purple { input { From 7988dbb2259f9a396fd19c514c48731e96d49e42 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 8 Jan 2021 11:06:09 +0100 Subject: [PATCH 208/668] make reference annotation optional for gffcompare --- gffcompare.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gffcompare.wdl b/gffcompare.wdl index 50cab8a6..8b135479 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -23,7 +23,6 @@ version 1.0 task GffCompare { input { Array[File] inputGtfFiles - File referenceAnnotation # gffcmp is the default used by the program as well. This needs to be # defined in order for the output values to be consistent and correct. String outPrefix = "gffcmp" @@ -40,6 +39,7 @@ task GffCompare { Boolean debugMode = false File? inputGtfList + File? referenceAnnotation String? outputDir File? genomeSequences Int? 
maxDistanceFreeEndsTerminalExons @@ -64,7 +64,7 @@ task GffCompare { set -e ~{"mkdir -p " + outputDir} gffcompare \ - -r ~{referenceAnnotation} \ + ~{"-r " + referenceAnnotation} \ ~{"-o '" + totalPrefix + "'"} \ ~{"-s " + genomeSequences} \ ~{"-e " + maxDistanceFreeEndsTerminalExons} \ From c22629ff7ec5c57f113ed79e2fc2784ee915b89f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 15 Jan 2021 15:03:13 +0100 Subject: [PATCH 209/668] add linx task, add more inputs to sage --- hmftools.wdl | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 130 insertions(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 16313fca..15f54937 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -315,6 +315,110 @@ task GripssHardFilterApplicationKt { # } # } +task Linx { + input { + String sampleName + File svVcf + File svVcfIndex + Array[File]+ purpleOutput + File referenceFasta + File referenceFastaFai + File referenceFastaDict + String refGenomeVersion + String outputDir = "./linx" + File fragileSiteCsv + File lineElementCsv + File replicationOriginsBed + File viralHostsCsv + File knownFusionCsv + File driverGenePanel + #The following should be in the same directory. 
+ File geneDataCsv + File proteinFeaturesCsv + File transExonDataCsv + File transSpliceDataCsv + + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 30 + String dockerImage = "docker://quay.io/biocontainers/hmftools-linx:1.12--0" + } + + command { + linx -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -sample ~{sampleName} \ + -sv_vcf ~{svVcf} \ + -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ + -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{refGenomeVersion} \ + -output_dir ~{outputDir} \ + -fragile_site_file ~{fragileSiteCsv} \ + -line_element_file ~{lineElementCsv} \ + -replication_origins_file ~{replicationOriginsBed} \ + -viral_hosts_file ~{viralHostsCsv} \ + -gene_transcripts_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -check_fusions \ + -known_fusion_file ~{knownFusionCsv} \ + -check_drivers \ + -driver_gene_panel ~{driverGenePanel} \ + -chaining_sv_limit 0 \ + -write_vis_data + } + + output { + File driverCatalog = "~{outputDir}/~{sampleName}.driver.catalog.tsv" + File linxBreakend = "~{outputDir}/~{sampleName}.linx.breakend.tsv" + File linxClusters = "~{outputDir}/~{sampleName}.linx.clusters.tsv" + File linxDrivers = "~{outputDir}/~{sampleName}.linx.drivers.tsv" + File linxFusion = "~{outputDir}/~{sampleName}.linx.fusion.tsv" + File linxLinks = "~{outputDir}/~{sampleName}.linx.links.tsv" + File linxSvs = "~{outputDir}/~{sampleName}.linx.svs.tsv" + File linxViralInserts = "~{outputDir}/~{sampleName}.linx.viral_inserts.tsv" + File linxVisCopyNumber = "~{outputDir}/~{sampleName}.linx.vis_copy_number.tsv" + File linxVisFusion = "~{outputDir}/~{sampleName}.linx.vis_fusion.tsv" + File linxVisGeneExon = "~{outputDir}/~{sampleName}.linx.vis_gene_exon.tsv" + File linxVisProteinDomain = "~{outputDir}/~{sampleName}.linx.vis_protein_domain.tsv" + File linxVisSegments = "~{outputDir}/~{sampleName}.linx.vis_segments.tsv" + File linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv" + File linxVersion = 
"~{outputDir}/linx.version" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + sampleName: {description: "The name of the sample.", category: "required"} + svVcf: {description: "A VCF file containing structural variants, produced using GRIDSS, annotated for viral insertions and postprocessed with GRIPSS.", category: "required"} + svVcfIndex: {description: "Index for the structural variants VCf file.", category: "required"} + purpleOutput: {description: "The files produced by PURPLE.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"HG19\" or \"HG38\".", category: "required"} + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + fragileSiteCsv: {description: "A list of known fragile sites.", category: "required"} + lineElementCsv: {description: "A list of known LINE source regions.", category: "required"} + replicationOriginsBed: {description: "Replication timing input in BED format with replication timing as the 4th column.", category: "required"} + viralHostsCsv: {description: "A list of the viruses which were used for annotation of the GRIDSS results.", category: "required"} + knownFusionCsv: {description: "A CSV file describing known fusions.", category: "required"} + driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} + geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + 
proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} + transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Purple { input { String normalName @@ -419,7 +523,7 @@ task Purple { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - driverGenePanel: {description: "A bed file describing the driver gene panel.", category: "required"} + driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} @@ -444,11 +548,20 @@ task Sage { File panelBed File highConfidenceBed Boolean hg38 = false + Boolean panelOnly = false String outputPath = "./sage.vcf.gz" String? normalName File? normalBam File? normalBamIndex + Int? hotspotMinTumorQual + Int? panelMinTumorQual + Int? hotspotMaxGermlineVaf + Int? hotspotMaxGermlineRelRawBaseQual + Int? panelMaxGermlineVaf + Int? panelMaxGermlineRelRawBaseQual + String? mnvFilterEnabled + File? 
coverageBed Int threads = 2 String javaXmx = "32G" @@ -470,6 +583,15 @@ task Sage { -panel_bed ~{panelBed} \ -high_confidence_bed ~{highConfidenceBed} \ -assembly ~{true="hg38" false="hg19" hg38} \ + ~{"-hotspot_min_tumor_qual " + hotspotMinTumorQual} \ + ~{"-panel_min_tumor_qual " + panelMinTumorQual} \ + ~{"-hotspot_max_germline_vaf " + hotspotMaxGermlineVaf} \ + ~{"-hotspot_max_germline_rel_raw_base_qual " + hotspotMaxGermlineRelRawBaseQual} \ + ~{"-panel_max_germline_vaf " + panelMaxGermlineVaf} \ + ~{"-panel_max_germline_rel_raw_base_qual " + panelMaxGermlineRelRawBaseQual} \ + ~{"-mnv_filter_enabled " + mnvFilterEnabled} \ + ~{"-coverage_bed " + coverage_bed} \ + ~{true="-panel_only" false="" panelOnly} \ -threads ~{threads} \ -out ~{outputPath} } @@ -502,6 +624,13 @@ task Sage { hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} panelBed: {description: "A bed file describing coding regions to search for in frame indels.", category: "required"} highConfidenceBed: {description: "A bed files describing high confidence mapping regions.", category: "required"} + hotspotMinTumorQual: {description: "Equivalent to sage's `hotspot_min_tumor_qual` option.", category: "advanced"} + panelMinTumorQual: {description: "Equivalent to sage's `panel_min_tumor_qual` option.", category: "advanced"} + hotspotMaxGermlineVaf: {description: "Equivalent to sage's `hotspot_max_germline_vaf` option.", category: "advanced"} + hotspotMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `hotspot_max_germline_rel_raw_base_qual` option.", category: "advanced"} + panelMaxGermlineVaf: {description: "Equivalent to sage's `panel_max_germline_vaf` option.", category: "advanced"} + panelMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `panel_max_germline_vaf` option.", category: "advanced"} + mnvFilterEnabled: {description: "Equivalent to sage's `mnv_filter_enabled` option.", category: "advanced"} memory: {description: "The amount of 
memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", From 111a42bf79d1fb8fa6a34d7b567dc4fc04f67e7c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 21 Jan 2021 14:23:53 +0100 Subject: [PATCH 210/668] fix typos --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 15f54937..6de3f777 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -590,7 +590,7 @@ task Sage { ~{"-panel_max_germline_vaf " + panelMaxGermlineVaf} \ ~{"-panel_max_germline_rel_raw_base_qual " + panelMaxGermlineRelRawBaseQual} \ ~{"-mnv_filter_enabled " + mnvFilterEnabled} \ - ~{"-coverage_bed " + coverage_bed} \ + ~{"-coverage_bed " + coverageBed} \ ~{true="-panel_only" false="" panelOnly} \ -threads ~{threads} \ -out ~{outputPath} From 96fa1bc6ba59825f051c0577d414027fd58f10c4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 28 Jan 2021 16:38:49 +0100 Subject: [PATCH 211/668] fix some issues, add flagstat --- bcftools.wdl | 1 - hmftools.wdl | 6 +++++- picard.wdl | 4 ++-- sambamba.wdl | 43 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 4 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 14889dff..b239320d 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -106,7 +106,6 @@ task Annotate { inputFile: {description: "A vcf or bcf file.", category: "required"} inputFileIndex: {description: "The index for the input vcf or bcf.", category: "common"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "common"} annsFileIndex: {description: "The index for annsFile.", category: "common"} collapse: {description: 
"Treat as identical records with , see man page for details.", category: "advanced"} diff --git a/hmftools.wdl b/hmftools.wdl index 6de3f777..67c49be3 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -341,7 +341,7 @@ task Linx { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 30 - String dockerImage = "docker://quay.io/biocontainers/hmftools-linx:1.12--0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.12--0" } command { @@ -381,6 +381,10 @@ task Linx { File linxVisSegments = "~{outputDir}/~{sampleName}.linx.vis_segments.tsv" File linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv" File linxVersion = "~{outputDir}/linx.version" + Array[File] outputs = [driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion, + linxLinks, linxSvs, linxViralInserts, linxVisCopyNumber, + linxVisFusion, linxVisGeneExon, linxVisProteinDomain, + linxVisSegments, linxVisSvData, linxVersion] } runtime { diff --git a/picard.wdl b/picard.wdl index d52b9cc7..8dc4e0bf 100644 --- a/picard.wdl +++ b/picard.wdl @@ -473,10 +473,10 @@ task CollectWgsMetrics { CollectWgsMetrics \ REFERENCE_SEQUENCE=~{referenceFasta} \ INPUT=~{inputBam} \ - OUTPUT=~{outputPath} \ + OUTPUT=~{outputPath} \ ~{"MINIMUM_MAPPING_QUALITY=" + minimumMappingQuality} \ ~{"MINIMUM_BASE_QUALITY=" + minimumBaseQuality} \ - ~{"OVERAGE_CAP=" + coverageCap} + ~{"COVERAGE_CAP=" + coverageCap} } output { diff --git a/sambamba.wdl b/sambamba.wdl index 0e9a901c..bb63f665 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -20,6 +20,49 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+task Flagstat { + input { + File inputBam + File inputBamIndex + String outputPath = "./flagstat.txt" + + Int threads = 2 + String memory = "8G" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" + } + + command { + sambamba flagstat \ + -t ~{threads} \ + ~{inputBam} \ + > ~{outputPath} + } + + output { + File stats = outputPath + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + inputBam: {description: "The input BAM file.", category: "required"} + inputBamIndex: {description: "The index for the BAM file.", category: "required"} + outputPath: {description: "The path to write the ouput to.", category: "required"} + + threads: {description: "The number of threads that will be used for this task.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + + task Markdup { input { Array[File] inputBams From 8b51723e40a28d8894015f8b4dad21fcb0cb4bd1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 28 Jan 2021 16:39:56 +0100 Subject: [PATCH 212/668] add extractSigPredictHRD --- extractSigPredictHRD.wdl | 69 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 extractSigPredictHRD.wdl diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl new file mode 100644 index 00000000..6aa5ff1d --- /dev/null +++ b/extractSigPredictHRD.wdl @@ -0,0 +1,69 @@ +version 1.0 + +# Copyright (c) 2021 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task ExtractSigPredictHRD { + input { + String outputDir = "." 
+ String sampleName + File snvIndelVcf + File snvIndelVcfIndex + File svVcf + File svVcfIndex + + String memory = "8G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biowdl/chord-mutsigextractor:2.00_1.14" + } + + command { + extractSigPredictHRD.R \ + ~{outputDir} \ + ~{sampleName} \ + ~{snvIndelVcf} \ + ~{svVcf} \ + } + + output { + File chordPrediction = "~{outputDir}/~{sampleName}_chord_prediction.txt" + File chordSignatures = "~{outputDir}/~{sampleName}_chord_signatures.txt" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + outputDir: {description: "The directory the outout will be written to.", category: "required"} + sampleName: {description: "The name of the sample.", category: "required"} + snvIndelVcf: {description: "A VCF file with SNVs and indels.", category: "required"} + snvIndelVcfIndex: {description: "The index for the SNV/indel VCF file.", category: "required"} + svVcf: {description: "A VCF file with SVs.", category: "required"} + svVcfIndex: {description: "The index for the SV VCF file.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} \ No newline at end of file From a4ebccba572cb4b0114c80b91083eafc203fa92b Mon Sep 17 00:00:00 2001 From: tomkuipers1402 Date: Thu, 4 Feb 2021 09:22:33 +0100 Subject: [PATCH 213/668] change threads --- prepareShiny.wdl | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/prepareShiny.wdl b/prepareShiny.wdl index 13cd0b1c..d669e2d1 100644 --- a/prepareShiny.wdl +++ b/prepareShiny.wdl @@ -25,7 +25,6 @@ task CreateDesignMatrix { File countTable String shinyDir = "." - Int threads = 1 String memory = "5G" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" @@ -44,7 +43,6 @@ task CreateDesignMatrix { } runtime { - cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -53,8 +51,7 @@ task CreateDesignMatrix { parameter_meta { # inputs countTable: {description: "The created count table from HTseq.", category: "required"} - shinyDir: {description: "The directory to write the output to.", category: "required"} - threads: {description: "The number of threads to use.", category: "advanced"} + shinyDir: {description: "The directory to write the output to.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -70,7 +67,6 @@ task CreateAnnotation { File referenceGtfFile String shinyDir = "." 
- Int threads = 2 String memory = "5G" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" @@ -90,7 +86,6 @@ task CreateAnnotation { } runtime { - cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -100,8 +95,7 @@ task CreateAnnotation { # inputs referenceFasta: {description: "The reference Fasta file.", category: "required"} referenceGtfFile: {description: "The reference GTF file.", category: "required"} - shinyDir: {description: "The directory to write the output to.", category: "required"} - threads: {description: "The number of threads to use.", category: "advanced"} + shinyDir: {description: "The directory to write the output to.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 558c8088dee1d252fb668303874684fd62741409 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 4 Feb 2021 15:38:11 +0100 Subject: [PATCH 214/668] add health-checker --- hmftools.wdl | 106 +++++++++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 45 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 67c49be3..5bad1dbe 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -269,51 +269,67 @@ task GripssHardFilterApplicationKt { } } -# task HealthChecker { -# # WIP -# input { -# String normalName -# String tumorName -# -# String javaXmx = "10G" -# } -# -# command { -# java -Xmx10G \ -# -jar /opt/tools/health-checker/3.1/health-checker.jar \ -# -reference ~{normalName} \ -# -tumor ~{tumorName} \ -# -metrics_dir ~{metricsPath} \ -# -amber_dir ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ -# -purple_dir ~{sub(purpleOutput[0], 
basename(purpleOutput[0]), "")} \ -# -output_dir ~{outputDir} -# } -# -# # super("health-checker", -# # Versions.HEALTH_CHECKER, -# # "health-checker.jar", -# # "10G", -# # Lists.newArrayList("-reference", -# # referenceSampleName, -# # "-tumor", -# # tumorSampleName, -# # "-ref_wgs_metrics_file", -# # referenceMetricsPath, -# # "-tum_wgs_metrics_file", -# # tumorMetricsPath, -# # "-ref_flagstat_file", -# # referenceFlagstatPath, -# # "-tum_flagstat_file", -# # tumorFlagstatPath, -# # "-purple_dir", -# # purplePath, -# # "-output_dir", -# # outputPath)); -# -# output { -# -# } -# } +task HealthChecker { + # WIP + input { + String outputDir = "." + String normalName + File normalFlagstats + File normalMetrics + String tumorName + File tumorFlagstats + File tumorMetrics + Array[File]+ purpleOutput + + String javaXmx = "10G" + String memory = "11G" + Int timeMinutes = 10 + String dockerImage = "quay.io/biowdl/health-checker:3.2" + } + + command { + set -e + mkdir -p ~{outputDir} + health-checker -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -reference ~{normalName} \ + -ref_flagstat_file ~{normalFlagstats} \ + -ref_wgs_metrics_file ~{normalMetrics} \ + -tumor ~{tumorName} \ + -tum_flagstat_file ~{tumorFlagstats} \ + -tum_wgs_metrics_file ~{tumorMetrics} \ + -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ + -output_dir ~{outputDir} + } + + + output { + File? healthCheckSucceeded = "~{outputDir}/~{tumorName}.HealthCheckSucceeded" + File? 
healthCheckFailed = "~{outputDir}/~{tumorName}.HealthCheckFailed" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + outputDir: {description: "The path the output will be written to.", category:"required"} + normalName: {description: "The name of the normal sample.", category: "required"} + normalFlagstats: {description: "The flagstats for the normal sample.", category: "required"} + normalMetrics: {description: "The picard WGS metrics for the normal sample.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorFlagstats: {description: "The flagstats for the tumor sample.", category: "required"} + tumorMetrics: {description: "The picard WGS metrics for the tumor sample.", category: "required"} + purpleOutput: {description: "The files from purple's output directory.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} task Linx { input { From eac2b302158e412df419705eba39ebaeedc1c11f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 9 Feb 2021 16:10:52 +0100 Subject: [PATCH 215/668] small adjustments --- bwa.wdl | 4 +++- gridss.wdl | 6 +++--- hmftools.wdl | 12 ++++++------ sambamba.wdl | 2 +- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 203f0dde..e2393481 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -28,6 +28,7 @@ task Mem { String outputPrefix Boolean sixtyFour = false Boolean usePostalt = false + Boolean useSoftclippingForSupplementary = false Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 @@ -36,7 +37,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 260 / threads) + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 500 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } @@ -56,6 +57,7 @@ task Mem { mkdir -p "$(dirname ~{outputPrefix})" bwa mem \ -t ~{threads} \ + ~{if useSoftclippingForSupplementary then "-Y" else ""} \ ~{"-R '" + readgroup}~{true="'" false="" defined(readgroup)} \ ~{bwaIndex.fastaFile} \ ~{read1} \ diff --git a/gridss.wdl b/gridss.wdl index 9bafa6d6..0148fcf6 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -35,9 +35,9 @@ task GRIDSS { String? 
normalLabel Int jvmHeapSizeGb = 30 - Int threads = 2 - Int timeMinutes = ceil(2880 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" + Int threads = 4 + Int timeMinutes = ceil(5760 / threads) + 10 + String dockerImage = "quay.io/biocontainers/gridss:2.9.3--0" } command { diff --git a/hmftools.wdl b/hmftools.wdl index 5bad1dbe..90564060 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -181,13 +181,13 @@ task GripssApplicationKt { String memory = "25G" String javaXmx = "24G" - Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.7--0" } command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.7-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ -ref_genome ~{referenceFasta} \ -breakpoint_hotspot ~{breakpointHotspot} \ @@ -234,13 +234,13 @@ task GripssHardFilterApplicationKt { String memory = "25G" String javaXmx = "24G" - Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.7--0" } command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.7-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ -input_vcf ~{inputVcf} \ -output_vcf ~{outputPath} diff --git a/sambamba.wdl b/sambamba.wdl index bb63f665..5284363e 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -84,7 +84,7 @@ task Markdup { # Added 1024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
- Int timeMinutes = 1 + ceil(size(inputBams, "G") * 16) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 32) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } From 2792266fa2950ec9cbe15530374465a99c65a43a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 16 Feb 2021 09:52:04 +0100 Subject: [PATCH 216/668] update versions, memory, etc --- bwa.wdl | 2 +- extractSigPredictHRD.wdl | 2 ++ gridss.wdl | 9 ++++++++- hmftools.wdl | 29 ++++++++++++++--------------- sambamba.wdl | 6 +++--- 5 files changed, 28 insertions(+), 20 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index e2393481..faa4121a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.5) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl index 6aa5ff1d..69c41ef8 100644 --- a/extractSigPredictHRD.wdl +++ b/extractSigPredictHRD.wdl @@ -28,6 +28,7 @@ task ExtractSigPredictHRD { File snvIndelVcfIndex File svVcf File svVcfIndex + Boolean hg38 = false String memory = "8G" Int timeMinutes = 15 @@ -40,6 +41,7 @@ task ExtractSigPredictHRD { ~{sampleName} \ ~{snvIndelVcf} \ ~{svVcf} \ + ~{if hg38 then "RG_38" else "RG_37"} } output { diff --git a/gridss.wdl b/gridss.wdl index 0148fcf6..98d730cf 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -33,23 +33,28 @@ task GRIDSS { File? normalBam File? 
normalBai String? normalLabel + File? blacklistBed + File? repeatmaskerBed Int jvmHeapSizeGb = 30 Int threads = 4 Int timeMinutes = ceil(5760 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.9.3--0" + String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } command { set -e mkdir -p "$(dirname ~{outputPrefix})" gridss \ + -w . \ --reference ~{reference.fastaFile} \ --output ~{outputPrefix}.vcf.gz \ --assembly ~{outputPrefix}_assembly.bam \ ~{"-t " + threads} \ ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ + ~{"--blacklist " + blacklistBed} \ + ~{"--repeatmaskerbed " + repeatmaskerBed} ~{normalBam} \ ~{tumorBam} tabix -p vcf ~{outputPrefix}.vcf.gz @@ -80,6 +85,8 @@ task GRIDSS { normalBam: {description: "The BAM file for the normal/control sample.", category: "advanced"} normalBai: {description: "The index for normalBam.", category: "advanced"} normalLabel: {description: "The name of the normal sample.", category: "advanced"} + blacklistBed: {description: "A bed file with blaclisted regins.", category: "advanced"} + repeatmaskerBed: {description: "A bed file containing the repeatmasker database.", category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} diff --git a/hmftools.wdl b/hmftools.wdl index 90564060..e98ac7ba 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -115,7 +115,7 @@ task Cobalt { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1200 - String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.10--0" + String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0" } command { @@ -172,6 +172,8 @@ task GripssApplicationKt { input { File inputVcf String outputPath = "gripss.vcf.gz" + String tumorName + String normalName File referenceFasta File referenceFastaFai File 
referenceFastaDict @@ -182,13 +184,15 @@ task GripssApplicationKt { String memory = "25G" String javaXmx = "24G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.7--0" + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" } command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.7-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ + -tumor ~{tumorName} \ + ~reference ~{normalName} \ -ref_genome ~{referenceFasta} \ -breakpoint_hotspot ~{breakpointHotspot} \ -breakend_pon ~{breakendPon} \ @@ -235,12 +239,12 @@ task GripssHardFilterApplicationKt { String memory = "25G" String javaXmx = "24G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.7--0" + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" } command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.7-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ -input_vcf ~{inputVcf} \ -output_vcf ~{outputPath} @@ -357,7 +361,7 @@ task Linx { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.12--0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.13--0" } command { @@ -455,13 +459,13 @@ task Purple { File referenceFastaFai File referenceFastaDict File driverGenePanel - File hotspots + File somaticHotspots Int threads = 1 Int timeMinutes = 60 String memory = "13G" String javaXmx = "12G" - String dockerImage = "quay.io/biocontainers/hmftools-purple:2.51--1" + String dockerImage = "quay.io/biocontainers/hmftools-purple:2.52--0" } command { @@ -479,13 +483,8 @@ task Purple { -ref_genome ~{referenceFasta} \ -driver_catalog \ -driver_gene_panel ~{driverGenePanel} \ - -hotspots ~{hotspots} \ + 
-somatic_hotspots ~{somaticHotspots} \ -threads ~{threads} - - # TODO if shallow also the following: - #-highly_diploid_percentage 0.88 \ - #-somatic_min_total 100 \ - #-somatic_min_purity_spread 0.1 } output { @@ -587,7 +586,7 @@ task Sage { String javaXmx = "32G" String memory = "33G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--2" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.6--0" } command { diff --git a/sambamba.wdl b/sambamba.wdl index 5284363e..b6ef5e9b 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -34,8 +34,8 @@ task Flagstat { command { sambamba flagstat \ - -t ~{threads} \ - ~{inputBam} \ + -t ~{threads} \ + ~{inputBam} \ > ~{outputPath} } @@ -84,7 +84,7 @@ task Markdup { # Added 1024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
- Int timeMinutes = 1 + ceil(size(inputBams, "G") * 32) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 40) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } From 943f9541ebc002ea576898067b7f220112cb79fc Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 16 Feb 2021 13:56:15 +0100 Subject: [PATCH 217/668] fix parameter_meta purple --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index e98ac7ba..3fe845a6 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -543,7 +543,7 @@ task Purple { category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} - hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + somaticHotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 8283c5099ba6fad50b34043033380e2898d3db66 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 18 Feb 2021 11:03:27 +0100 Subject: [PATCH 218/668] fix missing backslash --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 98d730cf..b4b36b01 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -54,7 +54,7 @@ task GRIDSS { ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ ~{"--blacklist " + blacklistBed} \ - ~{"--repeatmaskerbed " + repeatmaskerBed} + ~{"--repeatmaskerbed " + repeatmaskerBed} \ ~{normalBam} \ ~{tumorBam} tabix -p vcf ~{outputPrefix}.vcf.gz From adc3523872df29405e1741eaa2dfa2a67e61a51d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 19 Feb 2021 11:00:46 +0100 Subject: 
[PATCH 219/668] fix sage --- hmftools.wdl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 3fe845a6..49e4eeb4 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -590,9 +590,7 @@ task Sage { } command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-sage-2.2-2/sage.jar \ - com.hartwig.hmftools.sage.SageApplication \ + SAGE -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ ~{"-reference " + normalName} \ From a8314de9c3a2746eb44bf041fe1849c49241e547 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 19 Feb 2021 16:30:52 +0100 Subject: [PATCH 220/668] add -c to stringtie --- stringtie.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/stringtie.wdl b/stringtie.wdl index d3a6f73d..9c2f3cfc 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -31,6 +31,7 @@ task Stringtie { Boolean? firstStranded Boolean? secondStranded String? geneAbundanceFile + Float? minimumCoverage Int threads = 1 String memory = "2G" @@ -47,6 +48,7 @@ task Stringtie { ~{true="-e" false="" skipNovelTranscripts} \ ~{true="--rf" false="" firstStranded} \ ~{true="--fr" false="" secondStranded} \ + ~{"-c " + minimumCoverage} \ -o ~{assembledTranscriptsFile} \ ~{"-A " + geneAbundanceFile} \ ~{bam} @@ -74,6 +76,7 @@ task Stringtie { firstStranded: {description: "Equivalent to the --rf flag of stringtie.", category: "required"} secondStranded: {description: "Equivalent to the --fr flag of stringtie.", category: "required"} geneAbundanceFile: {description: "Where the abundance file should be written.", category: "common"} + minimumCoverage: {description: "The minimum coverage for a transcript to be shown in the output.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run 
in minutes.", category: "advanced"} From f468bd568b5d9fcbd66872934837a4f88a4f2f0b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 19 Feb 2021 16:43:57 +0100 Subject: [PATCH 221/668] add index to htseq --- htseq.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/htseq.wdl b/htseq.wdl index dfa3fcf2..ef4ae0a3 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -23,6 +23,7 @@ version 1.0 task HTSeqCount { input { Array[File]+ inputBams + Array[File]+ inputBamIndexes File gtfFile String outputTable = "output.tsv" String order = "pos" @@ -34,7 +35,7 @@ task HTSeqCount { Int nprocesses = 1 String memory = "8G" - Int timeMinutes = 10 + ceil(size(inputBams, "G") * 60) + Int timeMinutes = 1440 #10 + ceil(size(inputBams, "G") * 60) FIXME String dockerImage = "quay.io/biocontainers/htseq:0.12.4--py37hb3f55d8_0" } From 070a5d81abd11bc0318f4957b7ef418df2f61c40 Mon Sep 17 00:00:00 2001 From: cagaser Date: Sun, 21 Feb 2021 17:39:45 +0100 Subject: [PATCH 222/668] task: add duphold.wdl --- duphold.wdl | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 duphold.wdl diff --git a/duphold.wdl b/duphold.wdl new file mode 100644 index 00000000..80fe31d2 --- /dev/null +++ b/duphold.wdl @@ -0,0 +1,75 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Duphold { + input { + File inputVcf + File bamFile + File bamIndex + File referenceFasta + File referenceFastaFai + String sample + String outputPath = "./duphold.vcf" + + String memory = "15G" + Int timeMinutes = 1440 + String dockerImage = "quay.io/biocontainers/duphold:0.2.1--h516909a_1" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + export DUPHOLD_SAMPLE_NAME=~{sample} + duphold \ + -v ~{inputVcf} \ + -b ~{bamFile} \ + -f ~{referenceFasta} \ + -o ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputVcf: {description: "The VCF file to process.", category: "required"} + bamFile: {description: "The bam file to process.", category: "required"} + bamIndex: {description: "The index of the bam file.", category: "required"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} + referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } + sample: {description: "The name of the sample.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + memory: {description: "The memory required to run the programs.", category: "advanced"} + timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} + dockerImage: {description: 
"The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Duphold annotated VCF file."} + } +} From aef20c2a69816a367700441ba9d4a121faf9a72f Mon Sep 17 00:00:00 2001 From: cagaser Date: Sun, 21 Feb 2021 17:42:24 +0100 Subject: [PATCH 223/668] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 424dc764..8d6d1b76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Duphold: add duphold.wdl + Add new wdl file prepareShiny.wdl for creating input files for shiny app. + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. + Lima: Fix copy commands. From 5fc58ce1f5585a5bb4078b095674b67aba8d8f7d Mon Sep 17 00:00:00 2001 From: cagaser Date: Sun, 21 Feb 2021 21:43:20 +0100 Subject: [PATCH 224/668] add bcftools view filtering options --- bcftools.wdl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 28380dea..5f6c2a16 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -290,7 +290,10 @@ task View { input { File inputFile String outputPath = "output.vcf" - + + String? exclude + String? 
include + Boolean excludeUncalled = false String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" @@ -302,6 +305,7 @@ task View { set -e mkdir -p "$(dirname ~{outputPath})" bcftools view \ + ~{true="--exclude-uncalled" false="" firstAlleleOnly} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ ~{inputFile} @@ -324,6 +328,8 @@ task View { # inputs inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} + include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} + exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 70d7a2b361a8faa2cab4b02accd2abd8da3068d0 Mon Sep 17 00:00:00 2001 From: cagaser Date: Sun, 21 Feb 2021 23:51:17 +0100 Subject: [PATCH 225/668] add option for bcftools view filtering --- bcftools.wdl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 5f6c2a16..50b08ee6 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -290,10 +290,9 @@ task View { input { File inputFile String outputPath = "output.vcf" - String? exclude String? 
include - Boolean excludeUncalled = false + Boolean excludeUncalled = false String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" @@ -305,7 +304,9 @@ task View { set -e mkdir -p "$(dirname ~{outputPath})" bcftools view \ - ~{true="--exclude-uncalled" false="" firstAlleleOnly} \ + ~{"--exclude " + exclude} \ + ~{"--include " + include} \ + ~{true="--exclude-uncalled" false="" excludeUncalled} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ ~{inputFile} @@ -330,6 +331,7 @@ task View { outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} + excludeUncalled: {description: "exclude sites without a called genotype (see man page for details).", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 42f6cd2a9c38ba2da8f07db2f7df17b70d99a5d9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 22 Feb 2021 10:27:48 +0100 Subject: [PATCH 226/668] fix purple output for newer version --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 49e4eeb4..31330a7d 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -488,7 +488,7 @@ task Purple { } output { - File driverCatalogTsv = "~{outputDir}/~{tumorName}.driver.catalog.tsv" + File driverCatalogSomaticTsv = "~{outputDir}/~{tumorName}.driver.catalog.somatic.tsv" File purpleCnvGeneTsv = "~{outputDir}/~{tumorName}.purple.cnv.gene.tsv" File purpleCnvGermlineTsv = "~{outputDir}/~{tumorName}.purple.cnv.germline.tsv" File purpleCnvSomaticTsv = "~{outputDir}/~{tumorName}.purple.cnv.somatic.tsv" @@ -512,7 +512,7 @@ task Purple { File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File purpleVersion = "~{outputDir}/purple.version" - Array[File] outputs = [driverCatalogTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, + Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, From 2d41a2e22783b6208c1cdf8e7906e388bbfb7a89 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 22 Feb 2021 13:26:08 +0100 Subject: [PATCH 227/668] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d6d1b76..cbd083c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. 
version 5.0.0-dev --------------------------- ++ Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled) + Duphold: add duphold.wdl + Add new wdl file prepareShiny.wdl for creating input files for shiny app. + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. From 8238579043ccd2df72ef7b270e9d44248b257715 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 24 Feb 2021 09:26:27 +0100 Subject: [PATCH 228/668] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cbd083c3..4ee68a91 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled) ++ Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). + Duphold: add duphold.wdl + Add new wdl file prepareShiny.wdl for creating input files for shiny app. + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. From ff4edf7a505234bef2e3102d06152148ae84eaa0 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 24 Feb 2021 09:26:33 +0100 Subject: [PATCH 229/668] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ee68a91..5e175c6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- + Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). -+ Duphold: add duphold.wdl ++ Duphold: add duphold.wdl. + Add new wdl file prepareShiny.wdl for creating input files for shiny app. 
+ mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. + Lima: Fix copy commands. From 505c4fc02f8fa22cd512e1c890a984febcd89531 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 24 Feb 2021 09:26:40 +0100 Subject: [PATCH 230/668] Update bcftools.wdl Co-authored-by: Jasper --- bcftools.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 50b08ee6..d01a0c03 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -290,9 +290,11 @@ task View { input { File inputFile String outputPath = "output.vcf" + Boolean excludeUncalled = false + String? exclude String? include - Boolean excludeUncalled = false + String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" From 119e2aca92129ccd520ea4f0d9ab8ca768330e7e Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 24 Feb 2021 09:26:46 +0100 Subject: [PATCH 231/668] Update bcftools.wdl Co-authored-by: Jasper --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index d01a0c03..4dc4edb5 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -333,7 +333,7 @@ task View { outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} - excludeUncalled: {description: "exclude sites without a called genotype (see man page for details).", category: "advanced"} + excludeUncalled: {description: "Exclude sites without a called genotype (see man page for details).", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} 
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From ee6e66bea74597352161d3da231ce4df45acf39e Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 4 Mar 2021 16:42:38 +0100 Subject: [PATCH 232/668] add tmpdir --- umi-tools.wdl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 6524d656..b05fcace 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -78,10 +78,13 @@ task Dedup { File inputBam File inputBamIndex String outputBamPath + String tmpDir + Boolean paired = true String? umiSeparator String? statsPrefix + String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) @@ -93,13 +96,14 @@ task Dedup { command { set -e - mkdir -p "$(dirname ~{outputBamPath})" + mkdir -p "$(dirname ~{outputBamPath})" "~{tmpDir}" umi_tools dedup \ - --stdin ~{inputBam} \ - --stdout ~{outputBamPath} \ + --stdin=~{inputBam} \ + --stdout=~{outputBamPath} \ ~{"--output-stats " + statsPrefix} \ ~{"--umi-separator=" + umiSeparator} \ - ~{true="--paired" false="" paired} + ~{true="--paired" false="" paired} \ + --temp-dir=~{tmpDir} \ samtools index ~{outputBamPath} ~{outputBamIndex} } @@ -122,6 +126,7 @@ task Dedup { inputBam: {description: "The input BAM file.", categrory: "required"} inputBamIndex: {description: "The index for the ipnut BAM file.", cateogry: "required"} outputBamPath: {description: "The location to write the output BAM file to.", category: "required"} + outputBamPath: {description: "Temporary directory.", category: "advanced"} paired: {description: "Whether or not the data is paired.", category: "common"} umiSeparator: {description: "Seperator used for UMIs in the read names.", category: "advanced"} statsPrefix: {description: "The prefix for the stats files.", category: 
"advanced"} From 4edc1284f86c713dd5e23e8dba79c8a0f3a20219 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 4 Mar 2021 17:03:12 +0100 Subject: [PATCH 233/668] update umi-tools.wdl --- CHANGELOG.md | 1 + umi-tools.wdl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e175c6d..437294cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ UMI-tools (dedup): Add tempdir + Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). + Duphold: add duphold.wdl. + Add new wdl file prepareShiny.wdl for creating input files for shiny app. diff --git a/umi-tools.wdl b/umi-tools.wdl index b05fcace..db888603 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -126,7 +126,7 @@ task Dedup { inputBam: {description: "The input BAM file.", categrory: "required"} inputBamIndex: {description: "The index for the ipnut BAM file.", cateogry: "required"} outputBamPath: {description: "The location to write the output BAM file to.", category: "required"} - outputBamPath: {description: "Temporary directory.", category: "advanced"} + tmpDir: {description: "Temporary directory.", category: "advanced"} paired: {description: "Whether or not the data is paired.", category: "common"} umiSeparator: {description: "Seperator used for UMIs in the read names.", category: "advanced"} statsPrefix: {description: "The prefix for the stats files.", category: "advanced"} From 2a151b5014c34ea28498da909806cfa70da65d47 Mon Sep 17 00:00:00 2001 From: cedrick Date: Mon, 8 Mar 2021 10:27:12 +0100 Subject: [PATCH 234/668] add default tmpdir --- umi-tools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index db888603..5e08e14d 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -78,7 +78,7 @@ task Dedup { File inputBam File inputBamIndex String outputBamPath - String tmpDir 
+ String tmpDir = "./umiToolsDedupTmpDir" Boolean paired = true From f8f2b9e4058d29bdd21bb92694bb425c3724f31b Mon Sep 17 00:00:00 2001 From: cedrick Date: Mon, 8 Mar 2021 10:27:55 +0100 Subject: [PATCH 235/668] update default dockerimage --- umi-tools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 5e08e14d..1a7db327 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -89,7 +89,7 @@ task Dedup { String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9). - String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" + String dockerImage = "quay.io/biocontainers/umi_tools:1.1.1--py38h0213d0e_1" } String outputBamIndex = sub(outputBamPath, "\.bam$", ".bai") From 3b5f1476fb34d215d6332b127995ff3ab1b82f20 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 8 Mar 2021 11:02:53 +0100 Subject: [PATCH 236/668] remove umitools deduped BAM index output --- umi-tools.wdl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 1a7db327..e909e481 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -103,13 +103,11 @@ task Dedup { ~{"--output-stats " + statsPrefix} \ ~{"--umi-separator=" + umiSeparator} \ ~{true="--paired" false="" paired} \ - --temp-dir=~{tmpDir} \ - samtools index ~{outputBamPath} ~{outputBamIndex} + --temp-dir=~{tmpDir} } output { File deduppedBam = outputBamPath - File deduppedBamIndex = outputBamIndex File? editDistance = "~{statsPrefix}_edit_distance.tsv" File? umiStats = "~{statsPrefix}_per_umi.tsv" File? 
positionStats = "~{statsPrefix}_per_umi_per_position.tsv" @@ -136,7 +134,6 @@ task Dedup { # outputs deduppedBam: {description: "Deduplicated BAM file."} - deduppedBamIndex: {description: "Index of the deduplicated BAM file."} editDistance: {description: "Report of the (binned) average edit distance between the UMIs at each position."} umiStats: {description: "UMI-level summary statistics."} positionStats: {description: "The counts for unique combinations of UMI and position."} From 0f448cf27ea9812f938a37cb783bd7ce115d32a6 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Mon, 8 Mar 2021 13:08:38 +0100 Subject: [PATCH 237/668] Update umi-tools.wdl Co-authored-by: Davy Cats --- umi-tools.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 1a7db327..20f1a37e 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -84,7 +84,6 @@ task Dedup { String? umiSeparator String? statsPrefix - String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) From 7c8209efa3f8c9ed6d9c716c3357008d8be7e809 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 8 Mar 2021 13:09:50 +0100 Subject: [PATCH 238/668] remove comment --- umi-tools.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index e909e481..e3c833f8 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -88,7 +88,6 @@ task Dedup { String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) - # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9). 
String dockerImage = "quay.io/biocontainers/umi_tools:1.1.1--py38h0213d0e_1" } From 8623c57dbca49543e4a5ee8108316ef46242bcde Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 11 Mar 2021 14:05:13 +0100 Subject: [PATCH 239/668] add circos configs to purple output --- hmftools.wdl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 31330a7d..2fad41fe 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -512,6 +512,17 @@ task Purple { File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File purpleVersion = "~{outputDir}/purple.version" + File circosNormalRatio = "~{outputDir}/circos/~{normalName}.ratio.circos" + File circosCond = "~{outputDir}/circos/~{tumorName}.circos.conf" + File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" + File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" + File circosTumorRatio = "~{outputDir}/circos/~{tumorName}.ratio.circos" + File circosGaps = "~{outputDir}/circos/gaps.txt" + File circosBaf = "~{outputDir}/circos/~{tumorName}.baf.circos" + File circosCnv = "~{outputDir}/circos/~{tumorName}.cnv.circos" + File circosInputConf = "~{outputDir}/circos/~{tumorName}.input.conf" + File circosMap = "~{outputDir}/circos/~{tumorName}.map.circos" + File circosSnp = "~{outputDir}/circos/~{tumorName}.snp.circos" Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, @@ -519,6 +530,7 @@ task Purple { purpleVersion] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] + Array[File] circos = [] } runtime { From 13967b1793fc585d9f3753d87b618fd2c6819736 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 11 Mar 2021 14:13:06 +0100 
Subject: [PATCH 240/668] add array for circos confs --- hmftools.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 2fad41fe..dc31f41b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -513,7 +513,7 @@ task Purple { File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File purpleVersion = "~{outputDir}/purple.version" File circosNormalRatio = "~{outputDir}/circos/~{normalName}.ratio.circos" - File circosCond = "~{outputDir}/circos/~{tumorName}.circos.conf" + File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" File circosTumorRatio = "~{outputDir}/circos/~{tumorName}.ratio.circos" @@ -530,7 +530,9 @@ task Purple { purpleVersion] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] - Array[File] circos = [] + Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink, + circosTumorRatio, circosGaps, circosBaf, circosCnv, circosInputConf, circosMap, + circosSnp] } runtime { From beb5444092b8dea12fe0674a40bd4326d1daf426 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 17 Mar 2021 11:01:31 +0100 Subject: [PATCH 241/668] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 437294cd..c0a79fc4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ UMI-tools: re-introduce samtools indexing ++ UMI-tools: update default dockerImage + UMI-tools (dedup): Add tempdir + Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). + Duphold: add duphold.wdl. 
From 2410d0d5c2415f234739f63bbef913f5f531eab7 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 17 Mar 2021 11:02:01 +0100 Subject: [PATCH 242/668] update dockerImage and re-introduce samtools indexing --- umi-tools.wdl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 6b3aa697..a09ca642 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -31,7 +31,7 @@ task Extract { Boolean threePrime = false Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2) - String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" + String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:f9d5e41daab14b273ff04f257621890af6f82b93-0" } command { @@ -87,7 +87,7 @@ task Dedup { String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) - String dockerImage = "quay.io/biocontainers/umi_tools:1.1.1--py38h0213d0e_1" + String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:f9d5e41daab14b273ff04f257621890af6f82b93-0" } String outputBamIndex = sub(outputBamPath, "\.bam$", ".bai") @@ -102,10 +102,12 @@ task Dedup { ~{"--umi-separator=" + umiSeparator} \ ~{true="--paired" false="" paired} \ --temp-dir=~{tmpDir} + samtools index ~{outputBamPath} ~{outputBamIndex} } output { File deduppedBam = outputBamPath + File deduppedBamIndex = outputBamIndex File? editDistance = "~{statsPrefix}_edit_distance.tsv" File? umiStats = "~{statsPrefix}_per_umi.tsv" File? 
positionStats = "~{statsPrefix}_per_umi_per_position.tsv" @@ -132,6 +134,7 @@ task Dedup { # outputs deduppedBam: {description: "Deduplicated BAM file."} + deduppedBamIndex: {description: "Index of the deduplicated BAM file."} editDistance: {description: "Report of the (binned) average edit distance between the UMIs at each position."} umiStats: {description: "UMI-level summary statistics."} positionStats: {description: "The counts for unique combinations of UMI and position."} From 2a601648e8728305452e244bb95e296ad5d2441b Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 18 Mar 2021 10:59:24 +0100 Subject: [PATCH 243/668] update CHANGELOG --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c0a79fc4..64f40df6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,9 +10,9 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ UMI-tools: re-introduce samtools indexing -+ UMI-tools: update default dockerImage -+ UMI-tools (dedup): Add tempdir ++ UMI-tools (v1.1.1): re-introduce samtools indexing ++ UMI-tools (v1.1.1): update default dockerImage ++ UMI-tools dedup (v1.1.1): Add tempdir + Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). + Duphold: add duphold.wdl. + Add new wdl file prepareShiny.wdl for creating input files for shiny app. From c791c96a60e6eee1c104cda7b884039a67be53d4 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Thu, 18 Mar 2021 11:22:09 +0100 Subject: [PATCH 244/668] Update CHANGELOG.md Co-authored-by: Davy Cats --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64f40df6..c204ba24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,9 +10,9 @@ that users understand how the changes affect the new version. 
version 5.0.0-dev --------------------------- -+ UMI-tools (v1.1.1): re-introduce samtools indexing -+ UMI-tools (v1.1.1): update default dockerImage -+ UMI-tools dedup (v1.1.1): Add tempdir ++ UMI-tools: re-introduce samtools indexing ++ UMI-tools: update default dockerImage to use umitools v1.1.1 ++ UMI-tools dedup: Add tempdir + Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). + Duphold: add duphold.wdl. + Add new wdl file prepareShiny.wdl for creating input files for shiny app. From 359456efd96ccd2326657e5dec543c5a73efd92c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 19 Mar 2021 17:07:41 +0100 Subject: [PATCH 245/668] increase time and memory for picard collectWgsMetrics --- picard.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/picard.wdl b/picard.wdl index 8dc4e0bf..aefb4c21 100644 --- a/picard.wdl +++ b/picard.wdl @@ -459,9 +459,9 @@ task CollectWgsMetrics { Int? minimumBaseQuality Int? coverageCap - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + String memory = "33G" + String javaXmx = "32G" + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } From 55818f8742b709e9bb4007d4e529878ba0aa47e8 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 22 Mar 2021 12:47:21 +0100 Subject: [PATCH 246/668] update dockerImage --- umi-tools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index a09ca642..86bf1314 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -31,7 +31,7 @@ task Extract { Boolean threePrime = false Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2) - String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:f9d5e41daab14b273ff04f257621890af6f82b93-0" + String dockerImage = 
"quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:3067b520386698317fd507c413baf7f901666fd4-0" } command { @@ -87,7 +87,7 @@ task Dedup { String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) - String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:f9d5e41daab14b273ff04f257621890af6f82b93-0" + String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:3067b520386698317fd507c413baf7f901666fd4-0" } String outputBamIndex = sub(outputBamPath, "\.bam$", ".bai") From 9c36780b3c24d40cb0ed7bb37c1c3b0c41d2269e Mon Sep 17 00:00:00 2001 From: cagaser Date: Tue, 23 Mar 2021 09:57:29 +0100 Subject: [PATCH 247/668] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c204ba24..f3b04d4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ UMI-tools: update default dockerImage to use umitools v1.1.1 with correct samtools version (1.10) + UMI-tools: re-introduce samtools indexing + UMI-tools: update default dockerImage to use umitools v1.1.1 + UMI-tools dedup: Add tempdir From 5db3dd912fbf3b8cdaefefe198a59e998ebdd89a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 25 Mar 2021 11:15:41 +0100 Subject: [PATCH 248/668] update memory and timeMinutes for cutadapt and bwa --- bwa.wdl | 4 ++-- cutadapt.wdl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index faa4121a..cc8ea0c6 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -37,7 +37,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 500 / threads) + Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 500 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. 
String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.5) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 2) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. diff --git a/cutadapt.wdl b/cutadapt.wdl index b2dbdec0..bca29db3 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -84,7 +84,7 @@ task Cutadapt { Int cores = 4 String memory = "~{300 + 100 * cores}M" - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) + Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 12.0 / cores) String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" } From 2aba7899cdf1a76d2afa089e230335bf0843b72c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 25 Mar 2021 12:59:54 +0100 Subject: [PATCH 249/668] increase memory bwa --- bwa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index cc8ea0c6..670f00d2 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
- Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 2) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "G") * 3) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. From f83b315ebb5318147ce3f08d8ba0d313146753d4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 30 Mar 2021 09:55:44 +0200 Subject: [PATCH 250/668] add more memory to sambamba markdup --- sambamba.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sambamba.wdl b/sambamba.wdl index b6ef5e9b..b4eca66b 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -81,8 +81,8 @@ task Markdup { # 2 threads reduces wall clock time by more than 40%. Int threads = 2 # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size. - # Added 1024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. - Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize + # Added 2024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. + Int memoryMb = 2048 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. Int timeMinutes = 1 + ceil(size(inputBams, "G") * 40) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" From 0862eab38451da3de6615ad419ea56402690e7a0 Mon Sep 17 00:00:00 2001 From: dcats Date: Mon, 12 Apr 2021 16:50:33 +0200 Subject: [PATCH 251/668] memory and runtime adjustements --- bcftools.wdl | 4 ++-- gridss.wdl | 4 ++-- hmftools.wdl | 10 +++++----- sambamba.wdl | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 4827a631..28b62696 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,8 +47,8 @@ task Annotate { File? 
samplesFile Int threads = 0 - String memory = "256M" - Int timeMinutes = 10 + ceil(size(inputFile, "G")) + String memory = "5G" + Int timeMinutes = 60 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } diff --git a/gridss.wdl b/gridss.wdl index b4b36b01..11014a88 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -36,7 +36,7 @@ task GRIDSS { File? blacklistBed File? repeatmaskerBed - Int jvmHeapSizeGb = 30 + Int jvmHeapSizeGb = 64 Int threads = 4 Int timeMinutes = ceil(5760 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" @@ -70,7 +70,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb + 1}G" + memory: "~{jvmHeapSizeGb + 25}G" time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } diff --git a/hmftools.wdl b/hmftools.wdl index dc31f41b..553879f9 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,8 +35,8 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = "33G" - String javaXmx = "32G" + String memory = "52G" + String javaXmx = "50G" Int timeMinutes = 1200 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } @@ -341,7 +341,7 @@ task Linx { File svVcf File svVcfIndex Array[File]+ purpleOutput - File referenceFasta + File referenceFasta #FIXME Not used in pipeline5? File referenceFastaFai File referenceFastaDict String refGenomeVersion @@ -597,8 +597,8 @@ task Sage { File? 
coverageBed Int threads = 2 - String javaXmx = "32G" - String memory = "33G" + String javaXmx = "50G" + String memory = "75G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough String dockerImage = "quay.io/biocontainers/hmftools-sage:2.6--0" } diff --git a/sambamba.wdl b/sambamba.wdl index b4eca66b..c8d9e11c 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -69,7 +69,7 @@ task Markdup { String outputPath Int compressionLevel = 1 # sortBufferSize and ioBufferSize taken from markdup defaults as of sambamba 0.7.1. - Int sortBufferSize = 2048 + Int sortBufferSize = 4096 Int ioBufferSize = 128 Boolean removeDuplicates = false @@ -81,8 +81,8 @@ task Markdup { # 2 threads reduces wall clock time by more than 40%. Int threads = 2 # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size. - # Added 2024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. - Int memoryMb = 2048 + sortBufferSize + 2 * ioBufferSize + # Added 4096 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. + Int memoryMb = 4096 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
Int timeMinutes = 1 + ceil(size(inputBams, "G") * 40) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" From 22933762f7683b98535da38de2954db41c44be37 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 13 Apr 2021 16:31:58 +0200 Subject: [PATCH 252/668] add germline options to purple --- hmftools.wdl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 553879f9..e8b60bc0 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -452,6 +452,7 @@ task Purple { Array[File]+ cobaltOutput File gcProfile File somaticVcf + File germlineVcf File filteredSvVcf File fullSvVcf File fullSvVcfIndex @@ -460,6 +461,7 @@ task Purple { File referenceFastaDict File driverGenePanel File somaticHotspots + File germlineHotspots Int threads = 1 Int timeMinutes = 60 @@ -477,6 +479,7 @@ task Purple { -cobalt ~{sub(cobaltOutput[0], basename(cobaltOutput[0]), "")} \ -gc_profile ~{gcProfile} \ -somatic_vcf ~{somaticVcf} \ + -germline_vcf ~{germlineVcf} \ -structural_vcf ~{filteredSvVcf} \ -sv_recovery_vcf ~{fullSvVcf} \ -circos /usr/local/bin/circos \ @@ -484,6 +487,7 @@ task Purple { -driver_catalog \ -driver_gene_panel ~{driverGenePanel} \ -somatic_hotspots ~{somaticHotspots} \ + -germline_hotspots ~{germlineHotspots} \ -threads ~{threads} } @@ -550,6 +554,7 @@ task Purple { cobaltOutput: {description: "The output files of hmftools cobalt", category: "required"} gcProfile: {description: "A file describing the GC profile of the reference genome.", category: "required"} somaticVcf: {description: "The somatic variant calling results.", category: "required"} + germlineVcf: {description: "The germline variant calling results.", category: "required"} filteredSvVcf: {description: "The filtered structural variant calling results.", category: "required"} fullSvVcf: {description: "The unfiltered structural variant calling results.", category: "required"} referenceFasta: {description: "The reference fasta file.", category: 
"required"} @@ -557,7 +562,8 @@ task Purple { category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} - somaticHotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + somaticHotspots: {description: "A vcf file with hotspot somatic variant sites.", category: "required"} + germlineHotspots: {description: "A vcf file with hotspot germline variant sites.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 0013a03155aed7748864308f9fda5b4f07d79706 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 13 Apr 2021 16:37:54 +0200 Subject: [PATCH 253/668] remove ref_genome from Linx --- hmftools.wdl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index e8b60bc0..1a99caf6 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -341,9 +341,6 @@ task Linx { File svVcf File svVcfIndex Array[File]+ purpleOutput - File referenceFasta #FIXME Not used in pipeline5? 
- File referenceFastaFai - File referenceFastaDict String refGenomeVersion String outputDir = "./linx" File fragileSiteCsv @@ -369,7 +366,6 @@ task Linx { -sample ~{sampleName} \ -sv_vcf ~{svVcf} \ -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ - -ref_genome ~{referenceFasta} \ -ref_genome_version ~{refGenomeVersion} \ -output_dir ~{outputDir} \ -fragile_site_file ~{fragileSiteCsv} \ From bf43886539cb8d40d5b9637e3920ffba8d5f80a0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 13 Apr 2021 16:41:07 +0200 Subject: [PATCH 254/668] remove unused parameter_meta --- hmftools.wdl | 3 --- 1 file changed, 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 1a99caf6..48c6099c 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -414,9 +414,6 @@ task Linx { svVcf: {description: "A VCF file containing structural variants, produced using GRIDSS, annotated for viral insertions and postprocessed with GRIPSS.", category: "required"} svVcfIndex: {description: "Index for the structural variants VCf file.", category: "required"} purpleOutput: {description: "The files produced by PURPLE.", category: "required"} - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} refGenomeVersion: {description: "The version of the genome assembly used for alignment. 
Either \"HG19\" or \"HG38\".", category: "required"} outputDir: {description: "The directory the outputs will be written to.", category: "required"} fragileSiteCsv: {description: "A list of known fragile sites.", category: "required"} From a4d5102d42edf0d7d5795f5860817b38e680e597 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 14 Apr 2021 13:27:26 +0200 Subject: [PATCH 255/668] add gridss properties --- gridss.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 11014a88..ef5ae9e5 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -35,6 +35,7 @@ task GRIDSS { String? normalLabel File? blacklistBed File? repeatmaskerBed + File? gridssProperties Int jvmHeapSizeGb = 64 Int threads = 4 @@ -50,9 +51,10 @@ task GRIDSS { --reference ~{reference.fastaFile} \ --output ~{outputPrefix}.vcf.gz \ --assembly ~{outputPrefix}_assembly.bam \ + ~{"-c " + gridssProperties} \ ~{"-t " + threads} \ ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ - --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ + --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ ~{"--blacklist " + blacklistBed} \ ~{"--repeatmaskerbed " + repeatmaskerBed} \ ~{normalBam} \ @@ -87,6 +89,7 @@ task GRIDSS { normalLabel: {description: "The name of the normal sample.", category: "advanced"} blacklistBed: {description: "A bed file with blaclisted regins.", category: "advanced"} repeatmaskerBed: {description: "A bed file containing the repeatmasker database.", category: "advanced"} + gridssProperties: {description: "A properties file for gridss.", category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} From e81de32b4db6b48ff458f368b253010bcbff7187 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 15 Apr 2021 11:50:41 +0200 Subject: [PATCH 256/668] upgrade sage version --- 
hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 48c6099c..0a566d8e 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -599,7 +599,7 @@ task Sage { String javaXmx = "50G" String memory = "75G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.6--0" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.7--0" } command { From 51e524a7fa1ffe7664882941e7fc0ffc7aa14ad3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 20 Apr 2021 12:25:16 +0200 Subject: [PATCH 257/668] add missing purple outputs, fix typo --- hmftools.wdl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 0a566d8e..3dd52daf 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -192,13 +192,13 @@ task GripssApplicationKt { -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ -tumor ~{tumorName} \ - ~reference ~{normalName} \ + -reference ~{normalName} \ -ref_genome ~{referenceFasta} \ -breakpoint_hotspot ~{breakpointHotspot} \ -breakend_pon ~{breakendPon} \ -breakpoint_pon ~{breakpointPon} \ -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} + -output_vcf ~{outputPath} } output { @@ -486,6 +486,7 @@ task Purple { output { File driverCatalogSomaticTsv = "~{outputDir}/~{tumorName}.driver.catalog.somatic.tsv" + File driverCatalogGermlineTsv = "~{outputDir}/~{tumorName}.driver.catalog.germline.tsv" File purpleCnvGeneTsv = "~{outputDir}/~{tumorName}.purple.cnv.gene.tsv" File purpleCnvGermlineTsv = "~{outputDir}/~{tumorName}.purple.cnv.germline.tsv" File purpleCnvSomaticTsv = "~{outputDir}/~{tumorName}.purple.cnv.somatic.tsv" @@ -497,6 +498,8 @@ task Purple { File purpleSomaticHistTsv = "~{outputDir}/~{tumorName}.purple.somatic.hist.tsv" File purpleSomaticVcf = 
"~{outputDir}/~{tumorName}.purple.somatic.vcf.gz" File purpleSomaticVcfIndex = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz.tbi" + File purpleGermlineVcf = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz" + File purpleGermlineVcfIndex = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz.tbi" File purpleSvVcf = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz" File purpleSvVcfIndex = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz.tbi" File circosPlot = "~{outputDir}/plot/~{tumorName}.circos.png" @@ -524,7 +527,7 @@ task Purple { purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, - purpleVersion] + purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink, From 32c08100bcbf0590d7c1d69e08cdae2e3c640e99 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 19 May 2021 14:16:16 +0200 Subject: [PATCH 258/668] adjust runtime settings --- bcftools.wdl | 4 ++-- bwa.wdl | 4 ++-- extractSigPredictHRD.wdl | 4 ++-- gridss.wdl | 6 +++--- hmftools.wdl | 43 ++++++++++++++++++++-------------------- picard.wdl | 6 +++--- sambamba.wdl | 2 +- samtools.wdl | 2 ++ snpeff.wdl | 6 +++--- 9 files changed, 39 insertions(+), 38 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 28b62696..8fab933a 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,8 +47,8 @@ task Annotate { File? 
samplesFile Int threads = 0 - String memory = "5G" - Int timeMinutes = 60 + ceil(size(inputFile, "G")) + String memory = "1G" + Int timeMinutes = 30 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } diff --git a/bwa.wdl b/bwa.wdl index 670f00d2..1cb170b7 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -37,7 +37,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 500 / threads) + Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 300 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "G") * 3) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "G") * 2) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. 
diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl index 69c41ef8..2b5d9781 100644 --- a/extractSigPredictHRD.wdl +++ b/extractSigPredictHRD.wdl @@ -30,8 +30,8 @@ task ExtractSigPredictHRD { File svVcfIndex Boolean hg38 = false - String memory = "8G" - Int timeMinutes = 15 + String memory = "3G" + Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/chord-mutsigextractor:2.00_1.14" } diff --git a/gridss.wdl b/gridss.wdl index ef5ae9e5..acafc911 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -39,7 +39,7 @@ task GRIDSS { Int jvmHeapSizeGb = 64 Int threads = 4 - Int timeMinutes = ceil(5760 / threads) + 10 + Int timeMinutes = ceil(4320 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } @@ -72,7 +72,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb + 25}G" + memory: "~{jvmHeapSizeGb + 15}G" time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } @@ -117,7 +117,7 @@ task AnnotateInsertedSequence { String javaXmx = "8G" String memory = "9G" String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2 / threads) + Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3 / threads) } command { diff --git a/hmftools.wdl b/hmftools.wdl index 3dd52daf..9b22c10d 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -37,7 +37,7 @@ task Amber { Int threads = 2 String memory = "52G" String javaXmx = "50G" - Int timeMinutes = 1200 + Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } @@ -112,9 +112,9 @@ task Cobalt { File gcProfile Int threads = 1 - String memory = "9G" - String javaXmx = "8G" - Int timeMinutes = 1200 + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0" } @@ -181,9 +181,9 @@ task GripssApplicationKt { File breakendPon File breakpointPon - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 120 + String memory = 
"33G" + String javaXmx = "32G" + Int timeMinutes = 45 String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" } @@ -236,9 +236,9 @@ task GripssHardFilterApplicationKt { File inputVcf String outputPath = "gripss_hard_filter.vcf.gz" - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 120 + String memory = "3G" + String javaXmx = "2G" + Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" } @@ -274,7 +274,6 @@ task GripssHardFilterApplicationKt { } task HealthChecker { - # WIP input { String outputDir = "." String normalName @@ -285,9 +284,9 @@ task HealthChecker { File tumorMetrics Array[File]+ purpleOutput - String javaXmx = "10G" - String memory = "11G" - Int timeMinutes = 10 + String javaXmx = "2G" + String memory = "1G" + Int timeMinutes = 1 String dockerImage = "quay.io/biowdl/health-checker:3.2" } @@ -355,9 +354,9 @@ task Linx { File transExonDataCsv File transSpliceDataCsv - String memory = "9G" - String javaXmx = "8G" - Int timeMinutes = 30 + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.13--0" } @@ -457,9 +456,9 @@ task Purple { File germlineHotspots Int threads = 1 - Int timeMinutes = 60 - String memory = "13G" - String javaXmx = "12G" + Int timeMinutes = 30 + String memory = "9G" + String javaXmx = "8G" String dockerImage = "quay.io/biocontainers/hmftools-purple:2.52--0" } @@ -600,8 +599,8 @@ task Sage { Int threads = 2 String javaXmx = "50G" - String memory = "75G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough + String memory = "60G" + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 8 / threads) String dockerImage = "quay.io/biocontainers/hmftools-sage:2.7--0" } diff --git a/picard.wdl b/picard.wdl index aefb4c21..9a935045 100644 --- a/picard.wdl +++ b/picard.wdl @@ -459,9 +459,9 @@ task CollectWgsMetrics { 
Int? minimumBaseQuality Int? coverageCap - String memory = "33G" - String javaXmx = "32G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } diff --git a/sambamba.wdl b/sambamba.wdl index c8d9e11c..e78f50b6 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -84,7 +84,7 @@ task Markdup { # Added 4096 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 4096 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. - Int timeMinutes = 1 + ceil(size(inputBams, "G") * 40) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 25) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } diff --git a/samtools.wdl b/samtools.wdl index 9042a0df..954b5d4e 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -27,6 +27,7 @@ task BgzipAndIndex { String type = "vcf" Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String memory = "1G" String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -55,6 +56,7 @@ task BgzipAndIndex { outputDir: {description: "The directory in which the output will be placed.", category: "required"} type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/snpeff.wdl b/snpeff.wdl index 85709079..4a3640c7 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -36,9 +36,9 @@ task SnpEff { Boolean noShiftHgvs = false Int? 
upDownStreamLen - String memory = "50G" - String javaXmx = "49G" - Int timeMinutes = 60 #FIXME + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/snpeff:5.0--0" } From ffda341fae7bc7cc519451b018e43a76cae34d8e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 21 May 2021 14:37:56 +0200 Subject: [PATCH 259/668] adjust runtime settings --- bcftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 8fab933a..059cc39d 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,8 +47,8 @@ task Annotate { File? samplesFile Int threads = 0 - String memory = "1G" - Int timeMinutes = 30 + ceil(size(inputFile, "G")) + String memory = "2G" + Int timeMinutes = 60 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From c3df943f2964d2d5551baaf64c9bb2e2d9c198bf Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 25 May 2021 13:01:25 +0200 Subject: [PATCH 260/668] update memory bcftools --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 059cc39d..5170a01f 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,7 +47,7 @@ task Annotate { File? samplesFile Int threads = 0 - String memory = "2G" + String memory = "4G" Int timeMinutes = 60 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From ae1d2c02628d2239e79d24ecb78b4d4a3bcbc2d9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 28 May 2021 13:45:25 +0200 Subject: [PATCH 261/668] update changelog --- CHANGELOG.md | 2 ++ htseq.wdl | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22f41826..58d9f57f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. 
version 5.0.0-dev --------------------------- ++ GffCompare: Make the `referenceAnnotation` input optional. ++ Stringtie: Add the `minimumCoverage` input. + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. + Lima: Fix copy commands. + Fixed the `size` call in the default for gffread's timeMinutes, to retrieve diff --git a/htseq.wdl b/htseq.wdl index ef4ae0a3..76d3bb83 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -23,7 +23,6 @@ version 1.0 task HTSeqCount { input { Array[File]+ inputBams - Array[File]+ inputBamIndexes File gtfFile String outputTable = "output.tsv" String order = "pos" From 24a6f1104c3a05053931b37db3fb8f3dd1e178b1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 31 May 2021 14:19:34 +0200 Subject: [PATCH 262/668] fix gffcompare --- gffcompare.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gffcompare.wdl b/gffcompare.wdl index 8b135479..5c83ba9d 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -91,7 +91,7 @@ task GffCompare { else 0 Int noInputFiles = length(inputGtfFiles) Boolean oneFile = (noFilesGtfList + noInputFiles) == 1 - String annotatedName = if oneFile + String annotatedName = if oneFile && defined(referenceAnnotation) then "annotated" else "combined" From 743acb0b89cc4893544965e3d93590978b414420 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 1 Jun 2021 12:13:09 +0200 Subject: [PATCH 263/668] Fix memory values. --- CHANGELOG.md | 1 + CPAT.wdl | 5 ++++- gffcompare.wdl | 3 +++ gffread.wdl | 3 +++ 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 28e998e6..e47033c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Add memory values to GffCompare, GffRead and CPAT. + GffCompare: Make the `referenceAnnotation` input optional. + Stringtie: Add the `minimumCoverage` input. 
+ UMI-tools: update default dockerImage to use umitools v1.1.1 with correct samtools version (1.10) diff --git a/CPAT.wdl b/CPAT.wdl index afb67853..4a6d4478 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -34,6 +34,7 @@ task CPAT { Array[String]? startCodons Array[String]? stopCodons + String memory = "4G" Int timeMinutes = 10 + ceil(size(gene, "G") * 30) String dockerImage = "biocontainers/cpat:v1.2.4_cv1" } @@ -60,8 +61,9 @@ task CPAT { } runtime { - docker: dockerImage + memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -74,6 +76,7 @@ task CPAT { referenceGenomeIndex: {description: "The index of the reference. Should be added as input if CPAT should not index the reference genome.", category: "advanced"} startCodons: {description: "Equivalent to CPAT's `--start` option.", category: "advanced"} stopCodons: {description: "Equivalent to CPAT's `--stop` option.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/gffcompare.wdl b/gffcompare.wdl index 5c83ba9d..aa7c7209 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -46,6 +46,7 @@ task GffCompare { Int? maxDistanceGroupingTranscriptStartSites String? 
namePrefix + String memory = "4G" Int timeMinutes = 1 + ceil(size(inputGtfFiles, "G") * 30) String dockerImage = "quay.io/biocontainers/gffcompare:0.10.6--h2d50403_0" @@ -114,6 +115,7 @@ task GffCompare { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -140,6 +142,7 @@ task GffCompare { maxDistanceFreeEndsTerminalExons: {description: "Equivalent to gffcompare's `-e` option.", category: "advanced"} maxDistanceGroupingTranscriptStartSites: {description: "Equivalent to gffcompare's `-d` option.", category: "advanced"} namePrefix: {description: "Equivalent to gffcompare's `-p` option.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/gffread.wdl b/gffread.wdl index 967dd5c9..a04540f5 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -32,6 +32,7 @@ task GffRead { String? proteinFastaPath String? 
filteredGffPath + String memory = "4G" Int timeMinutes = 1 + ceil(size(inputGff, "G") * 10) String dockerImage = "quay.io/biocontainers/gffread:0.9.12--0" } @@ -64,6 +65,7 @@ task GffRead { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -78,6 +80,7 @@ task GffRead { CDSFastaPath: {description: "The location the CDS fasta should be written to.", category: "advanced"} proteinFastaPath: {description: "The location the protein fasta should be written to.", category: "advanced"} filteredGffPath: {description: "The location the filtered GFF should be written to.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 379d0be3671d7c6aee65b8e18a73798f1ef80733 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 2 Jun 2021 10:25:03 +0200 Subject: [PATCH 264/668] fix some runtime settings --- cutadapt.wdl | 2 +- multiqc.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index b2dbdec0..b49a95d4 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -83,7 +83,7 @@ task Cutadapt { Boolean? noZeroCap Int cores = 4 - String memory = "~{300 + 100 * cores}M" + String memory = "5G" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" } diff --git a/multiqc.wdl b/multiqc.wdl index 2571463a..a1662937 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -57,7 +57,7 @@ task MultiQC { String? clConfig String? 
memory - Int timeMinutes = 2 + ceil(size(reports, "G") * 8) + Int timeMinutes = 10 + ceil(size(reports, "G") * 8) String dockerImage = "quay.io/biocontainers/multiqc:1.9--py_1" } From e7400ced4a7e413f794e05a62c8e2c1261a0e7fc Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Mon, 7 Jun 2021 14:17:14 +0200 Subject: [PATCH 265/668] Move pacbio-merge image to quay.io Docker hub has started to remove unused images from free accounts, which means that it might remove images used by this pipeline without notice. Therefore the pipeline now exclusively uses images from quay.io or official repositories from docker hub, which do not have this limitation. --- pacbio.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pacbio.wdl b/pacbio.wdl index 7c0113fd..b21c69bc 100644 --- a/pacbio.wdl +++ b/pacbio.wdl @@ -26,7 +26,7 @@ task mergePacBio { String outputPathMergedReport String memory = "4G" - String dockerImage = "lumc/pacbio-merge:0.2" + String dockerImage = "quay.io/redmar_van_den_berg/pacbio-merge:0.2" } command { From a095517d6f9e729769e26e1bd7dd6385ac403fc1 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 10 Jun 2021 16:20:34 +0200 Subject: [PATCH 266/668] Update tool versions. --- CHANGELOG.md | 18 ++++++++++++------ bam2fastx.wdl | 4 ++-- biowdl.wdl | 2 +- ccs.wdl | 2 +- common.wdl | 2 +- lima.wdl | 2 +- minimap2.wdl | 4 ++-- nanopack.wdl | 2 +- 8 files changed, 21 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e47033c6..9112c77d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,15 +10,21 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Update biowdl-input-converter to version 0.3. ++ Update minimap2 to version 2.20. ++ Update lima to version 2.2.0. ++ Update ccs to version 6.0.0. ++ Update bam2fastx to version 1.3.1. + Add memory values to GffCompare, GffRead and CPAT. + GffCompare: Make the `referenceAnnotation` input optional. 
+ Stringtie: Add the `minimumCoverage` input. -+ UMI-tools: update default dockerImage to use umitools v1.1.1 with correct samtools version (1.10) -+ UMI-tools: re-introduce samtools indexing -+ UMI-tools: update default dockerImage to use umitools v1.1.1 -+ UMI-tools dedup: Add tempdir -+ Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). -+ Duphold: add duphold.wdl. ++ UMI-tools: Update default dockerImage to use umitools v1.1.1 with correct + samtools version (1.10). ++ UMI-tools: Re-introduce samtools indexing. ++ UMI-tools: Update default dockerImage to use umitools v1.1.1. ++ UMI-tools dedup: Add tempdir. ++ Bcftools view: Add options for filtering (include, exclude, excludeUncalled). ++ Duphold: Add `duphold.wdl`. + Add new wdl file prepareShiny.wdl for creating input files for shiny app. + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. + Lima: Fix copy commands. diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 2ae22a57..0bdccca8 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -32,7 +32,7 @@ task Bam2Fasta { String memory = "2G" Int timeMinutes = 15 - String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.0--he1c1bb9_8" + String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } command { @@ -100,7 +100,7 @@ task Bam2Fastq { String memory = "2G" Int timeMinutes = 15 - String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.0--he1c1bb9_8" + String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } command { diff --git a/biowdl.wdl b/biowdl.wdl index 06b1d756..dead8303 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -34,7 +34,7 @@ task InputConverter { String memory = "128M" Int timeMinutes = 1 - String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" + String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.3.0--pyhdfd78af_0" } command <<< diff --git a/ccs.wdl b/ccs.wdl index 4446937b..69095f4d 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ 
-37,7 +37,7 @@ task CCS { Int threads = 2 String memory = "4G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/pbccs:5.0.0--0" + String dockerImage = "quay.io/biocontainers/pbccs:6.0.0--h9ee0642_2" } command { diff --git a/common.wdl b/common.wdl index 66bdb99c..54b11567 100644 --- a/common.wdl +++ b/common.wdl @@ -221,7 +221,7 @@ task YamlToJson { String memory = "128M" Int timeMinutes = 1 # biowdl-input-converter has python and pyyaml. - String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" + String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.3.0--pyhdfd78af_0" } command { diff --git a/lima.wdl b/lima.wdl index 2455aaac..f6faf079 100644 --- a/lima.wdl +++ b/lima.wdl @@ -51,7 +51,7 @@ task Lima { Int threads = 2 String memory = "2G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/lima:2.0.0--0" + String dockerImage = "quay.io/biocontainers/lima:2.2.0--h9ee0642_0" } Map[String, String] libraryDesignOptions = {"same": "--same", "different": "--different", "neighbors": "--neighbors"} diff --git a/minimap2.wdl b/minimap2.wdl index d2e69905..50ff4db3 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -33,7 +33,7 @@ task Indexing { Int cores = 1 String memory = "4G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/minimap2:2.17--hed695b0_3" + String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } command { @@ -100,7 +100,7 @@ task Mapping { Int cores = 4 String memory = "30G" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) - String dockerImage = "quay.io/biocontainers/minimap2:2.17--hed695b0_3" + String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } command { diff --git a/nanopack.wdl b/nanopack.wdl index f86641b0..e4c94a43 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -42,7 +42,7 @@ task NanoPlot { Int threads = 2 String memory = "2G" Int timeMinutes = 15 - String dockerImage = 
"quay.io/biocontainers/nanoplot:1.32.1--py_0" + String dockerImage = "quay.io/biocontainers/nanoplot:1.38.0--pyhdfd78af_0" } Map[String, String] fileTypeOptions = {"fastq": "--fastq ", "fasta": "--fasta ", "fastq_rich": "--fastq_rich ", "fastq_minimal": "--fastq_minimal ", "summary": "--summary ", "bam": "--bam ", "ubam": "--ubam ", "cram": "--cram ", "pickle": "--pickle ", "feather": "--feather "} From 7251bf276a5ea5a3d140d0438fe9647db74ddbc0 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 11 Jun 2021 09:59:25 +0200 Subject: [PATCH 267/668] Fix lima output naming. --- lima.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lima.wdl b/lima.wdl index f6faf079..6b87ad4f 100644 --- a/lima.wdl +++ b/lima.wdl @@ -91,13 +91,13 @@ task Lima { dirName="$(dirname ~{outputPrefix})" find "$(cd ${dirName}; pwd)" -name "*.bam" > bamFiles.txt find "$(cd ${dirName}; pwd)" -name "*.bam.pbi" > bamIndexes.txt - find "$(cd ${dirName}; pwd)" -name "*.subreadset.xml" > subreadsets.txt + find "$(cd ${dirName}; pwd)" -name "*.consensusreadset.xml" > consensusreadset.txt >>> output { Array[File] limaBam = read_lines("bamFiles.txt") Array[File] limaBamIndex = read_lines("bamIndexes.txt") - Array[File] limaXml = read_lines("subreadsets.txt") + Array[File] limaXml = read_lines("consensusreadset.txt") File limaStderr = outputPrefix + ".lima.stderr.log" File limaJson = outputPrefix + ".json" File limaCounts = outputPrefix + ".lima.counts" From 460d3d04e2aa83bac9b5ddfa708463a7a1713394 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 11 Jun 2021 12:19:52 +0200 Subject: [PATCH 268/668] Update scripts. 
--- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 85e2ec54..c31670d3 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 85e2ec542b65be5f2a25c22db05c28700fbe6db5 +Subproject commit c31670d3a9222a2feafc649cbc118c95afbc7189 From 6356c481cdb8d42820476fe7249f77d1e48bd9d2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 11 Jun 2021 18:01:39 +0200 Subject: [PATCH 269/668] Fix outputs in ccs. --- CHANGELOG.md | 1 + ccs.wdl | 37 ++++++++++++++++++++++++++++++++----- scripts | 2 +- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e47033c6..c4eb9ac5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Fix output files in ccs.wdl. + Add memory values to GffCompare, GffRead and CPAT. + GffCompare: Make the `referenceAnnotation` input optional. + Stringtie: Add the `minimumCoverage` input. diff --git a/ccs.wdl b/ccs.wdl index 4446937b..29f1a7f9 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -24,12 +24,19 @@ task CCS { input { File subreadsFile String outputPrefix + String logLevel = "WARN" Int minPasses = 3 + Int topPasses = 60 Int minLength = 10 Int maxLength = 50000 Boolean byStrand = false + Boolean skipPolish = false + Boolean all = false + Boolean subreadFallback = false + Boolean allKinetics = false + Boolean hifiKinetics = false + Float minSnr = 2.5 Float minReadQuality = 0.99 - String logLevel = "WARN" File? subreadsIndexFile String? 
chunkString @@ -37,7 +44,7 @@ task CCS { Int threads = 2 String memory = "4G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/pbccs:5.0.0--0" + String dockerImage = "quay.io/biocontainers/pbccs:6.0.0--h9ee0642_2" } command { @@ -45,15 +52,24 @@ task CCS { mkdir -p "$(dirname ~{outputPrefix})" ccs \ --min-passes ~{minPasses} \ + --min-snr ~{minSnr} \ + --top-passes ~{topPasses} \ --min-length ~{minLength} \ --max-length ~{maxLength} \ ~{true="--by-strand" false="" byStrand} \ + ~{true="--skip-polish" false="" skipPolish} \ + ~{true="--all" false="" all} \ + ~{true="--subread-fallback" false="" subreadFallback} \ + ~{true="--all-kinetics" false="" allKinetics} \ + ~{true="--hifi-kinetics" false="" hifiKinetics} \ --min-rq ~{minReadQuality} \ --log-level ~{logLevel} \ --num-threads ~{threads} \ ~{"--chunk " + chunkString} \ + ~{"--report-file " + outputPrefix + ".ccs_report.txt"} \ ~{"--report-json " + outputPrefix + ".ccs.report.json"} \ ~{"--log-file " + outputPrefix + ".ccs.stderr.log"} \ + ~{"--metrics-json " + outputPrefix + ".zmw_metrics.json.gz"} \ ~{subreadsFile} \ ~{outputPrefix + ".ccs.bam"} } @@ -61,8 +77,10 @@ task CCS { output { File ccsBam = outputPrefix + ".ccs.bam" File ccsBamIndex = outputPrefix + ".ccs.bam.pbi" - File ccsReport = outputPrefix + ".ccs.report.json" + File ccsReport = outputPrefix + ".ccs_report.txt" + File ccsJsonReport = outputPrefix + ".ccs.report.json" File ccsStderr = outputPrefix + ".ccs.stderr.log" + File zmwMetrics = outputPrefix + ".zmw_metrics.json.gz" } runtime { @@ -76,12 +94,19 @@ task CCS { # inputs subreadsFile: {description: "Subreads input file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + logLevel: {description: "Set log level. 
Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} minPasses: {description: "Minimum number of full-length subreads required to generate ccs for a ZMW.", category: "advanced"} + topPasses: {description: "Pick at maximum the top N passes for each ZMW.", category: "advanced"} minLength: {description: "Minimum draft length before polishing.", category: "advanced"} maxLength: {description: "Maximum draft length before polishing.", category: "advanced"} byStrand: {description: "Generate a consensus for each strand.", category: "advanced"} + skipPolish: {description: "Only output the initial draft template (faster, less accurate).", category: "advanced"} + all: {description: "Emit all ZMWs.", category: "advanced"} + subreadFallback: {description: "Emit a representative subread, instead of the draft consensus, if polishing failed.", category: "advanced"} + allKinetics: {description: "Calculate mean pulse widths (PW) and interpulse durations (IPD) for every ZMW.", category: "advanced"} + hifiKinetics: {description: "Calculate mean pulse widths (PW) and interpulse durations (IPD) for every HiFi read.", category: "advanced"} + minSnr: {description: "Minimum SNR of subreads to use for generating CCS.", category: "advanced"} minReadQuality: {description: "Minimum predicted accuracy in [0, 1].", category: "common"} - logLevel: {description: "Set log level. Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} subreadsIndexFile: {description: "Index for the subreads input file, required when using chunkString.", category: "advanced"} chunkString: {descpription: "Chunk string (e.g. 
1/4, 5/5) for CCS.", category: "advanced"} threads: {description: "The number of threads to be used.", category: "advanced"} @@ -92,7 +117,9 @@ task CCS { # outputs ccsBam: {description: "Consensus reads output file."} ccsBamIndex: {description: "Index of consensus reads output file."} - ccsReport: {description: "Ccs results report file."} + ccsReport: {description: "Ccs report file."} + ccsJsonReport: {description: "Ccs results json report file."} ccsStderr: {description: "Ccs STDERR log file."} + zmwMetrics: {description: "ZMW metrics json file."} } } diff --git a/scripts b/scripts index 85e2ec54..c31670d3 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 85e2ec542b65be5f2a25c22db05c28700fbe6db5 +Subproject commit c31670d3a9222a2feafc649cbc118c95afbc7189 From 833ad0bf47f9c42e33743ed5b0de7851ef66bbf1 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 11 Jun 2021 18:06:21 +0200 Subject: [PATCH 270/668] Remove weird line. --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f4bade66..83da5399 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,6 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -<<<<<<< HEAD + Fix output files in ccs.wdl. + Update biowdl-input-converter to version 0.3. + Update minimap2 to version 2.20. From 4f879f72aec90d36d0201e9c1b54154f9decb757 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Jun 2021 12:40:32 +0200 Subject: [PATCH 271/668] Try to adjust localization. --- CHANGELOG.md | 7 ++++++- bam2fastx.wdl | 12 ++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 83da5399..11a92d83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,12 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ Fix output files in ccs.wdl. 
++ Change the way localization of the input bam files and index are handled + in the bam2fastx tasks. ++ Add new parameters from CCS version 6.0.0 and add two new outputs: + `ccs_report.txt` & `zmw_metrics.json.gz`. ++ Change CutAdapt memory to `5G`. ++ Increase multiqc base time from 5 to 10. + Update biowdl-input-converter to version 0.3. + Update minimap2 to version 2.20. + Update lima to version 2.2.0. diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 0bdccca8..4a2ecf87 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -44,13 +44,13 @@ task Bam2Fasta { bamFiles="" for bamFile in ~{sep=" " bam} do - ln $bamFile . - bamFiles=$bamFiles" $(basename $bamFile)" + cp $bamFile ./ + bamFiles=$bamFiles" ./$(basename $bamFile)" done for index in ~{sep=" " bamIndex} do - ln $index . + cp $index ./ done bam2fasta \ @@ -112,13 +112,13 @@ task Bam2Fastq { bamFiles="" for bamFile in ~{sep=" " bam} do - ln $bamFile . - bamFiles=$bamFiles" $(basename $bamFile)" + cp $bamFile ./ + bamFiles=$bamFiles" ./$(basename $bamFile)" done for index in ~{sep=" " bamIndex} do - ln $index . + cp $index ./ done bam2fastq \ From a422e52920dc8fa2d2614f632962dec37964b939 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Jun 2021 14:23:57 +0200 Subject: [PATCH 272/668] Try a different approach. 
--- bam2fastx.wdl | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 4a2ecf87..4e5ed3ed 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -35,7 +35,7 @@ task Bam2Fasta { String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } - command { + command <<< set -e mkdir -p "$(dirname ~{outputPrefix})" @@ -44,13 +44,13 @@ task Bam2Fasta { bamFiles="" for bamFile in ~{sep=" " bam} do - cp $bamFile ./ - bamFiles=$bamFiles" ./$(basename $bamFile)" + ln -s ${bamFile} ./ + bamFiles=${bamFiles}" ./$(basename ${bamFile})" done - for index in ~{sep=" " bamIndex} + for indexFile in ~{sep=" " bamIndex} do - cp $index ./ + ln -s ${indexFile} ./ done bam2fasta \ @@ -58,8 +58,8 @@ task Bam2Fasta { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - $bamFiles - } + ${bamFiles} + >>> output { File fastaFile = outputPrefix + ".fasta.gz" @@ -103,7 +103,7 @@ task Bam2Fastq { String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } - command { + command <<< set -e mkdir -p "$(dirname ~{outputPrefix})" @@ -112,13 +112,13 @@ task Bam2Fastq { bamFiles="" for bamFile in ~{sep=" " bam} do - cp $bamFile ./ - bamFiles=$bamFiles" ./$(basename $bamFile)" + ln -s ${bamFile} ./ + bamFiles=${bamFiles}" ./$(basename ${bamFile})" done - for index in ~{sep=" " bamIndex} + for indexFile in ~{sep=" " bamIndex} do - cp $index ./ + ln -s ${indexFile} ./ done bam2fastq \ @@ -126,8 +126,8 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - $bamFiles - } + ${bamFiles} + >>> output { File fastqFile = outputPrefix + ".fastq.gz" From adad218bbd6f501b0194107adf81cc9588ba91ba Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Jun 2021 15:02:41 +0200 Subject: [PATCH 273/668] Test tool without localization. 
--- bam2fastx.wdl | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 4e5ed3ed..3cdb29fb 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -103,31 +103,16 @@ task Bam2Fastq { String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } - command <<< + command { set -e mkdir -p "$(dirname ~{outputPrefix})" - - # Localise the bam and pbi files so they are next to each other in the - # current folder. - bamFiles="" - for bamFile in ~{sep=" " bam} - do - ln -s ${bamFile} ./ - bamFiles=${bamFiles}" ./$(basename ${bamFile})" - done - - for indexFile in ~{sep=" " bamIndex} - do - ln -s ${indexFile} ./ - done - bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ${bamFiles} - >>> + ~{bam} + } output { File fastqFile = outputPrefix + ".fastq.gz" From adee85e2cfe420ba3a7be24f764233597d00a74a Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Jun 2021 15:08:35 +0200 Subject: [PATCH 274/668] Fix array. --- bam2fastx.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 3cdb29fb..b09f7a0f 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -111,7 +111,7 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{bam} + ~{sep=" " bam} } output { From 734c4037e642bf318b249f8835f2042c40ff328d Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Jun 2021 15:49:53 +0200 Subject: [PATCH 275/668] Try another approach. 
--- bam2fastx.wdl | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index b09f7a0f..110441ec 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -103,16 +103,26 @@ task Bam2Fastq { String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } - command { + command <<< set -e mkdir -p "$(dirname ~{outputPrefix})" + + # Localise the bam and pbi files so they are next to each other in the + # current folder. + bamFiles="" + for bamFile in ~{sep=" " bam} + do + fullPathBam=$(readlink -f ${bamFile}) + bamFiles=${bamFiles}" ${fullPathBam}" + done + bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{sep=" " bam} - } + ${bamFiles} + >>> output { File fastqFile = outputPrefix + ".fastq.gz" From 235fb43f046b285a3b5d8ca702b2cc8ad64dcf36 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 15 Jun 2021 11:18:37 +0200 Subject: [PATCH 276/668] Revert changes to WDL file. --- bam2fastx.wdl | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 110441ec..0bdccca8 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -35,7 +35,7 @@ task Bam2Fasta { String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } - command <<< + command { set -e mkdir -p "$(dirname ~{outputPrefix})" @@ -44,13 +44,13 @@ task Bam2Fasta { bamFiles="" for bamFile in ~{sep=" " bam} do - ln -s ${bamFile} ./ - bamFiles=${bamFiles}" ./$(basename ${bamFile})" + ln $bamFile . + bamFiles=$bamFiles" $(basename $bamFile)" done - for indexFile in ~{sep=" " bamIndex} + for index in ~{sep=" " bamIndex} do - ln -s ${indexFile} ./ + ln $index . 
done bam2fasta \ @@ -58,8 +58,8 @@ task Bam2Fasta { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ${bamFiles} - >>> + $bamFiles + } output { File fastaFile = outputPrefix + ".fasta.gz" @@ -103,7 +103,7 @@ task Bam2Fastq { String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } - command <<< + command { set -e mkdir -p "$(dirname ~{outputPrefix})" @@ -112,8 +112,13 @@ task Bam2Fastq { bamFiles="" for bamFile in ~{sep=" " bam} do - fullPathBam=$(readlink -f ${bamFile}) - bamFiles=${bamFiles}" ${fullPathBam}" + ln $bamFile . + bamFiles=$bamFiles" $(basename $bamFile)" + done + + for index in ~{sep=" " bamIndex} + do + ln $index . done bam2fastq \ @@ -121,8 +126,8 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ${bamFiles} - >>> + $bamFiles + } output { File fastqFile = outputPrefix + ".fastq.gz" From f2f7411a7b32bda18bba6eb8ee83606fa635f9e0 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 15 Jun 2021 12:43:37 +0200 Subject: [PATCH 277/668] Add directory creation to samtools fastq. --- CHANGELOG.md | 3 +-- samtools.wdl | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 11a92d83..e7242699 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,8 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ Change the way localization of the input bam files and index are handled - in the bam2fastx tasks. ++ Samtools: Add mkdir line to `Fastq` task. + Add new parameters from CCS version 6.0.0 and add two new outputs: `ccs_report.txt` & `zmw_metrics.json.gz`. + Change CutAdapt memory to `5G`. 
diff --git a/samtools.wdl b/samtools.wdl index 9042a0df..46d1eb70 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -122,6 +122,8 @@ task Fastq { } command { + set -e + mkdir -p "$(dirname ~{outputRead1})" samtools fastq \ ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ ~{"-2 " + outputRead2} \ From de03877e2e831285daaccc820db98da0897e1dac Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Jun 2021 13:53:25 +0200 Subject: [PATCH 278/668] add cuppa and cuppa chart --- hmftools.wdl | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 9b22c10d..779820a3 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -168,6 +168,112 @@ task Cobalt { } } +task Cuppa { + input { + Array[File]+ linxOutput + Array[File]+ purpleOutput + String sampleName + Array[String]+ categories = ["DNA"] + Array[File]+ referenceData + File purpleSvVcf + File purpleSvVcfIndex + File purpleSomaticVcf + File purpleSomaticVcfIndex + String outputDir = "./cuppa" + + String javaXmx = "4G" + String memory = "5G" + Int time_minutes = 10 + String dockerImage = "quay.io/biowdl/cuppa:1.4" + } + + command { + set -e + mkdir -p sampleData ~{outputDir} + ln -s -t sampleData ~{sep=" " linxOutput} ~{sep=" " purpleOutput} + cuppa -Xmx~{javaXmx} \ + -output_dir ~{outputDir} \ + -output_id ~{sampleName} \ + -categories '~{sep="," categories}' \ + -ref_data_dir ~{sub(referenceData[0], basename(referenceData[0]), "")} \ + -sample_data_dir sampleData \ + -sample_data ~{sampleName} \ + -sample_sv_file ~{purpleSvVcf} \ + -sample_somatic_vcf ~{purpleSomaticVcf} + } + + output { + File cupData = "~{outputDir}/~{sampleName}.cup.data.csv" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + linxOutput: {description: "The files produced by linx.", category: "required"} + purpleOutput: {description: "The files produced by purple.", category: "required"} 
+ sampleName: {description: "The name of the sample.", category: "required"} + categories: {description: "The classifiers to use.", category: "advanced"} + referenceData : {description: "The reference data.", category: "required"} + purpleSvVcf: {description: "The VCF file produced by purple which contains structural variants.", category: "required"} + purpleSvVcfIndex: {description: "The index of the structural variants VCF file produced by purple.", category: "required"} + purpleSomaticVcf: {description: "The VCF file produced by purple which contains somatic variants.", category: "required"} + purpleSomaticVcfIndex: {description: "The index of the somatic VCF file produced by purple.", category: "required"} + outputDir: {description: "The directory the output will be placed in.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task CuppaChart { + input { + String sampleName + File cupData + String outputDir = "./cuppa" + + String memory = "4G" + Int time_minutes = 5 + String dockerImage = "quay.io/biowdl/cuppa:1.4" + } + + command { + set -e + mkdir -p ~{outputDir} + cuppa-chart \ + -sample ~{sampleName} + -sample_data ~{cupData} + -output_dir ~{outputDir} + } + + output { + File cuppaChart = "~{outputDir}/~{sampleName}.cuppa.chart.png" + File cuppaConclusion = "~{outputDir}/~{sampleName}.cuppa.conclusion.txt" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + sampleName: {description: "The name of the sample.", category:"common"} + cupData: {description: "The cuppa output.", category: "required"} + outputDir: {description: "The directory the output will be written to.", category:"common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GripssApplicationKt { input { File inputVcf From c0477edfd5904f1de11d7ea0d60e8b65e36e0bed Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 16 Jun 2021 10:25:47 +0200 Subject: [PATCH 279/668] fix typo --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 779820a3..8beb5c76 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -183,7 +183,7 @@ task Cuppa { String javaXmx = "4G" String memory = "5G" - Int time_minutes = 10 + Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/cuppa:1.4" } @@ -239,7 +239,7 @@ task CuppaChart { String outputDir = "./cuppa" String memory = "4G" - Int time_minutes = 5 + Int timeMinutes = 5 String dockerImage = "quay.io/biowdl/cuppa:1.4" } From 1be4badcf451ccad2d2198dbfec4d97aaf68af45 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 21 Jun 2021 11:54:52 +0200 Subject: [PATCH 280/668] increase memory for amber --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 8beb5c76..868d03fe 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,8 +35,8 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = "52G" - String javaXmx = "50G" + String memory = "70G" + String javaXmx = "64G" Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } From 7a693a69f9a59755d527d733946406eed3a2f124 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 22 Jun 2021 11:38:04 +0200 Subject: [PATCH 281/668] remove rainfall plot output --- hmftools.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 868d03fe..8e60351b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -615,7 +615,6 @@ task Purple { File segmentPlot = "~{outputDir}/plot/~{tumorName}.segment.png" File somaticClonalityPlot = 
"~{outputDir}/plot/~{tumorName}.somatic.clonality.png" File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" - File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File purpleVersion = "~{outputDir}/purple.version" File circosNormalRatio = "~{outputDir}/circos/~{normalName}.ratio.circos" File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" @@ -634,7 +633,7 @@ task Purple { purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, - segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] + segmentPlot, somaticClonalityPlot, somaticPlot] Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink, circosTumorRatio, circosGaps, circosBaf, circosCnv, circosInputConf, circosMap, circosSnp] From 664325fc50d19e074d80780cae322157f07035ed Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 22 Jun 2021 13:34:07 +0200 Subject: [PATCH 282/668] fix missing backslashes --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 8e60351b..1b9d8d22 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -247,8 +247,8 @@ task CuppaChart { set -e mkdir -p ~{outputDir} cuppa-chart \ - -sample ~{sampleName} - -sample_data ~{cupData} + -sample ~{sampleName} \ + -sample_data ~{cupData} \ -output_dir ~{outputDir} } From 5e29a653559f7b7cc0f1e2fc787bbf8a8117f306 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 25 Jun 2021 07:40:37 +0200 Subject: [PATCH 283/668] Set defaults for boolean values --- spades.wdl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/spades.wdl b/spades.wdl index 7cc16d21..1f246d48 100644 --- a/spades.wdl +++ b/spades.wdl @@ -34,16 +34,16 @@ task Spades { File? tslrContigs File? trustedContigs File? 
untrustedContigs - Boolean? singleCell - Boolean? metagenomic - Boolean? rna - Boolean? plasmid - Boolean? ionTorrent - Boolean? onlyErrorCorrection - Boolean? onlyAssembler - Boolean? careful - Boolean? disableGzipOutput - Boolean? disableRepeatResolution + Boolean singleCell = False + Boolean metagenomic = False + Boolean rna = False + Boolean plasmid = False + Boolean ionTorrent = False + Boolean onlyErrorCorrection = False + Boolean onlyAssembler = False + Boolean careful = False + Boolean disableGzipOutput = False + Boolean disableRepeatResolution = False File? dataset File? tmpDir String? k From 2ebde5f0a1997a098f89370989bdbbcf242ac207 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 28 Jun 2021 09:51:24 +0200 Subject: [PATCH 284/668] survivor: line 47-49, change integer to string literal --- survivor.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/survivor.wdl b/survivor.wdl index 8b0360d8..de232405 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -44,9 +44,9 @@ task Merge { fileList \ ~{breakpointDistance} \ ~{suppVecs} \ - ~{true=1 false=0 svType} \ - ~{true=1 false=0 strandType} \ - ~{true=1 false=0 distanceBySvSize} \ + ~{true='1' false='0' svType} \ + ~{true='1' false='0' strandType} \ + ~{true='1' false='0' distanceBySvSize} \ ~{minSize} \ ~{outputPath} } From da28f9399252cb8777abc630fe8c34e406d13da3 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 28 Jun 2021 10:56:04 +0200 Subject: [PATCH 285/668] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e7242699..ad3d30fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Survivor: Change integer to string literal in boolean parameters. + Samtools: Add mkdir line to `Fastq` task. + Add new parameters from CCS version 6.0.0 and add two new outputs: `ccs_report.txt` & `zmw_metrics.json.gz`. 
From 545f63af658df8fc515672589a7bfb7e81ed2be3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 28 Jun 2021 11:36:52 +0200 Subject: [PATCH 286/668] update some version and add repeatmasker annotation for gridss --- gridss.wdl | 123 +++++++++++++++++++++++++++++++-------------------- hmftools.wdl | 9 ++-- 2 files changed, 79 insertions(+), 53 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index acafc911..3844c602 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -22,6 +22,61 @@ version 1.0 import "bwa.wdl" as bwa +task AnnotateInsertedSequence { + input { + File inputVcf + String outputPath = "gridss.annotated.vcf.gz" + File viralReference + File viralReferenceFai + File viralReferenceDict + File viralReferenceImg + + Int threads = 8 + String javaXmx = "8G" + String memory = "9G" + String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3 / threads) + } + + command { + AnnotateInsertedSequence -Xmx~{javaXmx} \ + REFERENCE_SEQUENCE=~{viralReference} \ + INPUT=~{inputVcf} \ + OUTPUT=~{outputPath} \ + ALIGNMENT=APPEND \ + WORKING_DIR='.' 
\ + WORKER_THREADS=~{threads} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF file.", category: "required"} + outputPath: {description: "The path the output will be written to.", category: "common"} + viralReference: {description: "A fasta file with viral sequences.", category: "required"} + viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"} + viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"} + viralReferenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the viral reference.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GRIDSS { input { File tumorBam @@ -34,13 +89,12 @@ task GRIDSS { File? normalBai String? normalLabel File? blacklistBed - File? repeatmaskerBed File? 
gridssProperties Int jvmHeapSizeGb = 64 Int threads = 4 Int timeMinutes = ceil(4320 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" + String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" } command { @@ -56,7 +110,6 @@ task GRIDSS { ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ ~{"--blacklist " + blacklistBed} \ - ~{"--repeatmaskerbed " + repeatmaskerBed} \ ~{normalBam} \ ~{tumorBam} tabix -p vcf ~{outputPrefix}.vcf.gz @@ -88,7 +141,6 @@ task GRIDSS { normalBai: {description: "The index for normalBam.", category: "advanced"} normalLabel: {description: "The name of the normal sample.", category: "advanced"} blacklistBed: {description: "A bed file with blaclisted regins.", category: "advanced"} - repeatmaskerBed: {description: "A bed file containing the repeatmasker database.", category: "advanced"} gridssProperties: {description: "A properties file for gridss.", category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} @@ -104,64 +156,37 @@ task GRIDSS { } } -task AnnotateInsertedSequence { +task GridssAnnotateVcfRepeatmasker { input { - File inputVcf - String outputPath = "gridss.annotated.vcf.gz" - File viralReference - File viralReferenceFai - File viralReferenceDict - File viralReferenceImg + File gridssVcf + File gridssVcfIndex + String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" - Int threads = 8 - String javaXmx = "8G" - String memory = "9G" - String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3 / threads) + String memory = "4G" + String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3) } command { - java -Xmx~{javaXmx} \ - -Dsamjdk.create_index=true \ - -Dsamjdk.use_async_io_read_samtools=true \ - -Dsamjdk.use_async_io_write_samtools=true \ - 
-Dsamjdk.use_async_io_write_tribble=true \ - -Dsamjdk.buffer_size=4194304 \ - -cp /usr/local/share/gridss-2.9.4-0/gridss.jar \ - gridss.AnnotateInsertedSequence \ - REFERENCE_SEQUENCE=~{viralReference} \ - INPUT=~{inputVcf} \ - OUTPUT=~{outputPath} \ - ALIGNMENT=APPEND \ - WORKING_DIR='.' \ - WORKER_THREADS=~{threads} + gridss_annotate_vcf_repeatmasker + --output ~{outputPath} \ + --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ + -w . \ + ~{gridssVcf} } output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" + File annotatedVcf = outputPath + File annotatedVcfIndex = "~{outputPath}.tbi" } runtime { - cpu: threads - memory: memory - time_minutes: timeMinutes # !UnknownRuntimeKey - docker: dockerImage - } - - parameter_meta { - inputVcf: {description: "The input VCF file.", category: "required"} - outputPath: {description: "The path the output will be written to.", category: "common"} - viralReference: {description: "A fasta file with viral sequences.", category: "required"} - viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"} - viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"} - viralReferenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the viral reference.", category: "required"} - + gridssVcf: {description: "The GRIDSS output.", category: "required"} + gridssVcfIndex: {description: "The index for the GRIDSS output.", category: "required"} + outputPath: {description: "The path the output should be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} +} \ No newline at end of file diff --git a/hmftools.wdl b/hmftools.wdl index 1b9d8d22..7d6f1547 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -304,7 +304,8 @@ task GripssApplicationKt { -breakend_pon ~{breakendPon} \ -breakpoint_pon ~{breakpointPon} \ -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} + -output_vcf ~{outputPath} \ + -paired_normal_tumor_ordinals } output { @@ -463,7 +464,7 @@ task Linx { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.13--0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.15--hdfd78af_0" } command { @@ -565,7 +566,7 @@ task Purple { Int timeMinutes = 30 String memory = "9G" String javaXmx = "8G" - String dockerImage = "quay.io/biocontainers/hmftools-purple:2.52--0" + String dockerImage = "quay.io/biocontainers/hmftools-purple:2.54--hdfd78af_0" } command { @@ -706,7 +707,7 @@ task Sage { String javaXmx = "50G" String memory = "60G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 8 / threads) - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.7--0" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_0" } command { From 04c65ab38a2d91051e3c0aa90c67738b755a4921 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 28 Jun 2021 15:29:21 +0200 Subject: [PATCH 287/668] add virusbreakend --- gridss.wdl | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 3844c602..52e039d1 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -164,7 +164,7 
@@ task GridssAnnotateVcfRepeatmasker { String memory = "4G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3) + Int timeMinutes = 1 + ceil(size(gridssVcf, "G") * 3) } command { @@ -181,6 +181,12 @@ task GridssAnnotateVcfRepeatmasker { } runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { gridssVcf: {description: "The GRIDSS output.", category: "required"} gridssVcfIndex: {description: "The index for the GRIDSS output.", category: "required"} outputPath: {description: "The path the output should be written to.", category: "common"} @@ -189,4 +195,57 @@ task GridssAnnotateVcfRepeatmasker { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} + +task Virusbreakend { + input { + File bam + File bamIndex + File referenceFasta + File virusbreakendDB + String outputPath = "./virusbreakend.vcf" + + String memory = "75G" + Int threads = 8 + String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + Int timeMinutes = 180 + } + + command { + mkdir virusbreakenddb + tar -xzvf ~{virusbreakendDB} -C virusbreakenddb --strip-components 1 + virusbreakend \ + --output ~{outputPath} \ + --workingdir . 
\ + --reference ~{referenceFasta} \ + --db virusbreakenddb \ + --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ + -t ~{threads} \ + ~{bam} + } + + output { + File vcf = outputPath + File summary = "~{outputPath}.summary.tsv" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + bam: {description: "A BAM file.", category: "required"} + bamIndex: {description: "The index for the BAM file.", category: "required"} + referenceFasta: {description: "The fasta of the reference genome.", category: "required"} + virusbreakendDB: {description: "A .tar.gz containing the virusbreakend database.", category: "required"} + outputPath: {description: "The path the output should be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + threads: {description: "The number of the threads to use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From c2f223eb6a487d7c5bca957bdaaf830d0522d3cb Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 29 Jun 2021 13:26:01 +0200 Subject: [PATCH 288/668] add virusinterpreter --- hmftools.wdl | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 7d6f1547..f1617bbe 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -778,3 +778,54 @@ task Sage { category: "advanced"} } } + +task VirusInterpreter { + input { + String sampleId + File virusBreakendTsv + File taxonomyDbTsv + File virusInterpretationTsv + File virusBlacklistTsv + String outputDir = "." 
+ + String memory = "3G" + String javaXmx = "2G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biowdl/virus-interpreter:1.0" + } + + command { + virus-interpreter -Xmx~{javaXmx} \ + -sample_id ~{sampleId} \ + -virus_breakend_tsv ~{virusBreakendTsv} \ + -taxonomy_db_tsv ~{taxonomyDbTsv} \ + -virus_interpretation_tsv ~{virusInterpretationTsv} \ + -virus_blacklist_tsv ~{virusBlacklistTsv} \ + -output_dir ~{outputDir} + } + + output { + File virusAnnotatedTsv = "~{outputDir}/~{sampleId}.virus.annotated.tsv" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + sampleId: {description: "The name of the sample.", category: "required"} + virusBreakendTsv: {description: "The TSV output from virusbreakend.", category: "required"} + taxonomyDbTsv: {description: "A taxonomy database tsv.", category: "required"} + virusInterpretationTsv: {description: "A virus interpretation tsv.", category: "required"} + virusBlacklistTsv: {description: "A virus blacklist tsv.", category: "required"} + outputDir: {description: "The directory the output will be written to.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From f169d78589c3e4d2a97892cfc3fb685d6c217d6c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 29 Jun 2021 16:02:28 +0200 Subject: [PATCH 289/668] add protect --- hmftools.wdl | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index f1617bbe..646d01ea 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -542,6 +542,101 @@ task Linx { } } +task Protect { + input { + String refGenomeVersion + String tumorName + String normalName + Array[String]+ sampleDoids + String outputDir = "." + Array[File]+ serveActionability + File doidsJson + File purplePurity + File purpleQc + File purpleDriverCatalogSomatic + File purpleDriverCatalogGermline + File purpleSomaticVariants + File purpleSomaticVariantsIndex + File purpleGermlineVariants + File purpleGermlineVariantsIndex + File purpleGeneCopyNumber + File linxFusion + File linxBreakend + File linxDriversCatalog + File chordPrediction + File annotatedVirus + + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biowdl/protect:v1.4" + } + + command { + protect -Xmx~{javaXmx} \ + -ref_genome_version ~{refGenomeVersion} \ + -tumor_sample_id ~{tumorName} \ + -reference_sample_id ~{normalName} \ + -primary_tumor_doids ~{sep=";" sampleDoids} \ + -output_dir ~{outputDir} \ + -serve_actionability_dir ~{sub(serveActionability[0], basename(serveActionability[0]), "")} \ + -doid_json ~{doidsJson} \ + -purple_purity_tsv ~{purplePurity} \ + -purple_qc_file ~{purpleQc} \ + -purple_somatic_driver_catalog_tsv ~{purpleDriverCatalogSomatic} \ + -purple_germline_driver_catalog_tsv ~{purpleDriverCatalogGermline} \ + -purple_somatic_variant_vcf ~{purpleSomaticVariants} \ + -purple_germline_variant_vcf ~{purpleGermlineVariants} \ + -purple_gene_copy_number_tsv ~{purpleGeneCopyNumber} \ + -linx_fusion_tsv 
~{linxFusion} \ + -linx_breakend_tsv ~{linxBreakend} \ + -linx_driver_catalog_tsv ~{linxDriversCatalog} \ + -chord_prediction_txt ~{chordPrediction} \ + -annotated_virus_tsv ~{annotatedVirus} + } + + output { + File protectTsv = "~{outputDir}/~{tumorName}.protect.tsv" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + normalName: {description: "The name of the normal sample.", category: "required"} + sampleDoids: {description: "The DOIDs (Human Disease Ontology) for the primary tumor.", category: "required"} + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + serveActionability: {description: "The actionability files generated by hmftools' serve.", category: "required"} + doidsJson: {description: "A json with the DOID (Human Disease Ontology) tree.", category: "required"} + purplePurity: {description: "The purity file generated by purple.", category: "required"} + purpleQc: {description: "The QC file generated by purple.", category: "required"} + purpleDriverCatalogSomatic: {description: "The somatic driver catalog generated by purple.", category: "required"} + purpleDriverCatalogGermline: {description: "The germline driver catalog generated by purple.", category: "required"} + purpleSomaticVariants: {description: "The somatic VCF generated by purple.", category: "required"} + purpleSomaticVariantsIndex: {description: "The index for the somatic VCF generated by purple.", category: "required"} + purpleGermlineVariants: {description: "The germline VCF generated by purple.", category: "required"} + purpleGermlineVariantsIndex: {description: "The index of the germline VCF generated by purple.", category: "required"} + 
purpleGeneCopyNumber: {description: "The gene copy number file generated by purple.", category: "required"} + linxFusion: {description: "The fusion file generated by linx.", category: "required"} + linxBreakend: {description: "The breakend file generated by linx.", category: "required"} + linxDriversCatalog: {description: "The driver catalog generated by linx.", category: "required"} + chordPrediction: {description: "The chord prediction file.", category: "required"} + annotatedVirus: {description: "The virus-interpreter output.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Purple { input { String normalName From 47c89884e700c1c7ad11ba26e195d7812a6f1fac Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 30 Jun 2021 14:27:10 +0200 Subject: [PATCH 290/668] update CPAT to 3.0.4 --- CHANGELOG.md | 2 ++ CPAT.wdl | 17 +++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 28e998e6..112b8f04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Update CPAT to version 3.0.4. + + Changed the `outFilePath` input to `outputPrefix`. + GffCompare: Make the `referenceAnnotation` input optional. + Stringtie: Add the `minimumCoverage` input. 
+ UMI-tools: update default dockerImage to use umitools v1.1.1 with correct samtools version (1.10) diff --git a/CPAT.wdl b/CPAT.wdl index afb67853..b3414bc5 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -23,7 +23,7 @@ version 1.0 task CPAT { input { File gene - String outFilePath + String outputPrefix File hex File logitModel @@ -35,7 +35,7 @@ task CPAT { Array[String]? stopCodons Int timeMinutes = 10 + ceil(size(gene, "G") * 30) - String dockerImage = "biocontainers/cpat:v1.2.4_cv1" + String dockerImage = "biocontainers/cpat:3.0.4--py39hcbe4a3b_0" } # Some WDL magic in the command section to properly output the start and @@ -47,7 +47,7 @@ task CPAT { mkdir -p "$(dirname ~{outFilePath})" cpat.py \ --gene ~{gene} \ - --outfile ~{outFilePath} \ + --outfile ~{outputPrefix} \ --hex ~{hex} \ --logitModel ~{logitModel} \ ~{"--ref " + referenceGenome} \ @@ -56,7 +56,11 @@ task CPAT { } output { - File outFile = outFilePath + File orfSeqs = "~{outputPrefix}.ORF_seqs.fa" + File orfProb = "~{outputPrefix}.ORF_prob.tsv" + File orfProbBest = "~{outputPrefix}.ORF_prob.best.tsv" + File noOrf = "~{outputPrefix}.no_ORF.txt" + File rScript = "~{outputPrefix}.r" } runtime { @@ -67,7 +71,7 @@ task CPAT { parameter_meta { # inputs gene: {description: "Equivalent to CPAT's `--gene` option.", category: "required"} - outFilePath: {description: "Equivalent to CPAT's `--outfile` option.", category: "required"} + outputPrefix: {description: "Equivalent to CPAT's `--outfile` option.", category: "required"} hex: {description: "Equivalent to CPAT's `--hex` option.", category: "required"} logitModel: {description: "Equivalent to CPAT's `--logitModel` option.", category: "required"} referenceGenome: {description: "Equivalent to CPAT's `--ref` option.", category: "advanced"} @@ -76,9 +80,6 @@ task CPAT { stopCodons: {description: "Equivalent to CPAT's `--stop` option.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} 
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - - # outputs - outFile: {description: "CPAT logistic regression model."} } } From d4d36e02f167fc1676071d67c6749feee44c510d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 30 Jun 2021 15:00:45 +0200 Subject: [PATCH 291/668] fix mkdir in CPAT --- CPAT.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CPAT.wdl b/CPAT.wdl index afce53e2..972613cf 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -45,7 +45,7 @@ task CPAT { # to non-optionals. command { set -e - mkdir -p "$(dirname ~{outFilePath})" + mkdir -p "$(dirname ~{outputPrefix})" cpat.py \ --gene ~{gene} \ --outfile ~{outputPrefix} \ From 8eb013496e3e81107ed18d8c5f067a9ffec15dea Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 30 Jun 2021 16:49:50 +0200 Subject: [PATCH 292/668] fix cpat dockerimage --- CPAT.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CPAT.wdl b/CPAT.wdl index 972613cf..e6cef3ea 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -36,7 +36,7 @@ task CPAT { String memory = "4G" Int timeMinutes = 10 + ceil(size(gene, "G") * 30) - String dockerImage = "biocontainers/cpat:3.0.4--py39hcbe4a3b_0" + String dockerImage = "quay.io/biocontainers/cpat:3.0.4--py39hcbe4a3b_0" } # Some WDL magic in the command section to properly output the start and From 3c92beac7d694209332b66e6869c7c7b6a3ea885 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 1 Jul 2021 12:28:31 +0200 Subject: [PATCH 293/668] remove tabix from gridss --- gridss.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 52e039d1..6c8899e4 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -112,7 +112,6 @@ task GRIDSS { ~{"--blacklist " + blacklistBed} \ ~{normalBam} \ ~{tumorBam} - tabix -p vcf ~{outputPrefix}.vcf.gz samtools index ~{outputPrefix}_assembly.bam ~{outputPrefix}_assembly.bai } From 
44a70a394df432fe678a0fa82ef015acf3e5c6d7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jul 2021 09:42:45 +0200 Subject: [PATCH 294/668] fix missing backslash --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 6c8899e4..f9a92f56 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -167,7 +167,7 @@ task GridssAnnotateVcfRepeatmasker { } command { - gridss_annotate_vcf_repeatmasker + gridss_annotate_vcf_repeatmasker \ --output ~{outputPath} \ --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ -w . \ From 7c5ce8c031f34744f9759e59b2617113120a40be Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jul 2021 10:50:50 +0200 Subject: [PATCH 295/668] set default timeMinutes GridssAnnotateVcfRepeatmasker to 120 --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index f9a92f56..02f32297 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -163,7 +163,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "4G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 1 + ceil(size(gridssVcf, "G") * 3) + Int timeMinutes = 120 } command { From f3ac54310bf8eabcf1fdeb61d1caca2149bac033 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 2 Jul 2021 12:35:33 +0200 Subject: [PATCH 296/668] Update spades.wdl Co-authored-by: Davy Cats --- spades.wdl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/spades.wdl b/spades.wdl index 1f246d48..3975dd32 100644 --- a/spades.wdl +++ b/spades.wdl @@ -34,16 +34,16 @@ task Spades { File? tslrContigs File? trustedContigs File? 
untrustedContigs - Boolean singleCell = False - Boolean metagenomic = False - Boolean rna = False - Boolean plasmid = False - Boolean ionTorrent = False - Boolean onlyErrorCorrection = False - Boolean onlyAssembler = False - Boolean careful = False - Boolean disableGzipOutput = False - Boolean disableRepeatResolution = False + Boolean singleCell = false + Boolean metagenomic = false + Boolean rna = false + Boolean plasmid = false + Boolean ionTorrent = false + Boolean onlyErrorCorrection = false + Boolean onlyAssembler = false + Boolean careful = false + Boolean disableGzipOutput = false + Boolean disableRepeatResolution = false File? dataset File? tmpDir String? k From 4e0ab25eead014f9e9038bab7ea61a810dbb05cc Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jul 2021 13:02:18 +0200 Subject: [PATCH 297/668] small formatting fix --- gffcompare.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gffcompare.wdl b/gffcompare.wdl index aa7c7209..d06602bc 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -116,8 +116,8 @@ task GffCompare { runtime { memory: memory - time_minutes: timeMinutes - docker: dockerImage + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { From 497f12a7446dc80873a66fa00db1c9bbc0eece99 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jul 2021 13:47:29 +0200 Subject: [PATCH 298/668] adjust repeatmasker time --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 02f32297..db20a203 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -163,7 +163,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "4G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 120 + Int timeMinutes = 1440 } command { From 7f4433f50b5ef8deaeb1d86beaaaae5ff07bae41 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jul 2021 15:25:28 +0200 Subject: [PATCH 299/668] fix missing memory runtime BgzipAndIndex --- samtools.wdl | 1 + 1 file 
changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index d34df51e..c8837d94 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -46,6 +46,7 @@ task BgzipAndIndex { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } From 477f00f57a1bf445672da7b7be7ed999e6230e93 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 5 Jul 2021 09:13:07 +0200 Subject: [PATCH 300/668] increase time for GridssAnnotateVcfRepeatmasker --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index db20a203..f137f968 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -163,7 +163,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "4G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 1440 + Int timeMinutes = 2880 } command { From 4a32a443a29e324b8b01fac1fdbc01a7f2078f79 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 7 Jul 2021 09:39:47 +0200 Subject: [PATCH 301/668] increase memory repeatmasker --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index f137f968..ad230d05 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -161,7 +161,7 @@ task GridssAnnotateVcfRepeatmasker { File gridssVcfIndex String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" - String memory = "4G" + String memory = "50G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" Int timeMinutes = 2880 } From 9d3b5a556bd642d8dc8d098694497a5a3b1950fb Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 7 Jul 2021 09:46:34 +0200 Subject: [PATCH 302/668] add threads to repeatmasker --- gridss.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index ad230d05..069d6953 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -162,6 +162,7 @@ task GridssAnnotateVcfRepeatmasker { String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" String memory = "50G" + Int threads = 4 String dockerImage = 
"quay.io/biocontainers/gridss:2.12.0--h270b39a_1" Int timeMinutes = 2880 } @@ -171,6 +172,7 @@ task GridssAnnotateVcfRepeatmasker { --output ~{outputPath} \ --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ -w . \ + -t ~{threads} \ ~{gridssVcf} } @@ -180,6 +182,7 @@ task GridssAnnotateVcfRepeatmasker { } runtime { + cpu: threads memory: memory time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage @@ -189,6 +192,7 @@ task GridssAnnotateVcfRepeatmasker { gridssVcf: {description: "The GRIDSS output.", category: "required"} gridssVcfIndex: {description: "The index for the GRIDSS output.", category: "required"} outputPath: {description: "The path the output should be written to.", category: "common"} + threads: {description: "The number of the threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", From a40300a4d6fb9296ca9e4a1978fbeffe3cb86f90 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 7 Jul 2021 16:59:58 +0200 Subject: [PATCH 303/668] update submodule scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index c31670d3..84690a30 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit c31670d3a9222a2feafc649cbc118c95afbc7189 +Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 From ff2b1efb8482282288107b28e1bf53ca91319b30 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 7 Jul 2021 17:00:43 +0200 Subject: [PATCH 304/668] Change current development version in CHANGELOG.md to stable version --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5eda114e..7cb7a436 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 5.0.0-dev +version 5.0.0 --------------------------- + Update CPAT to version 3.0.4. + Changed the `outFilePath` input to `outputPrefix`. 
From a411311e0d74045541a000176c2f172c7d0679fd Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 7 Jul 2021 17:02:48 +0200 Subject: [PATCH 305/668] update CI.yml --- .github/workflows/ci.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 97d329ad..78566111 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,10 +2,8 @@ name: Continuous integration on: pull_request: - paths: - - "**.wdl" # Workflow files and task - - "**.yml" # Ci configuration, tests and docker images - - "!docs/**" + paths_ignore: + - "docs/**" defaults: run: @@ -29,4 +27,4 @@ jobs: - name: install requirements run: conda install -n test cromwell miniwdl wdl-aid - name: run linting - run: bash scripts/biowdl_lint.sh \ No newline at end of file + run: bash scripts/biowdl_lint.sh From 9b0873ab9180e4af3a3ab869a4e909f5f0ee327e Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 7 Jul 2021 17:20:45 +0200 Subject: [PATCH 306/668] setting next version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 0062ac97..09b254e9 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -5.0.0 +6.0.0 From c80402130bdb7471e8f37fece8cb643625a0df02 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 09:58:51 +0200 Subject: [PATCH 307/668] fix Xmx in AnnotateInsertedSequence --- gridss.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 069d6953..aedac9ab 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -39,7 +39,9 @@ task AnnotateInsertedSequence { } command { - AnnotateInsertedSequence -Xmx~{javaXmx} \ + set -e + _JAVA_OPTIONS="${_JAVA_OPTIONS}:-Xmx~{javaXmx}" + AnnotateInsertedSequence \ REFERENCE_SEQUENCE=~{viralReference} \ INPUT=~{inputVcf} \ OUTPUT=~{outputPath} \ @@ -215,6 +217,7 @@ task Virusbreakend { } command { + set -e mkdir virusbreakenddb tar -xzvf ~{virusbreakendDB} -C virusbreakenddb --strip-components 1 
virusbreakend \ From 28b1a835d558d8ecd60682e9105731b6762f4c30 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 11:32:44 +0200 Subject: [PATCH 308/668] fix wrong placeholder --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index aedac9ab..66e27ff0 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -40,7 +40,7 @@ task AnnotateInsertedSequence { command { set -e - _JAVA_OPTIONS="${_JAVA_OPTIONS}:-Xmx~{javaXmx}" + _JAVA_OPTIONS="$_JAVA_OPTIONS:-Xmx~{javaXmx}" AnnotateInsertedSequence \ REFERENCE_SEQUENCE=~{viralReference} \ INPUT=~{inputVcf} \ From d28a2a529ede9ffc89b18628cc012c846354e096 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 11:53:24 +0200 Subject: [PATCH 309/668] typo --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 66e27ff0..fcfed095 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -35,12 +35,12 @@ task AnnotateInsertedSequence { String javaXmx = "8G" String memory = "9G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3 / threads) + Int timeMinutes = 120 } command { set -e - _JAVA_OPTIONS="$_JAVA_OPTIONS:-Xmx~{javaXmx}" + _JAVA_OPTIONS="$_JAVA_OPTIONS -Xmx~{javaXmx}" AnnotateInsertedSequence \ REFERENCE_SEQUENCE=~{viralReference} \ INPUT=~{inputVcf} \ From 572114885be2bd0243ac59898c223fbf954e1510 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 14:40:29 +0200 Subject: [PATCH 310/668] update gripss version --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 646d01ea..9dc78dd8 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -290,7 +290,7 @@ task GripssApplicationKt { String memory = "33G" String javaXmx = "32G" Int timeMinutes = 45 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" + String dockerImage = "umccr/gripss:1.11" #FIXME quay doesn't have 
the latest version "quay.io/biocontainers/hmftools-gripss:1.9--0" } command { @@ -346,7 +346,7 @@ task GripssHardFilterApplicationKt { String memory = "3G" String javaXmx = "2G" Int timeMinutes = 15 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" + String dockerImage = "umccr/gripss:1.11" #FIXME quay doesn't have the latest version "quay.io/biocontainers/hmftools-gripss:1.9--0" } command { From f62a7424b88a1de1e6c1791aeff7c020a60939cd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 14:42:20 +0200 Subject: [PATCH 311/668] fix gripss version in command --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 9dc78dd8..6a086d37 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -295,7 +295,7 @@ task GripssApplicationKt { command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ -tumor ~{tumorName} \ -reference ~{normalName} \ @@ -351,7 +351,7 @@ task GripssHardFilterApplicationKt { command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ -input_vcf ~{inputVcf} \ -output_vcf ~{outputPath} From e470f59fa587bef9dd075eb28ba6317be89a8416 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 9 Jul 2021 10:01:09 +0200 Subject: [PATCH 312/668] update purple and gripss versions --- hmftools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 6a086d37..8c38c501 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -290,7 +290,7 @@ task GripssApplicationKt { String memory = "33G" String javaXmx = "32G" Int timeMinutes = 45 - String dockerImage = "umccr/gripss:1.11" #FIXME quay doesn't have the latest 
version "quay.io/biocontainers/hmftools-gripss:1.9--0" + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" } command { @@ -346,7 +346,7 @@ task GripssHardFilterApplicationKt { String memory = "3G" String javaXmx = "2G" Int timeMinutes = 15 - String dockerImage = "umccr/gripss:1.11" #FIXME quay doesn't have the latest version "quay.io/biocontainers/hmftools-gripss:1.9--0" + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" } command { @@ -661,7 +661,7 @@ task Purple { Int timeMinutes = 30 String memory = "9G" String javaXmx = "8G" - String dockerImage = "quay.io/biocontainers/hmftools-purple:2.54--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-purple:3.1--hdfd78af_0" } command { From c47163aa1c9d67b5d675444d06afe36e5ee31ec9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 9 Jul 2021 13:24:47 +0200 Subject: [PATCH 313/668] change docker image for purple --- hmftools.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 8c38c501..bf79070e 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -661,7 +661,8 @@ task Purple { Int timeMinutes = 30 String memory = "9G" String javaXmx = "8G" - String dockerImage = "quay.io/biocontainers/hmftools-purple:3.1--hdfd78af_0" + # clone of quay.io/biocontainers/hmftools-purple:3.1--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6' + String dockerImage = "quay.io/biowdl/hmftools-purple:3.1" } command { From ed6061d1671ba091992248375e613daf57fd544d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 9 Jul 2021 14:20:38 +0200 Subject: [PATCH 314/668] fix linx output --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index bf79070e..a327fd0b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -488,7 +488,7 @@ task Linx { } output { - File driverCatalog = "~{outputDir}/~{sampleName}.driver.catalog.tsv" + File driverCatalog = 
"~{outputDir}/~{sampleName}.linx.driver.catalog.tsv" File linxBreakend = "~{outputDir}/~{sampleName}.linx.breakend.tsv" File linxClusters = "~{outputDir}/~{sampleName}.linx.clusters.tsv" File linxDrivers = "~{outputDir}/~{sampleName}.linx.drivers.tsv" From 1fbf3eacc269782a6035c0c2e2bec348a31de0e8 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 19 Jul 2021 11:54:26 +0200 Subject: [PATCH 315/668] update common.wdl fastqc.wdl samtools.wdl somaticseq.wdl umi-tools.wdl wisestork.wdl: add runtime memory --- common.wdl | 28 +++++++++++++++++++++++----- fastqc.wdl | 4 +++- samtools.wdl | 3 ++- somaticseq.wdl | 10 ++++++++++ umi-tools.wdl | 2 ++ wisestork.wdl | 6 ++++++ 6 files changed, 46 insertions(+), 7 deletions(-) diff --git a/common.wdl b/common.wdl index 54b11567..d29ed5da 100644 --- a/common.wdl +++ b/common.wdl @@ -24,6 +24,8 @@ task AppendToStringArray { input { Array[String] array String string + + String memory = "1G" } command { @@ -36,7 +38,7 @@ task AppendToStringArray { } runtime { - memory: "1G" + memory: memory } } @@ -45,9 +47,11 @@ task CheckFileMD5 { input { File file String md5 + # By default cromwell expects /bin/bash to be present in the container. # The 'bash' container does not fill this requirement. (It is in /usr/local/bin/bash) # Use a stable version of debian:stretch-slim for this. (Smaller than ubuntu) + String memory = "1G" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -60,6 +64,7 @@ task CheckFileMD5 { runtime { docker: dockerImage + memory: memory } } @@ -69,6 +74,8 @@ task ConcatenateTextFiles { String combinedFilePath Boolean unzip = false Boolean zip = false + + String memory = "1G" } # When input and output is both compressed decompression is not needed. @@ -86,7 +93,7 @@ task ConcatenateTextFiles { } runtime { - memory: "1G" + memory: memory } } @@ -97,6 +104,7 @@ task Copy { Boolean recursive = false # Version not that important as long as it is stable. 
+ String memory = "1G" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -112,6 +120,7 @@ task Copy { runtime { docker: dockerImage + memory: memory } } @@ -122,6 +131,8 @@ task CreateLink { input { String inputFile String outputPath + + String memory = "1G" } command { @@ -131,12 +142,17 @@ task CreateLink { output { File link = outputPath } + + runtime { + memory: memory + } } task MapMd5 { input { Map[String,String] map + String memory = "1G" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -150,7 +166,7 @@ task MapMd5 { } runtime { - memory: "1G" + memory: memory docker: dockerImage } } @@ -160,6 +176,7 @@ task StringArrayMd5 { input { Array[String] stringArray + String memory = "1G" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -173,7 +190,7 @@ task StringArrayMd5 { } runtime { - memory: "1G" + memory: memory docker: dockerImage } } @@ -183,6 +200,7 @@ task TextToFile { String text String outputFile = "out.txt" + String memory = "1G" Int timeMinutes = 1 String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -196,7 +214,7 @@ task TextToFile { } runtime { - memory: "1G" + memory: memory time_minutes: timeMinutes docker: dockerImage } diff --git a/fastqc.wdl b/fastqc.wdl index 973eeed9..3a07db4e 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -143,6 +143,7 @@ task Fastqc { task GetConfiguration { input { + String memory = "2G" # Needs more than 1 to pull the docker image. Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/fastqc:0.11.7--4" } @@ -165,7 +166,7 @@ task GetConfiguration { } runtime { - memory: "2G" # Needs more than 1 to pull the docker image. 
+ memory: memory time_minute: timeMinutes docker: dockerImage } @@ -173,6 +174,7 @@ task GetConfiguration { parameter_meta { # inputs timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/samtools.wdl b/samtools.wdl index 46d1eb70..04e27fca 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -25,7 +25,7 @@ task BgzipAndIndex { File inputFile String outputDir String type = "vcf" - + String memory = "2G" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -47,6 +47,7 @@ task BgzipAndIndex { runtime { time_minutes: timeMinutes docker: dockerImage + memory: memory } parameter_meta { diff --git a/somaticseq.wdl b/somaticseq.wdl index 07103ef9..27c3fe36 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -49,6 +49,7 @@ task ParallelPaired { Int threads = 1 Int timeMinutes = 60 + String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -92,6 +93,7 @@ task ParallelPaired { runtime { cpu: threads time_minutes: timeMinutes + memory: memory docker: dockerImage } @@ -161,6 +163,7 @@ task ParallelPairedTrain { Int threads = 1 Int timeMinutes = 240 + String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -203,6 +206,7 @@ task ParallelPairedTrain { runtime { cpu: threads time_minutes: timeMinutes + memory: memory docker: dockerImage } @@ -266,6 +270,7 @@ task ParallelSingle { Int threads = 1 Int timeMinutes = 60 + String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -302,6 +307,7 @@ task ParallelSingle { runtime { cpu: threads time_minutes: timeMinutes + memory: memory docker: dockerImage } @@ -355,6 +361,7 @@ task 
ParallelSingleTrain { Int threads = 1 Int timeMinutes = 240 + String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -390,6 +397,7 @@ task ParallelSingleTrain { runtime { cpu: threads time_minutes: timeMinutes + memory: memory docker: dockerImage } @@ -430,6 +438,7 @@ task ModifyStrelka { String outputVCFName = basename(strelkaVCF, ".gz") Int timeMinutes = 20 + String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -448,6 +457,7 @@ task ModifyStrelka { runtime { time_minutes: timeMinutes + memory: memory docker: dockerImage } diff --git a/umi-tools.wdl b/umi-tools.wdl index 86bf1314..b79817c2 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -30,6 +30,7 @@ task Extract { String? read2Output = "umi_extracted_R2.fastq.gz" Boolean threePrime = false + String memory = "20G" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:3067b520386698317fd507c413baf7f901666fd4-0" } @@ -51,6 +52,7 @@ task Extract { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } diff --git a/wisestork.wdl b/wisestork.wdl index 6be32168..8fb4b76b 100644 --- a/wisestork.wdl +++ b/wisestork.wdl @@ -31,6 +31,7 @@ task Count { Int? binSize File? binFile + String memory = "2G" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -50,6 +51,7 @@ task Count { } runtime { + memory: memory docker: dockerImage } } @@ -67,6 +69,7 @@ task GcCorrect { Int? iter Float? fracLowess + String memory = "2G" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -89,6 +92,7 @@ task GcCorrect { } runtime { + memory: memory docker: dockerImage } } @@ -143,6 +147,7 @@ task Zscore { Int? binSize File? 
binFile + String memory = "2G" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -163,6 +168,7 @@ task Zscore { } runtime { + memory: memory docker: dockerImage } } From 84cd7f692e38b4903e7945315a44265d7e16c5f2 Mon Sep 17 00:00:00 2001 From: cagaser Date: Tue, 20 Jul 2021 09:04:24 +0200 Subject: [PATCH 316/668] update CHANGELOG --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7cb7a436..d7bbc697 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> +version 5.0.1-dev +--------------------------- ++ Update number of tasks: add memory runtime version 5.0.0 --------------------------- From e1281833adff3aab2489d315f8e7da98e0e2fade Mon Sep 17 00:00:00 2001 From: cagaser Date: Tue, 20 Jul 2021 14:41:54 +0200 Subject: [PATCH 317/668] update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7bbc697..7f9df602 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> -version 5.0.1-dev +version 5.0.1 --------------------------- + Update number of tasks: add memory runtime From c9c36cf805cb70e488136609d99f601b16b1aa66 Mon Sep 17 00:00:00 2001 From: cagaser Date: Tue, 20 Jul 2021 16:39:50 +0200 Subject: [PATCH 318/668] update samtools.wdl somaticseq.wdl: add memory runtime --- samtools.wdl | 3 +++ somaticseq.wdl | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 46d1eb70..81b6c17d 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -26,6 +26,7 @@ task BgzipAndIndex { String outputDir String type = "vcf" + String memory = "2G" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -45,6 +46,7 @@ task BgzipAndIndex { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -54,6 +56,7 @@ task BgzipAndIndex { inputFile: {description: "The file to be compressed and indexed.", category: "required"} outputDir: {description: "The directory in which the output will be placed.", category: "required"} type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/somaticseq.wdl b/somaticseq.wdl index 07103ef9..2992a800 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -47,6 +47,7 @@ task ParallelPaired { File? strelkaSNV File? 
strelkaIndel + String memory = "2G" Int threads = 1 Int timeMinutes = 60 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -91,6 +92,7 @@ task ParallelPaired { runtime { cpu: threads + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -121,6 +123,7 @@ task ParallelPaired { strelkaSNV: {description: "An SNV VCF as produced by strelka.", category: "advanced"} strelkaIndel: {description: "An indel VCF as produced by somaticsniper.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + ParallelPaired timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -159,6 +162,7 @@ task ParallelPairedTrain { File? strelkaSNV File? strelkaIndel + String memory = "2G" Int threads = 1 Int timeMinutes = 240 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -202,6 +206,7 @@ task ParallelPairedTrain { runtime { cpu: threads + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -232,6 +237,7 @@ task ParallelPairedTrain { strelkaSNV: {description: "An SNV VCF as produced by strelka.", category: "advanced"} strelkaIndel: {description: "An indel VCF as produced by somaticsniper.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -264,6 +270,7 @@ task ParallelSingle { File? scalpelVCF File? 
strelkaVCF + String memory = "2G" Int threads = 1 Int timeMinutes = 60 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -301,6 +308,7 @@ task ParallelSingle { runtime { cpu: threads + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -323,6 +331,7 @@ task ParallelSingle { scalpelVCF: {description: "A VCF as produced by scalpel.", category: "advanced"} strelkaVCF: {description: "A VCF as produced by strelka.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -353,6 +362,7 @@ task ParallelSingleTrain { File? scalpelVCF File? strelkaVCF + String memory = "2G" Int threads = 1 Int timeMinutes = 240 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -389,6 +399,7 @@ task ParallelSingleTrain { runtime { cpu: threads + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -411,6 +422,7 @@ task ParallelSingleTrain { scalpelVCF: {description: "A VCF as produced by scalpel.", category: "advanced"} strelkaVCF: {description: "A VCF as produced by strelka.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -429,6 +441,7 @@ task ModifyStrelka { File strelkaVCF String outputVCFName = basename(strelkaVCF, ".gz") + String memory = "2G" Int timeMinutes = 20 String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -447,6 +460,7 @@ task ModifyStrelka { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -455,6 +469,7 @@ task ModifyStrelka { # inputs strelkaVCF: {description: "A vcf file as produced by strelka.", category: "required"} outputVCFName: {description: "The location the output VCF file should be written to.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 67c69d4eb336c69de999c66fadccf6c91345e0c7 Mon Sep 17 00:00:00 2001 From: cagaser Date: Tue, 20 Jul 2021 16:46:45 +0200 Subject: [PATCH 319/668] small fix --- somaticseq.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/somaticseq.wdl b/somaticseq.wdl index 2992a800..63f8362e 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -123,7 +123,7 @@ task ParallelPaired { strelkaSNV: {description: "An SNV VCF as produced by strelka.", category: "advanced"} strelkaIndel: {description: "An indel VCF as produced by somaticsniper.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} - ParallelPaired + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 5cb91703fb777ae35bcf6e509f7e124643339891 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 21 Jul 2021 14:18:56 +0200 Subject: [PATCH 320/668] update CHANGELOG --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7cb7a436..dfb81c47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> +version 5.0.1-dev +--------------------------- ++ add runtime memory to number of tasks. version 5.0.0 --------------------------- From 70cb8bdcbfa7d9384b2fd943a2686d01357854d3 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 21 Jul 2021 14:34:30 +0200 Subject: [PATCH 321/668] update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dfb81c47..b3dbc7f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 5.0.1-dev +version 5.0.1 --------------------------- + add runtime memory to number of tasks. 
From 8349056c8bd768e472c4178201f3241edaa7952f Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 21 Jul 2021 16:05:31 +0200 Subject: [PATCH 322/668] remove duplicated memory --- somaticseq.wdl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/somaticseq.wdl b/somaticseq.wdl index afe2918f..8c7fb884 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -50,7 +50,6 @@ task ParallelPaired { String memory = "2G" Int threads = 1 Int timeMinutes = 60 - String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -167,7 +166,6 @@ task ParallelPairedTrain { String memory = "2G" Int threads = 1 Int timeMinutes = 240 - String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -277,7 +275,6 @@ task ParallelSingle { String memory = "2G" Int threads = 1 Int timeMinutes = 60 - String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -371,7 +368,6 @@ task ParallelSingleTrain { String memory = "2G" Int threads = 1 Int timeMinutes = 240 - String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -451,7 +447,6 @@ task ModifyStrelka { String memory = "2G" Int timeMinutes = 20 - String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } From bc1bacf11498d2d30b85591cfccdcf71ef0966a5 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 21 Jul 2021 16:22:35 +0200 Subject: [PATCH 323/668] remove duplicate memory --- samtools.wdl | 1 - somaticseq.wdl | 5 ----- 2 files changed, 6 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 54215831..81b6c17d 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -49,7 +49,6 @@ task BgzipAndIndex { memory: memory time_minutes: timeMinutes docker: dockerImage - memory: memory } parameter_meta { diff --git a/somaticseq.wdl b/somaticseq.wdl index 8c7fb884..63f8362e 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -94,7 +94,6 @@ task ParallelPaired { cpu: threads memory: memory time_minutes: timeMinutes - memory: memory docker: dockerImage } @@ -209,7 +208,6 @@ 
task ParallelPairedTrain { cpu: threads memory: memory time_minutes: timeMinutes - memory: memory docker: dockerImage } @@ -312,7 +310,6 @@ task ParallelSingle { cpu: threads memory: memory time_minutes: timeMinutes - memory: memory docker: dockerImage } @@ -404,7 +401,6 @@ task ParallelSingleTrain { cpu: threads memory: memory time_minutes: timeMinutes - memory: memory docker: dockerImage } @@ -466,7 +462,6 @@ task ModifyStrelka { runtime { memory: memory time_minutes: timeMinutes - memory: memory docker: dockerImage } From ab17de947e0509b853a60e87e80399e1ca83f826 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 Jul 2021 13:37:37 +0200 Subject: [PATCH 324/668] add task for peach --- peach.wdl | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 peach.wdl diff --git a/peach.wdl b/peach.wdl new file mode 100644 index 00000000..9321d6bf --- /dev/null +++ b/peach.wdl @@ -0,0 +1,77 @@ +version 1.0 + +# Copyright (c) 2021 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Peach { + input { + File transcriptTsv + File germlineVcf + File germlineVcfIndex + File tumorName + File normalName + String outputDir = "./peach" + File panelJson + + String memory = "8G" + String dockerImage = "quay.io/biowdl/peach:v1.0" + Int timeMinutes = 20 + } + + command { + peach \ + --recreate_bed \ + --transcript_tsv ~{transcriptTsv} \ + ~{germlineVcf} \ + ~{tumorName} \ + ~{normalName} \ + 1.0 \ + ~{outputDir} \ + ~{panelJson} \ + vcftools + } + + output { + File callsTsv = "~{outputDir}/~{tumorName}.peach.calls.tsv" + File filteredVcf = "~{outputDir}/~{tumorName}.peach.filtered.vcf" + File genotypeTsv = "~{outputDir}/~{tumorName}.peach.genotype.tsv" + Array[File] peachFiles = [callsTsv, filterVcf, genotypeTsv] + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + transcriptTsv: {description: "A tsv file describing transcripts.", category: "required"} + germlineVcf: {description: "The germline VCF file from hmftools' purple.", category: "required"} + germlineVcfIndex: {description: "The germline VCF's index.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + normalName: {description: "The name of the normal sample", category: "required"} + outputDir: {description: "The directory the ouput should be written to.", category: "required"} + panelJson: {description: "A JSON describing the panel.", category: "required"} + + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The 
docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file From 1648c818b856f22ed9e7c8b6443d2e9bc072eb6a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 Jul 2021 14:21:41 +0200 Subject: [PATCH 325/668] rename array output peach --- peach.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/peach.wdl b/peach.wdl index 9321d6bf..72c7fde6 100644 --- a/peach.wdl +++ b/peach.wdl @@ -52,7 +52,7 @@ task Peach { File callsTsv = "~{outputDir}/~{tumorName}.peach.calls.tsv" File filteredVcf = "~{outputDir}/~{tumorName}.peach.filtered.vcf" File genotypeTsv = "~{outputDir}/~{tumorName}.peach.genotype.tsv" - Array[File] peachFiles = [callsTsv, filterVcf, genotypeTsv] + Array[File] outputs = [callsTsv, filterVcf, genotypeTsv] } runtime { From dcafd29087866bfa4bc464e9fd301e8de234c138 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 Jul 2021 15:20:03 +0200 Subject: [PATCH 326/668] fix validation issues --- peach.wdl | 2 +- samtools.wdl | 2 -- scripts | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/peach.wdl b/peach.wdl index 72c7fde6..5e0746aa 100644 --- a/peach.wdl +++ b/peach.wdl @@ -52,7 +52,7 @@ task Peach { File callsTsv = "~{outputDir}/~{tumorName}.peach.calls.tsv" File filteredVcf = "~{outputDir}/~{tumorName}.peach.filtered.vcf" File genotypeTsv = "~{outputDir}/~{tumorName}.peach.genotype.tsv" - Array[File] outputs = [callsTsv, filterVcf, genotypeTsv] + Array[File] outputs = [callsTsv, filteredVcf, genotypeTsv] } runtime { diff --git a/samtools.wdl b/samtools.wdl index 7eb86351..81b6c17d 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -28,7 +28,6 @@ task BgzipAndIndex { String memory = "2G" Int timeMinutes = 1 + ceil(size(inputFile, "G")) - String memory = "1G" String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -59,7 +58,6 @@ task BgzipAndIndex { type: {description: "The type of file (eg. 
vcf or bed) to be compressed and indexed.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/scripts b/scripts index 84690a30..c31670d3 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 +Subproject commit c31670d3a9222a2feafc649cbc118c95afbc7189 From dce31f572b08b3ef1ff3209f101ec4e3e838646c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 29 Jul 2021 12:05:22 +0200 Subject: [PATCH 327/668] update linx version --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index a327fd0b..1e25938d 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -464,7 +464,7 @@ task Linx { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.15--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.16--hdfd78af_0" } command { From 09d899b85aec47bcb065cb8b584e703828d488e7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Aug 2021 13:27:41 +0200 Subject: [PATCH 328/668] add bedtools coverage --- bedtools.wdl | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/bedtools.wdl b/bedtools.wdl index 3dbf93cb..f8713d2e 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -66,6 +66,55 @@ task Complement { } } +task Coverage { + input { + File genomeFile + File a + File? aIndex + File b + File? 
bIndex + String outputPath = "./coverage.tsv" + + String memory = "8G" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + } + + command { + bedtools coverage \ + -sorted \ + -g ~{genomeFile} \ + -a ~{a} \ + -b ~{b} \ + -d \ + > ~{outputPath} + } + + output { + File coverageTsv = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + genomeFile: {description: "A file listing the chromosomes and their lengths.", category: "required"} + a: {description: "The file containing the regions for which the coverage will be counted.", category: "required"} + aIndex: {description: "An index for the file given as `a`.", category: "common"} + b: {description: "The file in which the coverage will be counted. Likely a BAM file.", category: "required"} + bIndex: {description: "An index for the file given as `b`.", category: "common"} + outputPath: {description: "The path the ouptu will be written to.", category: "common"} + + memory: {description: "The amount of memory needed for the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + } +} + task Merge { input { File inputBed From 70cda88f96eecabb9b9a8d5f75f88515c0840a8f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 18 Aug 2021 16:00:25 +0200 Subject: [PATCH 329/668] add deconstructSigs task --- deconstructsigs.wdl | 66 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 deconstructsigs.wdl diff --git a/deconstructsigs.wdl b/deconstructsigs.wdl new file mode 100644 index 00000000..ef47e3e3 --- /dev/null +++ b/deconstructsigs.wdl @@ -0,0 +1,66 @@ +# Copyright (c) 2021 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +version 1.0 + +task DeconstructSigs { + input { + File signaturesMatrix + File signaturesReference + String outputPath = "./signatures.rds" + + Int timeMinutes = 15 + String memory = "4G" + String dockerImage = "quay.io/biocontainers/r-deconstructsigs:1.9.0--r41hdfd78af_1" + } + + command { + R --no-echo << EOF + library(deconstructSigs) + tumor <- read.table("~{signaturesMatrix}", check.names=F) + ref <- data.frame(t(read.table("~{signaturesReference}", check.names=F, header=T, row.names="Type")), check.names=F) + tumor <- tumor[,colnames(ref)] + + sigs <- whichSignatures(tumor.ref=tumor, row.names(tumor), signatures.ref=ref, contexts.needed=T) + saveRDS(sigs, "~{outputPath}") + EOF + } + + output { + File signatureRDS = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + signaturesMatrix: {description: "A table containing columns represtenting mutation types (matching the types in the signatures reference) and one row with the counts for each of these types for the sample of intrest.", + category: "required"} + signaturesReference: {description: "A table describing the mutational signatures, formatted like those provided by COSMIC.", + category: "required"} + outputPath: {description: "The location the output will be written to.", category: "common"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file From 26574bf26bef2663e9a67fe99c2a241762eb4365 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 20 Aug 2021 13:43:07 +0200 Subject: [PATCH 330/668] update bedtools version for coverage --- bedtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bedtools.wdl b/bedtools.wdl index f8713d2e..1d956cab 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -77,7 +77,7 @@ task Coverage { String memory = "8G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + String dockerImage = "quay.io/biocontainers/bedtools:2.30.0--h7d7f7ad_2" } command { From e8df466dfba91be4e2c08e9fa57607ad48936d01 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 23 Aug 2021 12:11:39 +0200 Subject: [PATCH 331/668] fix incorrect type --- peach.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/peach.wdl b/peach.wdl index 5e0746aa..b57842f7 100644 --- a/peach.wdl +++ b/peach.wdl @@ -25,8 +25,8 @@ task Peach { File transcriptTsv File germlineVcf File germlineVcfIndex - File tumorName - File normalName + String tumorName + String normalName String outputDir = "./peach" File panelJson From d76faa5a05528e6a74488b46a18bdfcd1a9402ea Mon Sep 17 00:00:00 2001 From: cedrick Date: Fri, 17 Sep 2021 09:55:37 +0200 Subject: [PATCH 332/668] update bcftools.wdk --- bcftools.wdl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 0cbfdefd..0738d156 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -290,7 +290,7 @@ task View { input { File inputFile String outputPath = "output.vcf" - + Boolean excludeUncalled = false String? exclude String? 
include String memory = "256M" @@ -304,8 +304,9 @@ task View { set -e mkdir -p "$(dirname ~{outputPath})" bcftools view \ - ~{"--include " + include} \ ~{"--exclude " + exclude} \ + ~{"--include " + include} \ + ~{true="--exclude-uncalled" false="" excludeUncalled} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ ~{inputFile} @@ -330,6 +331,8 @@ task View { outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} + excludeUncalled: {description: "Exclude sites without a called genotype (see man page for details).", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 15b12101e04df8d842f68cb5ddef7f7f8a932a9e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 24 Sep 2021 14:55:43 +0200 Subject: [PATCH 333/668] fix protect command with multiple doids --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 1e25938d..199d7d88 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -577,7 +577,7 @@ task Protect { -ref_genome_version ~{refGenomeVersion} \ -tumor_sample_id ~{tumorName} \ -reference_sample_id ~{normalName} \ - -primary_tumor_doids ~{sep=";" sampleDoids} \ + -primary_tumor_doids '~{sep=";" sampleDoids}' \ -output_dir ~{outputDir} \ -serve_actionability_dir ~{sub(serveActionability[0], basename(serveActionability[0]), "")} \ -doid_json ~{doidsJson} \ From a7a504e4a3589787d8c25c5ca97149598b65f572 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 24 Sep 2021 17:01:47 +0200 Subject: [PATCH 334/668] adjust resource settings --- gridss.wdl | 8 ++++---- hmftools.wdl | 4 ++-- peach.wdl | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index fcfed095..03193cca 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -94,7 +94,7 @@ task GRIDSS { File? 
gridssProperties Int jvmHeapSizeGb = 64 - Int threads = 4 + Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" } @@ -163,10 +163,10 @@ task GridssAnnotateVcfRepeatmasker { File gridssVcfIndex String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" - String memory = "50G" - Int threads = 4 + String memory = "25G" + Int threads = 8 String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 2880 + Int timeMinutes = 1440 } command { diff --git a/hmftools.wdl b/hmftools.wdl index 199d7d88..f8b13c66 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -799,10 +799,10 @@ task Sage { String? mnvFilterEnabled File? coverageBed - Int threads = 2 + Int threads = 4 String javaXmx = "50G" String memory = "60G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 8 / threads) + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 9 / threads) String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_0" } diff --git a/peach.wdl b/peach.wdl index b57842f7..af44daec 100644 --- a/peach.wdl +++ b/peach.wdl @@ -30,9 +30,9 @@ task Peach { String outputDir = "./peach" File panelJson - String memory = "8G" + String memory = "2G" String dockerImage = "quay.io/biowdl/peach:v1.0" - Int timeMinutes = 20 + Int timeMinutes = 5 } command { From 9d5972de8bd3cb4e0766a78461a989f878f88999 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 30 Sep 2021 11:44:16 +0200 Subject: [PATCH 335/668] update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b3dbc7f6..71df5def 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.0.1 --------------------------- ++ Smoove: enable genotyping ++ Bcftools: add boolean option to remove uncalled genotypes. + add runtime memory to number of tasks. 
version 5.0.0 From 48f0c3ebf543b0c2e707c73fb00bdafe308a4395 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 1 Oct 2021 13:26:07 +0200 Subject: [PATCH 336/668] update changelog --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71df5def..a6df9307 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,6 @@ that users understand how the changes affect the new version. version 5.0.1 --------------------------- + Smoove: enable genotyping -+ Bcftools: add boolean option to remove uncalled genotypes. + add runtime memory to number of tasks. version 5.0.0 From c48f3bb7078e52bbb653848857028ddc9d43a6de Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 28 Oct 2021 12:32:49 +0200 Subject: [PATCH 337/668] increase memory for sambamba markdup --- sambamba.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sambamba.wdl b/sambamba.wdl index e78f50b6..4c2115e0 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -81,8 +81,8 @@ task Markdup { # 2 threads reduces wall clock time by more than 40%. Int threads = 2 # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size. - # Added 4096 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. - Int memoryMb = 4096 + sortBufferSize + 2 * ioBufferSize + # Added 8192 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. + Int memoryMb = 8192 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
Int timeMinutes = 1 + ceil(size(inputBams, "G") * 25) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" From 0932a62d6a00e5c600fcda7c3fa3a7aec40638bb Mon Sep 17 00:00:00 2001 From: Jeremy Leipzig Date: Thu, 28 Oct 2021 08:52:01 -0600 Subject: [PATCH 338/668] Update chunked-scatter.wdl Older container is not producing the necessary bed file --- chunked-scatter.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index fba1af5a..66954c36 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -86,7 +86,7 @@ task ScatterRegions { String memory = "256M" Int timeMinutes = 2 - String dockerImage = "quay.io/biocontainers/chunked-scatter:0.2.0--py_0" + String dockerImage = "quay.io/biocontainers/chunked-scatter:1.0.0--py_0" } String finalSize = if defined(scatterSize) then "~{scatterSize}" else "~{scatterSizeMillions}000000" From 8224e2cb52132a7978db5760afa813d640d2bb74 Mon Sep 17 00:00:00 2001 From: Jeremy Leipzig Date: Thu, 28 Oct 2021 08:55:46 -0600 Subject: [PATCH 339/668] Update CHANGELOG.md --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a6df9307..6d40cd1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> +version 5.0.2 +--------------------------- ++ bumped ScatterRegions container to 1.0.0 + version 5.0.1 --------------------------- + Smoove: enable genotyping From 9e868dbcfbd4374ef6e04fbe389bf550be67a6ba Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 8 Nov 2021 14:26:37 +0100 Subject: [PATCH 340/668] add img input for virusbreakend --- gridss.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 03193cca..b36d6598 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -207,6 +207,7 @@ task Virusbreakend { File bam File bamIndex File referenceFasta + File referenceImg File virusbreakendDB String outputPath = "./virusbreakend.vcf" @@ -246,6 +247,7 @@ task Virusbreakend { bam: {description: "A BAM file.", category: "required"} bamIndex: {description: "The index for the BAM file.", category: "required"} referenceFasta: {description: "The fasta of the reference genome.", category: "required"} + referenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the reference.", category: "required"} virusbreakendDB: {description: "A .tar.gz containing the virusbreakend database.", category: "required"} outputPath: {description: "The path the output should be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 0a1995df4f853799cb945a2bc8d3ac0062039efd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 9 Nov 2021 12:29:10 +0100 Subject: [PATCH 341/668] try version 2.11.1 for gridss --- gridss.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index b36d6598..5c267e79 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -34,7 +34,7 @@ task AnnotateInsertedSequence { Int threads = 8 String javaXmx = "8G" String memory = "9G" - String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" Int timeMinutes = 120 } @@ -96,7 
+96,7 @@ task GRIDSS { Int jvmHeapSizeGb = 64 Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" } command { @@ -165,7 +165,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "25G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" Int timeMinutes = 1440 } @@ -213,7 +213,7 @@ task Virusbreakend { String memory = "75G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" Int timeMinutes = 180 } From 7d1f9c92406f9865e8c035a5bd19feea5a22b7ae Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 10 Nov 2021 12:46:04 +0100 Subject: [PATCH 342/668] upgrade gridss to 2.12.2 --- gridss.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 5c267e79..1f14e23b 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -34,7 +34,7 @@ task AnnotateInsertedSequence { Int threads = 8 String javaXmx = "8G" String memory = "9G" - String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" + String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" Int timeMinutes = 120 } @@ -96,7 +96,7 @@ task GRIDSS { Int jvmHeapSizeGb = 64 Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" + String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" } command { @@ -165,14 +165,14 @@ task GridssAnnotateVcfRepeatmasker { String memory = "25G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" + String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" Int timeMinutes = 1440 } command { gridss_annotate_vcf_repeatmasker 
\ --output ~{outputPath} \ - --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ + --jar /usr/local/share/gridss-2.12.2-0/gridss.jar \ -w . \ -t ~{threads} \ ~{gridssVcf} @@ -213,7 +213,7 @@ task Virusbreakend { String memory = "75G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" + String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" Int timeMinutes = 180 } @@ -226,7 +226,7 @@ task Virusbreakend { --workingdir . \ --reference ~{referenceFasta} \ --db virusbreakenddb \ - --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ + --jar /usr/local/share/gridss-2.12.2-0/gridss.jar \ -t ~{threads} \ ~{bam} } From f9ed6158bfe70792d546e8e68b205f197c52b2ff Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 11 Nov 2021 10:59:46 +0100 Subject: [PATCH 343/668] increase memory gridss --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 1f14e23b..2e68ed88 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,7 +93,7 @@ task GRIDSS { File? blacklistBed File? gridssProperties - Int jvmHeapSizeGb = 64 + Int jvmHeapSizeGb = 85 Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" From c9657636bed7c7046e3799a0c3fca36473ae80e6 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 12 Nov 2021 10:07:54 +0100 Subject: [PATCH 344/668] increase gridss memory --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 2e68ed88..13596a48 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,7 +93,7 @@ task GRIDSS { File? blacklistBed File? 
gridssProperties - Int jvmHeapSizeGb = 85 + Int jvmHeapSizeGb = 185 Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" From f64bd5367fee90d51d47db7c29af13816c9fedbe Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 12 Nov 2021 16:08:53 +0100 Subject: [PATCH 345/668] use alternative gridss image for virusbreakend --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 13596a48..b448a2dc 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -213,7 +213,7 @@ task Virusbreakend { String memory = "75G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 180 } From 1a9a8058f3991c0b76e934837dc64f80805fc4c6 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 12 Nov 2021 21:55:59 +0100 Subject: [PATCH 346/668] change gridss runtime settings --- gridss.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index b448a2dc..d93f1b80 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -34,7 +34,7 @@ task AnnotateInsertedSequence { Int threads = 8 String javaXmx = "8G" String memory = "9G" - String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 120 } @@ -94,9 +94,9 @@ task GRIDSS { File? 
gridssProperties Int jvmHeapSizeGb = 185 - Int threads = 8 - Int timeMinutes = ceil(4320 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" + Int threads = 4 + Int timeMinutes = ceil(7200 / threads) + 180 + String dockerImage = "quay.io/biowdl/gridss:2.12.2" } command { @@ -165,7 +165,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "25G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 1440 } From d3d2040093a79814a1bf0488d13a44342068c5b5 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Sat, 13 Nov 2021 16:12:52 +0100 Subject: [PATCH 347/668] gridss more memory --- gridss.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index d93f1b80..8c05fe61 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,7 +93,8 @@ task GRIDSS { File? blacklistBed File? gridssProperties - Int jvmHeapSizeGb = 185 + Int jvmHeapSizeGb = 200 + Int nonJvmMemoryGb = 50 Int threads = 4 Int timeMinutes = ceil(7200 / threads) + 180 String dockerImage = "quay.io/biowdl/gridss:2.12.2" @@ -126,7 +127,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb + 15}G" + memory: "~{jvmHeapSizeGb + nonJvmMemoryGb}G" time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } From e9f3c5fdf8aef7082911f6c40730264187cc6884 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 15 Nov 2021 11:55:52 +0100 Subject: [PATCH 348/668] make recovery sv vcf optional in purple --- hmftools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index f8b13c66..1537bce5 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -648,8 +648,8 @@ task Purple { File somaticVcf File germlineVcf File filteredSvVcf - File fullSvVcf - File fullSvVcfIndex + File? fullSvVcf + File? 
fullSvVcfIndex File referenceFasta File referenceFastaFai File referenceFastaDict @@ -676,7 +676,7 @@ task Purple { -somatic_vcf ~{somaticVcf} \ -germline_vcf ~{germlineVcf} \ -structural_vcf ~{filteredSvVcf} \ - -sv_recovery_vcf ~{fullSvVcf} \ + ~{"-sv_recovery_vcf " + fullSvVcf} \ -circos /usr/local/bin/circos \ -ref_genome ~{referenceFasta} \ -driver_catalog \ From 787ad56b36f24099ece60ae56a43af46cbbeaf00 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 26 Nov 2021 13:46:20 +0100 Subject: [PATCH 349/668] give dictionary and index to virusbreakend --- gridss.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 8c05fe61..b43a3837 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -208,6 +208,8 @@ task Virusbreakend { File bam File bamIndex File referenceFasta + File referenceFastaFai + File referenceFastaDict File referenceImg File virusbreakendDB String outputPath = "./virusbreakend.vcf" From 86a249825272f9bb4384f87057593047402a1a37 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Dec 2021 15:36:36 +0100 Subject: [PATCH 350/668] Add sampleposition in array task --- common.wdl | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/common.wdl b/common.wdl index d29ed5da..fc8dc481 100644 --- a/common.wdl +++ b/common.wdl @@ -148,6 +148,43 @@ task CreateLink { } } +task GetSamplePositionInArray { + input { + Array[String] sampleIds + String sample + + # python:3.7-slim's sha256 digest. This image is based on debian buster. + String dockerImage = "python@sha256:e0f6a4df17d5707637fa3557ab266f44dddc46ebfc82b0f1dbe725103961da4e" + } + + command <<< + python <>> + + output { + Int position = read_int(stdout()) + } + + runtime { + # 4 gigs of memory to be able to build the docker image in singularity. 
+ memory: "4G" + docker: dockerImage + } + + parameter_meta { + # inputs + sampleIds: {description: "A list of sample ids.", category: "required"} + sample: {description: "The sample for which the position is wanted.", category: "required"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + position: {description: ""} + } +} + task MapMd5 { input { Map[String,String] map From d970e6892b1e61d34c99e507fb3a62b7b04f2fc1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Dec 2021 16:33:41 +0100 Subject: [PATCH 351/668] Require 5 minutes --- common.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/common.wdl b/common.wdl index fc8dc481..1e4fc8cb 100644 --- a/common.wdl +++ b/common.wdl @@ -172,6 +172,7 @@ task GetSamplePositionInArray { # 4 gigs of memory to be able to build the docker image in singularity. memory: "4G" docker: dockerImage + timeMinutes: 5 } parameter_meta { From c21d27ff32bdf7210dddf98a711e32192e820a82 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 14 Dec 2021 14:48:24 +0100 Subject: [PATCH 352/668] Add parameter_meta for macs2 --- macs2.wdl | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index 757eaf67..cbce18e9 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -26,11 +26,10 @@ task PeakCalling { Array[File]+ inputBamsIndex Array[File]+? controlBams Array[File]+? 
controlBamsIndex - String outDir + String outDir = "macs2" String sampleName Boolean nomodel = false - Int threads = 1 String memory = "8G" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } @@ -50,8 +49,21 @@ task PeakCalling { } runtime { - cpu: threads + cpu: 1 memory: memory docker: dockerImage } + parameter_meta { + inputBams: {description: "The BAM files on which to perform peak calling.", category: "required"} + inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} + controlBams: {description: "Control BAM files for the input bam files.", category: "required"} + controlBamsIndex: {description: "The indexes for the control BAM files.", category: "required"} + sampleName: {description: "Name of the sample to be analysed", category: "required"} + outDir: {description: "All output files will be written in this directory.", category: "advanced"} + nomodel: {description: "Whether or not to build the shifting model.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + } } From 24ef56348f4ca8900f639d05aa28ec25fda3fbd1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 14 Dec 2021 14:52:15 +0100 Subject: [PATCH 353/668] Add time minutes parameter --- macs2.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index cbce18e9..983630c5 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -29,7 +29,7 @@ task PeakCalling { String outDir = "macs2" String sampleName Boolean nomodel = false - + Int timeMinutes = 600 # Default to 10 hours String memory = "8G" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } @@ -52,6 +52,7 @@ task PeakCalling { cpu: 1 memory: memory docker: dockerImage + time_minutes: timeMinutes } parameter_meta { inputBams: {description: "The BAM files on which to perform peak calling.", category: "required"} From 17746ebbb5668b8382050105b69f33273019a512 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 14 Dec 2021 14:56:37 +0100 Subject: [PATCH 354/668] Update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d40cd1f..126f1ed9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> +version 5.1.0-dev +--------------------------- ++ Update parameter_meta for macs2 ++ Add sample position in array task. 
+ version 5.0.2 --------------------------- + bumped ScatterRegions container to 1.0.0 From 019cbb96a68c2fca141c955126b0ad9b97511f2a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 14 Dec 2021 16:00:30 +0100 Subject: [PATCH 355/668] More correct evaluation of controlBams input --- macs2.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index 983630c5..eb71ac1d 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -24,8 +24,8 @@ task PeakCalling { input { Array[File]+ inputBams Array[File]+ inputBamsIndex - Array[File]+? controlBams - Array[File]+? controlBamsIndex + Array[File] controlBams + Array[File] controlBamsIndex String outDir = "macs2" String sampleName Boolean nomodel = false @@ -38,7 +38,7 @@ task PeakCalling { set -e macs2 callpeak \ --treatment ~{sep = ' ' inputBams} \ - ~{true="--control" false="" defined(controlBams)} ~{sep = ' ' controlBams} \ + ~{true="--control" false="" length(controlBams) > 0} ~{sep = ' ' controlBams} \ --outdir ~{outDir} \ --name ~{sampleName} \ ~{true='--nomodel' false='' nomodel} @@ -57,8 +57,8 @@ task PeakCalling { parameter_meta { inputBams: {description: "The BAM files on which to perform peak calling.", category: "required"} inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} - controlBams: {description: "Control BAM files for the input bam files.", category: "required"} - controlBamsIndex: {description: "The indexes for the control BAM files.", category: "required"} + controlBams: {description: "Control BAM files for the input bam files.", category: "common"} + controlBamsIndex: {description: "The indexes for the control BAM files.", category: "common"} sampleName: {description: "Name of the sample to be analysed", category: "required"} outDir: {description: "All output files will be written in this directory.", category: "advanced"} nomodel: {description: "Whether or not to build the shifting model.", category: "advanced"} From 
9c5ebf6bb9d32d030b783ed03f329db735a92b6f Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 15 Dec 2021 15:27:24 +0100 Subject: [PATCH 356/668] add umiAwareMarkDuplicate --- picard.wdl | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/picard.wdl b/picard.wdl index f75fdc32..0e189a60 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1004,3 +1004,57 @@ task RenameSample { renamedVcf: {description: "New VCF with renamed sample."} } } + +task UmiAwareMarkDuplicatesWithMateCigar { + input { + File inputBam + String outputPathBam + String outputPathMetrics + String outputPathUmiMetrics + String tempdir + Boolean dedup = true + + String memory = "10G" + Int timeMinutes = 360 + String dockerImage = "quay.io/biocontainers/picard:2.25.7--hdfd78af_0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" ~{tempdir} + picard UmiAwareMarkDuplicatesWithMateCigar \ + I=~{inputBam} \ + O=~{outputPathBam} \ + M=~{outputPathMetrics} \ + UMI_METRICS_FILE=~{outputPathUmiMetrics} \ + TMP_DIR=~{tempdir} \ + REMOVE_DUPLICATES=~{dedup} \ + CREATE_INDEX=true \ + } + + output { + File outputBam = outputPathBam + File outputBamIndex = sub(outputPathBam, "\.bam$", ".bai") + File outputMetrics = outputPathMetrics + File outputUmiMetrics = outputPathUmiMetrics + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputBam: {description: "The unsorted input BAM file.", category: "required"} + outputPathBam: {description: "The location the output BAM file should be written to.", category: "required"} + outputPathMetrics: {description: "The location the output metrics file should be written to.", category: "required"} + outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} + tmpDir: {description: "Temporary directory.", category: "advanced"} + memory: {description: "The amount of memory this job will 
use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + } +} \ No newline at end of file From 010ce0ac0835f0faa1353f3f43b544c2b0ecb50c Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 15 Dec 2021 15:27:38 +0100 Subject: [PATCH 357/668] add annotateBamWithUmi --- fgbio.wdl | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 fgbio.wdl diff --git a/fgbio.wdl b/fgbio.wdl new file mode 100644 index 00000000..d50906d3 --- /dev/null +++ b/fgbio.wdl @@ -0,0 +1,68 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task AnnotateBamWithUmis { + input { + File inputBam + File inputUmi + String outputPath + + String memory = "120G" + Int timeMinutes = 360 + String javaXmx="100G" + String dockerImage = "quay.io/biocontainers/fgbio:1.4.0--hdfd78af_0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + fgbio -Xmx~{javaXmx} \ + AnnotateBamWithUmis \ + -i ~{inputBam} \ + -f ~{inputUmi} \ + -o ~{outputPath} + } + + output { + File outputBam = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM file.", category: "required"} + inputUmi: {description: "The input fastq file with UMIs.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "UMI-annotated output BAM file."} + } +} From 014d43cc204fcf1f7159717c047210ca3f008c40 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 4 Jan 2022 13:32:55 +0100 Subject: [PATCH 358/668] Make sure task is consistent --- picard.wdl | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/picard.wdl b/picard.wdl index 0e189a60..d8ce5ebe 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1008,11 +1008,11 @@ task RenameSample { task UmiAwareMarkDuplicatesWithMateCigar { input { File inputBam - String outputPathBam - String outputPathMetrics - String outputPathUmiMetrics - String tempdir - Boolean dedup = true + String outputPath + String outputPathMetrics = outputPath + ".metrics" + String outputPathUmiMetrics = outputPath + ".umi-metrics" + String tempdir = "temp" + Boolean removeDuplicates = true String memory = "10G" Int timeMinutes = 360 @@ -1024,17 +1024,17 @@ task UmiAwareMarkDuplicatesWithMateCigar { mkdir -p "$(dirname ~{outputPath})" ~{tempdir} picard UmiAwareMarkDuplicatesWithMateCigar \ I=~{inputBam} \ - O=~{outputPathBam} \ + O=~{outputPath} \ M=~{outputPathMetrics} \ UMI_METRICS_FILE=~{outputPathUmiMetrics} \ TMP_DIR=~{tempdir} \ - REMOVE_DUPLICATES=~{dedup} \ + REMOVE_DUPLICATES=~{removeDuplicates} \ CREATE_INDEX=true \ } output { - File outputBam = outputPathBam - File outputBamIndex = sub(outputPathBam, "\.bam$", ".bai") + File outputBam = outputPath + File outputBamIndex = sub(outputPath, "\.bam$", ".bai") File outputMetrics = outputPathMetrics File outputUmiMetrics = outputPathUmiMetrics } @@ -1048,10 +1048,11 @@ task UmiAwareMarkDuplicatesWithMateCigar { parameter_meta { # inputs inputBam: {description: "The unsorted input BAM file.", category: "required"} - outputPathBam: {description: "The location the output BAM file should be written to.", category: "required"} + 
outputPath: {description: "The location the output BAM file should be written to.", category: "required"} outputPathMetrics: {description: "The location the output metrics file should be written to.", category: "required"} outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} - tmpDir: {description: "Temporary directory.", category: "advanced"} + removeDuplicates: {description: "Whether the duplicate reads should be removed instead of marked.", category: "common"} + tempdir: {description: "Temporary directory.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 8ccabed5e8c56d2f742d5aba829104fe8db00d2d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 4 Jan 2022 14:10:45 +0100 Subject: [PATCH 359/668] Allow multiple bam inputs --- picard.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/picard.wdl b/picard.wdl index d8ce5ebe..d2a6ca35 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1007,7 +1007,7 @@ task RenameSample { task UmiAwareMarkDuplicatesWithMateCigar { input { - File inputBam + Array[File] inputBams String outputPath String outputPathMetrics = outputPath + ".metrics" String outputPathUmiMetrics = outputPath + ".umi-metrics" @@ -1023,7 +1023,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { set -e mkdir -p "$(dirname ~{outputPath})" ~{tempdir} picard UmiAwareMarkDuplicatesWithMateCigar \ - I=~{inputBam} \ + INPUT=~{sep=' INPUT=' inputBams} \ O=~{outputPath} \ M=~{outputPathMetrics} \ UMI_METRICS_FILE=~{outputPathUmiMetrics} \ @@ -1047,7 +1047,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { parameter_meta { # inputs - 
inputBam: {description: "The unsorted input BAM file.", category: "required"} + inputBams: {description: "The BAM files for which the duplicate reads should be marked.", category: "required"} outputPath: {description: "The location the output BAM file should be written to.", category: "required"} outputPathMetrics: {description: "The location the output metrics file should be written to.", category: "required"} outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} From 89eaf097695f6bda12a20f0d5ce993a230a8342a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 11:11:01 +0100 Subject: [PATCH 360/668] Add script to extract umi from read name --- umi.wdl | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 umi.wdl diff --git a/umi.wdl b/umi.wdl new file mode 100644 index 00000000..fdf764f4 --- /dev/null +++ b/umi.wdl @@ -0,0 +1,100 @@ +version 1.0 + +# Copyright (c) 2022 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task BamReadNameToUmiTag { + + # This task processes a bam file with reads that have been extracted with + # umi-tools extract. The UMI is extracted from the read name again and put + # in the bam file again with umiTag (default RX) + input { + File inputBam + String outputPath = "output.bam" + String umiTag = "RX" + + String memory = "2G" + Int timeMinutes = 1 + ceil(size([inputBam], "G") * 10) + String dockerImage = "quay.io/biocontainers/pysam:0.17.0--py39h051187c_0" + } + String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") + command <<< + python < Tuple[str, str]: + id_and_rest = name.split(maxsplit=1) + if len(id_and_rest) == 1: + id, = id_and_rest + other_parts = "" + else: + id, other_parts = id_and_rest + underscore_index = id.rfind("_") + umi = id[underscore_index + 1:] + new_id = id[:underscore_index] + if other_parts: + return " ".join([new_id, other_parts]), umi + return new_id, umi + + def annotate_umis(in_file, out_file, bam_tag = "RX"): + in_bam = pysam.AlignmentFile(in_file, "rb") + out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) + for segment in in_bam: # type: pysam.AlignedSegment + new_name, umi = split_umi_from_name(segment.query_name) + segment.query_name = new_name + # append does not work. (Pysam is not Pythonic.) 
+ segment.tags = segment.tags + [(bam_tag, umi)] + out_bam.write(segment) + + if __name__ == "__main__": + annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}") + pysam.index("~{outputPath}", "~{bamIndexPath}", b=True) + CODE + >>> + + output { + File outputBam = outputBam + File outputBamIndex = outputBamIndex + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputBam: {description: "The input SAM file.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "common"} + umiTag: {description: "The tag used for UMIs in the output BAM file.", category: "common"} + + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "Sorted BAM file."} + outputBamIndex: {description: "Sorted BAM file index."} + } +} From 0a66c48bb5b75722d641d23c3421d2ca50c5ad21 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 11:46:00 +0100 Subject: [PATCH 361/668] Add umiTagName flag --- picard.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/picard.wdl b/picard.wdl index d2a6ca35..961364e4 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1013,6 +1013,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String outputPathUmiMetrics = outputPath + ".umi-metrics" String tempdir = "temp" Boolean removeDuplicates = true + String umiTagName = "RX" String memory = "10G" Int timeMinutes = 360 @@ -1026,6 +1027,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { INPUT=~{sep=' INPUT=' inputBams} \ O=~{outputPath} \ M=~{outputPathMetrics} \ + UMI_TAG_NAME=~{umiTagName} \ UMI_METRICS_FILE=~{outputPathUmiMetrics} \ 
TMP_DIR=~{tempdir} \ REMOVE_DUPLICATES=~{removeDuplicates} \ @@ -1052,6 +1054,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { outputPathMetrics: {description: "The location the output metrics file should be written to.", category: "required"} outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} removeDuplicates: {description: "Whether the duplicate reads should be removed instead of marked.", category: "common"} + umiTagName: {description: "Which tag in the BAM file holds the UMI.", category: "common"} tempdir: {description: "Temporary directory.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From d383b38d49cec511e9b6212dc1507e10ddc2fcec Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 12:26:23 +0100 Subject: [PATCH 362/668] Dedent overindented code --- umi.wdl | 56 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/umi.wdl b/umi.wdl index fdf764f4..7c435654 100644 --- a/umi.wdl +++ b/umi.wdl @@ -37,38 +37,38 @@ task BamReadNameToUmiTag { String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") command <<< python < Tuple[str, str]: - id_and_rest = name.split(maxsplit=1) - if len(id_and_rest) == 1: - id, = id_and_rest - other_parts = "" - else: - id, other_parts = id_and_rest - underscore_index = id.rfind("_") - umi = id[underscore_index + 1:] - new_id = id[:underscore_index] - if other_parts: - return " ".join([new_id, other_parts]), umi - return new_id, umi + def split_umi_from_name(name) -> Tuple[str, str]: + id_and_rest = name.split(maxsplit=1) + if len(id_and_rest) == 1: + id, = id_and_rest + other_parts = "" + else: + id, other_parts = id_and_rest + underscore_index = id.rfind("_") + umi = id[underscore_index + 1:] + 
new_id = id[:underscore_index] + if other_parts: + return " ".join([new_id, other_parts]), umi + return new_id, umi - def annotate_umis(in_file, out_file, bam_tag = "RX"): - in_bam = pysam.AlignmentFile(in_file, "rb") - out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) - for segment in in_bam: # type: pysam.AlignedSegment - new_name, umi = split_umi_from_name(segment.query_name) - segment.query_name = new_name - # append does not work. (Pysam is not Pythonic.) - segment.tags = segment.tags + [(bam_tag, umi)] - out_bam.write(segment) + def annotate_umis(in_file, out_file, bam_tag = "RX"): + in_bam = pysam.AlignmentFile(in_file, "rb") + out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) + for segment in in_bam: # type: pysam.AlignedSegment + new_name, umi = split_umi_from_name(segment.query_name) + segment.query_name = new_name + # append does not work. (Pysam is not Pythonic.) + segment.tags = segment.tags + [(bam_tag, umi)] + out_bam.write(segment) - if __name__ == "__main__": - annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}") - pysam.index("~{outputPath}", "~{bamIndexPath}", b=True) + if __name__ == "__main__": + annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}") + pysam.index("~{outputPath}", "~{bamIndexPath}", b=True) CODE >>> From acff4bd9fffbd5a6326b96144f2fe47c2b548a36 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 12:28:28 +0100 Subject: [PATCH 363/668] Also create directories --- umi.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/umi.wdl b/umi.wdl index 7c435654..360405ff 100644 --- a/umi.wdl +++ b/umi.wdl @@ -37,8 +37,9 @@ task BamReadNameToUmiTag { String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") command <<< python < Date: Tue, 11 Jan 2022 12:55:10 +0100 Subject: [PATCH 364/668] Correct output files --- umi.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/umi.wdl b/umi.wdl index 360405ff..2a4bc9cf 100644 --- 
a/umi.wdl +++ b/umi.wdl @@ -75,8 +75,8 @@ task BamReadNameToUmiTag { >>> output { - File outputBam = outputBam - File outputBamIndex = outputBamIndex + File outputBam = outputPath + File outputBamIndex = bamIndexPath } runtime { From 091058e29c0aba1d8c412ec21cda942e7597d23c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 13:55:03 +0100 Subject: [PATCH 365/668] Update changelog --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 126f1ed9..fe0667e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,11 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- -+ Update parameter_meta for macs2 ++ Add a script to subtract UMI's from the read name and add them as + a BAM tag for each BAM record. The script is in umi.BamReadNameToUmiTag. ++ Add fgbio.AnnotateBamWithUmis. ++ Add picard.UmiAwareMarkDuplicatesWithMateCigar. ++ Update parameter_meta for macs2. + Add sample position in array task. 
version 5.0.2 From 054b7a7f13891c1a85c5a4e8e596e0cfb7d5282a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 16:36:37 +0100 Subject: [PATCH 366/668] Use more conventional list unpacking for clarity --- umi.wdl | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/umi.wdl b/umi.wdl index 2a4bc9cf..e149cafe 100644 --- a/umi.wdl +++ b/umi.wdl @@ -45,11 +45,9 @@ task BamReadNameToUmiTag { def split_umi_from_name(name) -> Tuple[str, str]: id_and_rest = name.split(maxsplit=1) - if len(id_and_rest) == 1: - id, = id_and_rest - other_parts = "" - else: - id, other_parts = id_and_rest + id = id_and_rest[0] + # If there was no whitespace id_and_rest will have length 1 + other_parts = id_and_rest[1] if len(id_and_rest) == 2 else "" underscore_index = id.rfind("_") umi = id[underscore_index + 1:] new_id = id[:underscore_index] From 5df62f54b036d396ad78c966e19956a47df552c3 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 14 Jan 2022 13:49:16 +0100 Subject: [PATCH 367/668] Add format parameter to macs2 --- macs2.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/macs2.wdl b/macs2.wdl index eb71ac1d..e17d613b 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -28,6 +28,7 @@ task PeakCalling { Array[File] controlBamsIndex String outDir = "macs2" String sampleName + String format = "AUTO" Boolean nomodel = false Int timeMinutes = 600 # Default to 10 hours String memory = "8G" @@ -41,6 +42,7 @@ task PeakCalling { ~{true="--control" false="" length(controlBams) > 0} ~{sep = ' ' controlBams} \ --outdir ~{outDir} \ --name ~{sampleName} \ + -f ~{format} \ ~{true='--nomodel' false='' nomodel} } From f05d7cb427d00a85994391b0e2829cc704bb3314 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 17 Jan 2022 09:08:56 +0100 Subject: [PATCH 368/668] Use set_tag call from pysam --- umi.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/umi.wdl b/umi.wdl index e149cafe..59169685 100644 --- a/umi.wdl +++ b/umi.wdl @@ -62,8 
+62,7 @@ task BamReadNameToUmiTag { for segment in in_bam: # type: pysam.AlignedSegment new_name, umi = split_umi_from_name(segment.query_name) segment.query_name = new_name - # append does not work. (Pysam is not Pythonic.) - segment.tags = segment.tags + [(bam_tag, umi)] + segment.set_tag("RX", umi, value_type="Z") out_bam.write(segment) if __name__ == "__main__": From 28a2801941d6b56a64d1c413a4998ff220cd9899 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 17 Jan 2022 09:25:50 +0100 Subject: [PATCH 369/668] Use proper encoding --- umi.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/umi.wdl b/umi.wdl index 59169685..a32d646a 100644 --- a/umi.wdl +++ b/umi.wdl @@ -59,10 +59,14 @@ task BamReadNameToUmiTag { in_bam = pysam.AlignmentFile(in_file, "rb") os.makedirs(os.path.dirname(out_file), exist_ok=True) out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) + # Encode bam_tag as bytes. Otherwise pysam converts it to bytes anyway. + encoded_bam_tag = bam_tag.encode('ascii') for segment in in_bam: # type: pysam.AlignedSegment new_name, umi = split_umi_from_name(segment.query_name) segment.query_name = new_name - segment.set_tag("RX", umi, value_type="Z") + # Encode umi as ascii. Otherwise pysam encodes it to bytes anyway. + # Value type has to be a string though, otherwise pysam crashes. 
+ segment.set_tag(encoded_bam_tag, umi.encode('ascii'), value_type="Z") out_bam.write(segment) if __name__ == "__main__": From 7b2d86fef3c90983b9ca57a9aded3872756d80e3 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Jan 2022 10:52:40 +0100 Subject: [PATCH 370/668] Set xmx value properly for UmiAwareMarkDuplicatesWithMateCigar --- picard.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/picard.wdl b/picard.wdl index 961364e4..46b11e51 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1015,7 +1015,8 @@ task UmiAwareMarkDuplicatesWithMateCigar { Boolean removeDuplicates = true String umiTagName = "RX" - String memory = "10G" + String javaXmx = "8G" + String memory = "9G" Int timeMinutes = 360 String dockerImage = "quay.io/biocontainers/picard:2.25.7--hdfd78af_0" } @@ -1023,7 +1024,8 @@ task UmiAwareMarkDuplicatesWithMateCigar { command { set -e mkdir -p "$(dirname ~{outputPath})" ~{tempdir} - picard UmiAwareMarkDuplicatesWithMateCigar \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + UmiAwareMarkDuplicatesWithMateCigar \ INPUT=~{sep=' INPUT=' inputBams} \ O=~{outputPath} \ M=~{outputPathMetrics} \ From 09b97388eea432a1d0b4c37fe65f5621e13e9d0b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Jan 2022 14:18:17 +0100 Subject: [PATCH 371/668] Update Picard and reevaluate use of intel inflater/defaler --- picard.wdl | 99 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 67 insertions(+), 32 deletions(-) diff --git a/picard.wdl b/picard.wdl index 46b11e51..bf32c8ac 100644 --- a/picard.wdl +++ b/picard.wdl @@ -29,7 +29,7 @@ task BedToIntervalList { String javaXmx = "3G" String memory = "4G" Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -89,7 +89,7 @@ task CollectHsMetrics { # Additional * 2 because picard multiple metrics reads the # reference fasta twice. 
Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -158,7 +158,7 @@ task CollectMultipleMetrics { Int memoryMb = javaXmxMb + 512 # Additional * 2 because picard multiple metrics reads the reference fasta twice. Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -284,7 +284,7 @@ task CollectRnaSeqMetrics { String memory = "9G" # With 6 minutes per G there were several timeouts. Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -342,7 +342,7 @@ task CollectTargetedPcrMetrics { String javaXmx = "3G" String memory = "4G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -404,7 +404,7 @@ task CollectVariantCallingMetrics { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -453,7 +453,7 @@ task CreateSequenceDictionary { String javaXmx = "2G" String memory = "3G" - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -497,13 +497,15 @@ task GatherBamFiles { String outputBamPath Boolean createMd5File = false - Int? 
compressionLevel + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater Int javaXmxMb = 1024 Int memoryMb = javaXmxMb + 512 # One minute per input gigabyte. Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -513,7 +515,9 @@ task GatherBamFiles { GatherBamFiles \ INPUT=~{sep=' INPUT=' inputBams} \ OUTPUT=~{outputBamPath} \ - ~{"COMPRESSION_LEVEL=" + compressionLevel} \ + COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ CREATE_INDEX=true \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} } @@ -536,7 +540,9 @@ task GatherBamFiles { inputBamsIndex: {description: "The indexes of the input BAM files.", category: "required"} outputBamPath: {description: "The path where the merged BAM file will be written.", caregory: "required"} createMd5File: {decription: "Whether to create an md5 file of the output BAM.", category: "advanced"} - compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -555,10 +561,14 @@ task GatherVcfs { Array[File]+ inputVcfIndexes String outputVcfPath = "out.vcf.gz" + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater + String javaXmx = "4G" String memory = "5G" Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -566,6 +576,10 @@ task GatherVcfs { mkdir -p "$(dirname ~{outputVcfPath})" picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ GatherVcfs \ + COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ + CREATE_INDEX=true \ INPUT=~{sep=' INPUT=' inputVcfs} \ OUTPUT=~{outputVcfPath} } @@ -590,6 +604,10 @@ task GatherVcfs { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. 
False uses the optimized intel deflater.", category: "advanced"} + # outputs outputVcf: {description: "Multiple VCF files gathered into one file."} } @@ -601,14 +619,11 @@ task MarkDuplicates { Array[File]+ inputBams String outputBamPath String metricsPath - Int compressionLevel = 1 Boolean createMd5File = false - Boolean useJdkInflater = true # Slightly faster than the intel one. - # Better results for compression level 1 (much smaller). - # Higher compression levels similar to intel deflater. - # NOTE: this might change in the future when the intel - # deflater is updated! - Boolean useJdkDeflater = true + + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater # The program default for READ_NAME_REGEX is appropriate in nearly every case. # Sometimes we wish to supply "null" in order to turn off optical duplicate detection. @@ -622,7 +637,7 @@ task MarkDuplicates { String memoryMb = javaXmxMb + 512 Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } # Task is assuming query-sorted input so that the Secondary and Supplementary reads get @@ -638,6 +653,8 @@ task MarkDuplicates { OUTPUT=~{outputBamPath} \ METRICS_FILE=~{metricsPath} \ COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ VALIDATION_STRINGENCY=SILENT \ ~{"READ_NAME_REGEX=" + read_name_regex} \ OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \ @@ -668,9 +685,9 @@ task MarkDuplicates { outputBamPath: {description: "The location where the ouptut BAM file should be written.", category: "required"} metricsPath: {description: "The location where the output metrics file should be written.", category: "required"} compressionLevel: {description: "The compression level 
at which the BAM files are written.", category: "advanced"} - createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"} useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} + createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"} read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} @@ -692,16 +709,20 @@ task MergeVCFs { Array[File]+ inputVCFsIndexes String outputVcfPath Int compressionLevel = 1 - Boolean useJdkInflater = true # Slightly faster than the intel one. + Boolean useJdkInflater = false # Better results for compression level 1 (much smaller). # Higher compression levels similar to intel deflater. # NOTE: this might change in the future when the intel deflater is updated! - Boolean useJdkDeflater = true + # Second NOTE: No it did not change. Only the fastest algorithm with + # worse compression is wrapped in the intel GKL. Instead of using + # one of the slightly slower but better compressing alternatives from ISA-L. + # (Which are also faster than zlib.) 
+ Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "4G" String memory = "5G" Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } # Using MergeVcfs instead of GatherVcfs so we can create indices. @@ -757,7 +778,7 @@ task SamToFastq { String javaXmx = "16G" # High memory default to avoid crashes. String memory = "17G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" File? noneFile } @@ -818,7 +839,7 @@ task ScatterIntervalList { String javaXmx = "3G" String memory = "4G" - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -853,13 +874,15 @@ task SortSam { Boolean createMd5File = false Int maxRecordsInRam = 500000 Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater # Default ram of 4 GB. Using 125001.0 to prevent an answer of # 4.000000001 which gets rounded to 5. 
# GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -872,6 +895,8 @@ task SortSam { SORT_ORDER=~{true="queryname" false="coordinate" sortByName} \ CREATE_INDEX=true \ COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ VALIDATION_STRINGENCY=SILENT \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} @@ -896,7 +921,9 @@ task SortSam { sortByName: {description: "Sort the output file by name, default is position.", category: "advanced"} createMd5File: {description: "Whether to create an MD5 digest for any BAM or FASTQ files created.", category: "advanced"} maxRecordsInRam: {description: "This will specify the number of records stored in RAM before spilling to disk.", category: "advanced"} - compressionLevel: {description: "Compression level for all compressed files created.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} XmxGb: {description: "The maximum memory available to picard SortSam. 
Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -917,7 +944,7 @@ task SortVcf { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -967,7 +994,7 @@ task RenameSample { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -1014,11 +1041,13 @@ task UmiAwareMarkDuplicatesWithMateCigar { String tempdir = "temp" Boolean removeDuplicates = true String umiTagName = "RX" - + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "8G" String memory = "9G" Int timeMinutes = 360 - String dockerImage = "quay.io/biocontainers/picard:2.25.7--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -1034,6 +1063,9 @@ task UmiAwareMarkDuplicatesWithMateCigar { TMP_DIR=~{tempdir} \ REMOVE_DUPLICATES=~{removeDuplicates} \ CREATE_INDEX=true \ + COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} } output { @@ -1058,6 +1090,9 @@ task UmiAwareMarkDuplicatesWithMateCigar { removeDuplicates: {description: "Whether the duplicate reads should be removed instead of marked.", category: "common"} umiTagName: 
{description: "Which tag in the BAM file holds the UMI.", category: "common"} tempdir: {description: "Temporary directory.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From a0933e34c55d4bed26510e0fd09fe013441898c1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Jan 2022 14:38:42 +0100 Subject: [PATCH 372/668] Add option to assume sort order --- picard.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index bf32c8ac..144c7782 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1038,6 +1038,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String outputPath String outputPathMetrics = outputPath + ".metrics" String outputPathUmiMetrics = outputPath + ".umi-metrics" + String? 
assumeSortOrder String tempdir = "temp" Boolean removeDuplicates = true String umiTagName = "RX" @@ -1065,7 +1066,8 @@ task UmiAwareMarkDuplicatesWithMateCigar { CREATE_INDEX=true \ COMPRESSION_LEVEL=~{compressionLevel} \ USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ - USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ + ~{"ASSUME_SORT_ORDER=" + assumeSortOrder} } output { @@ -1089,6 +1091,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} removeDuplicates: {description: "Whether the duplicate reads should be removed instead of marked.", category: "common"} umiTagName: {description: "Which tag in the BAM file holds the UMI.", category: "common"} + assumeSortOrder: {description: "Assume a certain sort order even though the header might say otherwise.", category: "common"} tempdir: {description: "Temporary directory.", category: "advanced"} compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} From 58682093853cf6e62304d7797f3f268587187669 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Jan 2022 15:42:12 +0100 Subject: [PATCH 373/668] Have more records in ram --- picard.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/picard.wdl b/picard.wdl index 144c7782..e81cd4e3 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1038,6 +1038,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String outputPath String outputPathMetrics = outputPath + ".metrics" String outputPathUmiMetrics = outputPath + ".umi-metrics" + Int maxRecordsInRam = 3000000 # Default is 500_000 but that will lead to very small files on disk. String? 
assumeSortOrder String tempdir = "temp" Boolean removeDuplicates = true @@ -1063,6 +1064,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { UMI_METRICS_FILE=~{outputPathUmiMetrics} \ TMP_DIR=~{tempdir} \ REMOVE_DUPLICATES=~{removeDuplicates} \ + MAX_RECORDS_IN_RAM=~{maxRecordsInRam} \ CREATE_INDEX=true \ COMPRESSION_LEVEL=~{compressionLevel} \ USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ From 89a0324ab9467ab79528ce3908701d7b230b2822 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 19 Jan 2022 12:59:00 +0100 Subject: [PATCH 374/668] increase resources GRIDSS --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index b43a3837..7d6a1ebf 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,10 +93,10 @@ task GRIDSS { File? blacklistBed File? gridssProperties - Int jvmHeapSizeGb = 200 + Int jvmHeapSizeGb = 300 Int nonJvmMemoryGb = 50 Int threads = 4 - Int timeMinutes = ceil(7200 / threads) + 180 + Int timeMinutes = ceil(7200 / threads) + 1800 String dockerImage = "quay.io/biowdl/gridss:2.12.2" } From 99215fdd9834f39569e5672b9daf5b010a777abc Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 26 Jan 2022 12:56:23 +0100 Subject: [PATCH 375/668] update scripts and changelog --- CHANGELOG.md | 37 +++++++++++++++++++++++++++++++++++++ bcftools.wdl | 4 ++-- scripts | 2 +- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 126f1ed9..6c0db947 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,43 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Added a task for SnpEff. ++ Adjusted runtime settings for sambamba Markdup. ++ Added a task for sambamba Flagstat. ++ Added a task for Picard CollectWgsMetrics. ++ Added a task for Peach. 
++ Added tasks for HMFtools: + + Amber + + Cobalt + + Cuppa + + CuppaChart + + GripssApplicationKt + + GripssHardFilterApplicationKt + + HealthChecker + + Linx + + Protect + + Purple + + Sage + + VirusInterpreter ++ Added a task for VirusBreakend. ++ Added a task for GridssAnnotateVcfRepeatmasker. ++ Bumped GRIDSS version to 2.12.2. ++ Adjusted GRIDSS runtime settings. ++ Added optional inputs to GRIDSS: + + blacklistBed + + gridssProperties ++ Added a task for GRIDSS AnnotateInsertedSequence. ++ Added a task for ExtractSigPredictHRD. ++ Added a task for DeconstructSigs. ++ Added option useSoftclippingForSupplementary (default false) to + BWA mem. ++ Adjusted BWA mem runtime settings. ++ Added a task for bedtools coverage. ++ Added a task for bcftools filter. ++ Adjusted runtime settings for bcftools annotate. ++ Added optional inputs to bcftools annotate: + + inputFileIndex + + annsFileIndex + Update parameter_meta for macs2 + Add sample position in array task. diff --git a/bcftools.wdl b/bcftools.wdl index 13ce36be..88d97cd0 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -44,8 +44,8 @@ task Annotate { String? regions File? regionsFile File? renameChrs - File? samplesFile - + File? 
samplesFile + Int threads = 0 String memory = "4G" Int timeMinutes = 60 + ceil(size(inputFile, "G")) diff --git a/scripts b/scripts index c31670d3..84690a30 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit c31670d3a9222a2feafc649cbc118c95afbc7189 +Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 From bf4c1a3e8ab1bbd73a8d7a3fe29a15ac8ad69153 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 26 Jan 2022 16:24:09 +0100 Subject: [PATCH 376/668] adress comments --- hmftools.wdl | 108 +++++++++++++++++++++++++-------------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 1537bce5..0b4ba6d0 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -22,9 +22,9 @@ version 1.0 task Amber { input { - String normalName - File normalBam - File normalBamIndex + String referenceName + File referenceBam + File referenceBamIndex String tumorName File tumorBam File tumorBamIndex @@ -43,8 +43,8 @@ task Amber { command { AMBER -Xmx~{javaXmx} \ - -reference ~{normalName} \ - -reference_bam ~{normalBam} \ + -reference ~{referenceName} \ + -reference_bam ~{referenceBam} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ -output_dir ~{outputDir} \ @@ -63,8 +63,8 @@ task Amber { File tumorContaminationVcfIndex = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz.tbi" File tumorContaminationTsv = "~{outputDir}/~{tumorName}.amber.contamination.tsv" File tumorQc = "~{outputDir}/~{tumorName}.amber.qc" - File normalSnpVcf = "~{outputDir}/~{normalName}.amber.snp.vcf.gz" - File normalSnpVcfIndex = "~{outputDir}/~{normalName}.amber.snp.vcf.gz.tbi" + File normalSnpVcf = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz" + File normalSnpVcfIndex = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz.tbi" Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex, tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, normalSnpVcf, normalSnpVcfIndex] @@ -78,9 +78,9 @@ task 
Amber { } parameter_meta { - normalName: {description: "the name of the normal sample.", category: "required"} - normalBam: {description: "The normal BAM file.", category: "required"} - normalBamIndex: {description: "The index for the normal BAM file.", category: "required"} + referenceName: {description: "the name of the normal sample.", category: "required"} + referenceBam: {description: "The normal BAM file.", category: "required"} + referenceBamIndex: {description: "The index for the normal BAM file.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} tumorBam: {description: "The tumor BAM file.", category: "required"} tumorBamIndex: {description: "The index for the tumor BAM file.", category: "required"} @@ -102,9 +102,9 @@ task Amber { task Cobalt { input { - String normalName - File normalBam - File normalBamIndex + String referenceName + File referenceBam + File referenceBamIndex String tumorName File tumorBam File tumorBamIndex @@ -120,8 +120,8 @@ task Cobalt { command { COBALT -Xmx~{javaXmx} \ - -reference ~{normalName} \ - -reference_bam ~{normalBam} \ + -reference ~{referenceName} \ + -reference_bam ~{referenceBam} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ -output_dir ~{outputDir} \ @@ -131,9 +131,9 @@ task Cobalt { output { File version = "~{outputDir}/cobalt.version" - File normalGcMedianTsv = "~{outputDir}/~{normalName}.cobalt.gc.median.tsv" - File normalRationMedianTsv = "~{outputDir}/~{normalName}.cobalt.ratio.median.tsv" - File normalRationPcf = "~{outputDir}/~{normalName}.cobalt.ratio.pcf" + File normalGcMedianTsv = "~{outputDir}/~{referenceName}.cobalt.gc.median.tsv" + File normalRationMedianTsv = "~{outputDir}/~{referenceName}.cobalt.ratio.median.tsv" + File normalRationPcf = "~{outputDir}/~{referenceName}.cobalt.ratio.pcf" File tumorGcMedianTsv = "~{outputDir}/~{tumorName}.cobalt.gc.median.tsv" File tumorRatioPcf = "~{outputDir}/~{tumorName}.cobalt.ratio.pcf" File tumorRatioTsv = 
"~{outputDir}/~{tumorName}.cobalt.ratio.tsv" @@ -150,9 +150,9 @@ task Cobalt { } parameter_meta { - normalName: {description: "the name of the normal sample.", category: "required"} - normalBam: {description: "The normal BAM file.", category: "required"} - normalBamIndex: {description: "The index for the normal BAM file.", category: "required"} + referenceName: {description: "the name of the normal sample.", category: "required"} + referenceBam: {description: "The normal BAM file.", category: "required"} + referenceBamIndex: {description: "The index for the normal BAM file.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} tumorBam: {description: "The tumor BAM file.", category: "required"} tumorBamIndex: {description: "The index for the tumor BAM file.", category: "required"} @@ -279,7 +279,7 @@ task GripssApplicationKt { File inputVcf String outputPath = "gripss.vcf.gz" String tumorName - String normalName + String referenceName File referenceFasta File referenceFastaFai File referenceFastaDict @@ -287,8 +287,8 @@ task GripssApplicationKt { File breakendPon File breakpointPon - String memory = "33G" - String javaXmx = "32G" + String memory = "32G" + String javaXmx = "31G" Int timeMinutes = 45 String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" } @@ -298,7 +298,7 @@ task GripssApplicationKt { -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ -tumor ~{tumorName} \ - -reference ~{normalName} \ + -reference ~{referenceName} \ -ref_genome ~{referenceFasta} \ -breakpoint_hotspot ~{breakpointHotspot} \ -breakend_pon ~{breakendPon} \ @@ -383,9 +383,9 @@ task GripssHardFilterApplicationKt { task HealthChecker { input { String outputDir = "." 
- String normalName - File normalFlagstats - File normalMetrics + String referenceName + File referenceFlagstats + File referenceMetrics String tumorName File tumorFlagstats File tumorMetrics @@ -401,9 +401,9 @@ task HealthChecker { set -e mkdir -p ~{outputDir} health-checker -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -reference ~{normalName} \ - -ref_flagstat_file ~{normalFlagstats} \ - -ref_wgs_metrics_file ~{normalMetrics} \ + -reference ~{referenceName} \ + -ref_flagstat_file ~{referenceFlagstats} \ + -ref_wgs_metrics_file ~{referenceMetrics} \ -tumor ~{tumorName} \ -tum_flagstat_file ~{tumorFlagstats} \ -tum_wgs_metrics_file ~{tumorMetrics} \ @@ -425,9 +425,9 @@ task HealthChecker { parameter_meta { outputDir: {description: "The path the output will be written to.", category:"required"} - normalName: {description: "The name of the normal sample.", category: "required"} - normalFlagstats: {description: "The flagstats for the normal sample.", category: "required"} - normalMetrics: {description: "The picard WGS metrics for the normal sample.", category: "required"} + referenceName: {description: "The name of the normal sample.", category: "required"} + referenceFlagstats: {description: "The flagstats for the normal sample.", category: "required"} + referenceMetrics: {description: "The picard WGS metrics for the normal sample.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} tumorFlagstats: {description: "The flagstats for the tumor sample.", category: "required"} tumorMetrics: {description: "The picard WGS metrics for the tumor sample.", category: "required"} @@ -546,11 +546,11 @@ task Protect { input { String refGenomeVersion String tumorName - String normalName + String referenceName Array[String]+ sampleDoids String outputDir = "." 
Array[File]+ serveActionability - File doidsJson + File doidJson File purplePurity File purpleQc File purpleDriverCatalogSomatic @@ -576,11 +576,11 @@ task Protect { protect -Xmx~{javaXmx} \ -ref_genome_version ~{refGenomeVersion} \ -tumor_sample_id ~{tumorName} \ - -reference_sample_id ~{normalName} \ + -reference_sample_id ~{referenceName} \ -primary_tumor_doids '~{sep=";" sampleDoids}' \ -output_dir ~{outputDir} \ -serve_actionability_dir ~{sub(serveActionability[0], basename(serveActionability[0]), "")} \ - -doid_json ~{doidsJson} \ + -doid_json ~{doidJson} \ -purple_purity_tsv ~{purplePurity} \ -purple_qc_file ~{purpleQc} \ -purple_somatic_driver_catalog_tsv ~{purpleDriverCatalogSomatic} \ @@ -608,11 +608,11 @@ task Protect { parameter_meta { refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} - normalName: {description: "The name of the normal sample.", category: "required"} + referenceName: {description: "The name of the normal sample.", category: "required"} sampleDoids: {description: "The DOIDs (Human Disease Ontology) for the primary tumor.", category: "required"} outputDir: {description: "The directory the outputs will be written to.", category: "required"} serveActionability: {description: "The actionability files generated by hmftools' serve.", category: "required"} - doidsJson: {description: "A json with the DOID (Human Disease Ontology) tree.", category: "required"} + doidJson: {description: "A json with the DOID (Human Disease Ontology) tree.", category: "required"} purplePurity: {description: "The purity file generated by purple.", category: "required"} purpleQc: {description: "The QC file generated by purple.", category: "required"} purpleDriverCatalogSomatic: {description: "The somatic driver catalog generated by purple.", category: "required"} @@ -639,7 +639,7 @@ task Protect { task 
Purple { input { - String normalName + String referenceName String tumorName String outputDir = "./purple" Array[File]+ amberOutput @@ -667,7 +667,7 @@ task Purple { command { PURPLE -Xmx~{javaXmx} \ - -reference ~{normalName} \ + -reference ~{referenceName} \ -tumor ~{tumorName} \ -output_dir ~{outputDir} \ -amber ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ @@ -713,7 +713,7 @@ task Purple { File somaticClonalityPlot = "~{outputDir}/plot/~{tumorName}.somatic.clonality.png" File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" File purpleVersion = "~{outputDir}/purple.version" - File circosNormalRatio = "~{outputDir}/circos/~{normalName}.ratio.circos" + File circosNormalRatio = "~{outputDir}/circos/~{referenceName}.ratio.circos" File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" @@ -744,7 +744,7 @@ task Purple { } parameter_meta { - normalName: {description: "the name of the normal sample.", category: "required"} + referenceName: {description: "the name of the normal sample.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} outputDir: {description: "The path to the output directory.", category: "common"} amberOutput: {description: "The output files of hmftools amber.", category: "required"} @@ -787,9 +787,9 @@ task Sage { Boolean panelOnly = false String outputPath = "./sage.vcf.gz" - String? normalName - File? normalBam - File? normalBamIndex + String? referenceName + File? referenceBam + File? referenceBamIndex Int? hotspotMinTumorQual Int? panelMinTumorQual Int? 
hotspotMaxGermlineVaf @@ -801,8 +801,8 @@ task Sage { Int threads = 4 String javaXmx = "50G" - String memory = "60G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 9 / threads) + String memory = "51G" + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, referenceBam]), "G") * 9 / threads) String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_0" } @@ -810,8 +810,8 @@ task Sage { SAGE -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ - ~{"-reference " + normalName} \ - ~{"-reference_bam " + normalBam} \ + ~{"-reference " + referenceName} \ + ~{"-reference_bam " + referenceBam} \ -ref_genome ~{referenceFasta} \ -hotspots ~{hotspots} \ -panel_bed ~{panelBed} \ @@ -848,9 +848,9 @@ task Sage { tumorName: {description: "The name of the tumor sample.", category: "required"} tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} - normalName: {description: "The name of the normal/reference sample.", category: "common"} - normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} + referenceName: {description: "The name of the normal/reference sample.", category: "common"} + referenceBam: {description: "The BAM file for the normal sample.", category: "common"} + referenceBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} From fb91a02460b22501cc1c57dc381a486a29b01fbd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 27 Jan 2022 12:01:28 +0100 Subject: [PATCH 377/668] update healthchecker 
--- hmftools.wdl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 0b4ba6d0..76620e3c 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -408,13 +408,16 @@ task HealthChecker { -tum_flagstat_file ~{tumorFlagstats} \ -tum_wgs_metrics_file ~{tumorMetrics} \ -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ - -output_dir ~{outputDir} + -output_dir ~{outputDir} + test -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' && echo 'true' > '~{outputDir}/succeeded' + test -e '~{outputDir}/~{tumorName}.HealthCheckFailed' && echo 'false' > '~{outputDir}/succeeded' } - output { - File? healthCheckSucceeded = "~{outputDir}/~{tumorName}.HealthCheckSucceeded" - File? healthCheckFailed = "~{outputDir}/~{tumorName}.HealthCheckFailed" + Boolean succeeded = read_boolean("result") + File outputFile = if succeeded + then "~{outputDir}/~{tumorName}.HealthCheckSucceeded" + else "~{outputDir}/~{tumorName}.HealthCheckFailed" } runtime { From f234b0e8f46192d248e564f22bcd88912b890576 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 28 Jan 2022 14:42:42 +0100 Subject: [PATCH 378/668] add missing parameter_meta --- gridss.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 7d6a1ebf..d3d251a5 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -146,7 +146,8 @@ task GRIDSS { gridssProperties: {description: "A properties file for gridss.", category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} - jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} + jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling", category: "advanced"} + nonJvmMemoryGb: {description: "The amount of memory in Gb to be requested besides JVM memory.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: 
"advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From b3b79f62d4a538642318c0316080f9a098ca4b48 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Feb 2022 14:24:45 +0100 Subject: [PATCH 379/668] add a task for Pave --- hmftools.wdl | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 76620e3c..1dbfd5de 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -545,6 +545,79 @@ task Linx { } } +task Pave { + input { + String outputDir = "./" + String sampleName + File vcfFile + File vcfFileIndex + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File refGenomeVersion + File driverGenePanel + #The following should be in the same directory. + File geneDataCsv + File proteinFeaturesCsv + File transExonDataCsv + File transSpliceDataCsv + + Int timeMinutes = 50 + String javaXmx = "8G" + String memory = "9G" + String dockerImage = "quay.io/biowdl/pave:v1.0" + } + + command { + set -e + mkdir -p ~{outputDir} + pave -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -sample ~{sampleName} \ + -vcf_file ~{vcfFile} \ + -output_dir ~{outputDir} \ + -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{refGenomeVersion} \ + -driver_gene_panel ~{driverGenePanel} + } + + output { + File outputVcf = "~{outputDir}/~{sub(basename(geneDataCsv), 'vcf.gz$', 'pave.vcf.gz')}" + File outputVcfIndex = "~{outputVcf}.tbi" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + sampleName: {description: "The name of the sample.", category: "required"} + vcfFile: {description: "The input VCF file.", category: "required"} + 
vcfFileIndex: {description: "The index for the input vcf file.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"HG19\" or \"HG38\".", category: "required"} + driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} + #The following should be in the same directory. + geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} + transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Protect { input { String refGenomeVersion From 3ffa051fd2be4edb4fbc466836c9da782e68be27 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Feb 2022 17:04:00 +0100 Subject: [PATCH 380/668] add task for gripss 2.0 --- hmftools.wdl | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 1dbfd5de..c0c835b5 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -274,7 +274,79 @@ task CuppaChart { } } +task Gripss { + input { + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File knownFusionPairBedpe + File breakendPon + File breakpointPon + String referenceName + String tumorName + File vcf + File vcfIndex + String outputDir = "./" + + String memory = "17G" + String javaXmx = "16G" + Int timeMinutes = 50 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:2.0--hdfd78af_0" + } + + command { + set -e + mkdir -p ~{outputDir} + gripss -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -ref_genome ~{referenceFasta} \ + -known_hotspot_file ~{knownFusionPairBedpe} \ + -pon_sgl_file ~{breakendPon} \ + -pon_sv_file ~{breakpointPon} \ + -reference ~{referenceName} \ + -sample ~{tumorName} \ + -vcf ~{vcf} \ + -output_dir ~{outputDir} \ + -output_id somatic + } + + output { + File fullVcf = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz" + File fullVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz.tbi" + File filteredVcf = "~{outputDir}/~{tumorName}.gripss.somatic.filtered.vcf.gz" + File filteredVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.filtered.vcf.gz.tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence 
dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + knownFusionPairBedpe: {description: "Equivalent to the `-known_hotspot_file` option.", category: "required"} + breakendPon: {description: "Equivalent to the `-pon_sgl_file` option.", category: "required"} + breakpointPon: {description: "Equivalent to the `-pon_sv_file` option.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + referenceName: {description: "The name of the normal sample.", category: "required"} + vcf: {description: "The input VCF.", category: "required"} + vcfIndex: {description: "The index for the input VCF.", category: "required"} + outputDir: {description: "The path the output will be written to.", category:"required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GripssApplicationKt { + # Obsolete input { File inputVcf String outputPath = "gripss.vcf.gz" @@ -322,13 +394,15 @@ task GripssApplicationKt { parameter_meta { inputVcf: {description: "The input VCF.", category: "required"} outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} + referenceName: {description: "The name of the normal sample.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} - breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} + breakpointPon: {description: "Equivalent to the `-breakpoint_pon` option.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} @@ -339,6 +413,7 @@ task GripssApplicationKt { } task GripssHardFilterApplicationKt { + # Obsolete input { File inputVcf String outputPath = "gripss_hard_filter.vcf.gz" @@ -724,6 +799,7 @@ task Purple { File somaticVcf File germlineVcf File filteredSvVcf + File filteredSvVcfIndex File? fullSvVcf File? 
fullSvVcfIndex File referenceFasta From 22a880cdd2223034ebb80fcdb1006b2bd3fe81c7 Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 11 Feb 2022 11:52:10 +0100 Subject: [PATCH 381/668] update purple to 3.2 --- hmftools.wdl | 54 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index c0c835b5..caafa440 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -65,8 +65,8 @@ task Amber { File tumorQc = "~{outputDir}/~{tumorName}.amber.qc" File normalSnpVcf = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz" File normalSnpVcfIndex = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz.tbi" - Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex, - tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, + Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex, + tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, normalSnpVcf, normalSnpVcfIndex] } @@ -110,7 +110,7 @@ task Cobalt { File tumorBamIndex String outputDir = "./cobalt" File gcProfile - + Int threads = 1 String memory = "5G" String javaXmx = "4G" @@ -174,7 +174,7 @@ task Cuppa { Array[File]+ purpleOutput String sampleName Array[String]+ categories = ["DNA"] - Array[File]+ referenceData + Array[File]+ referenceData File purpleSvVcf File purpleSvVcfIndex File purpleSomaticVcf @@ -244,7 +244,7 @@ task CuppaChart { } command { - set -e + set -e mkdir -p ~{outputDir} cuppa-chart \ -sample ~{sampleName} \ @@ -429,7 +429,7 @@ task GripssHardFilterApplicationKt { -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} + -output_vcf ~{outputPath} } output { @@ -490,7 +490,7 @@ task HealthChecker { output { Boolean succeeded = read_boolean("result") - File outputFile = if succeeded + File outputFile = if 
succeeded then "~{outputDir}/~{tumorName}.HealthCheckSucceeded" else "~{outputDir}/~{tumorName}.HealthCheckFailed" } @@ -675,10 +675,9 @@ task Pave { referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"HG19\" or \"HG38\".", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} - #The following should be in the same directory. geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} @@ -757,7 +756,7 @@ task Protect { } parameter_meta { - refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. 
Either \"37\" or \"38\".", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} referenceName: {description: "The name of the normal sample.", category: "required"} sampleDoids: {description: "The DOIDs (Human Disease Ontology) for the primary tumor.", category: "required"} @@ -800,41 +799,47 @@ task Purple { File germlineVcf File filteredSvVcf File filteredSvVcfIndex - File? fullSvVcf - File? fullSvVcfIndex + File fullSvVcf + File fullSvVcfIndex File referenceFasta File referenceFastaFai File referenceFastaDict File driverGenePanel File somaticHotspots File germlineHotspots - + #The following should be in the same directory. + File geneDataCsv + File proteinFeaturesCsv + File transExonDataCsv + File transSpliceDataCsv + Int threads = 1 Int timeMinutes = 30 String memory = "9G" String javaXmx = "8G" - # clone of quay.io/biocontainers/hmftools-purple:3.1--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6' - String dockerImage = "quay.io/biowdl/hmftools-purple:3.1" + # clone of quay.io/biocontainers/hmftools-purple:3.2--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6' + String dockerImage = "quay.io/biowdl/hmftools-purple:3.2" } command { PURPLE -Xmx~{javaXmx} \ -reference ~{referenceName} \ + -germline_vcf ~{germlineVcf} \ + -germline_hotspots ~{germlineHotspots} \ -tumor ~{tumorName} \ -output_dir ~{outputDir} \ -amber ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ -cobalt ~{sub(cobaltOutput[0], basename(cobaltOutput[0]), "")} \ -gc_profile ~{gcProfile} \ -somatic_vcf ~{somaticVcf} \ - -germline_vcf ~{germlineVcf} \ -structural_vcf ~{filteredSvVcf} \ - ~{"-sv_recovery_vcf " + fullSvVcf} \ + -sv_recovery_vcf ~{fullSvVcf} \ -circos /usr/local/bin/circos \ -ref_genome ~{referenceFasta} \ - -driver_catalog \ - -driver_gene_panel ~{driverGenePanel} \ + -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -run_drivers \ -somatic_hotspots 
~{somaticHotspots} \ - -germline_hotspots ~{germlineHotspots} \ + -driver_gene_panel ~{driverGenePanel} \ -threads ~{threads} } @@ -877,8 +882,8 @@ task Purple { File circosMap = "~{outputDir}/circos/~{tumorName}.map.circos" File circosSnp = "~{outputDir}/circos/~{tumorName}.snp.circos" Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, - purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, - purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, + purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, + purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, @@ -913,6 +918,11 @@ task Purple { driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} somaticHotspots: {description: "A vcf file with hotspot somatic variant sites.", category: "required"} germlineHotspots: {description: "A vcf file with hotspot germline variant sites.", category: "required"} + geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} + transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, 
`proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"} + threads: {description: "The number of threads the program will use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 0d7909255421e4e7b30cfcd51e68da1530221427 Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 11 Feb 2022 12:41:34 +0100 Subject: [PATCH 382/668] update linx to 1.17 --- hmftools.wdl | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index caafa440..810685bf 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -529,8 +529,6 @@ task Linx { String outputDir = "./linx" File fragileSiteCsv File lineElementCsv - File replicationOriginsBed - File viralHostsCsv File knownFusionCsv File driverGenePanel #The following should be in the same directory. @@ -539,10 +537,10 @@ task Linx { File transExonDataCsv File transSpliceDataCsv - String memory = "5G" - String javaXmx = "4G" + String memory = "9G" + String javaXmx = "8G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.16--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.17--hdfd78af_0" } command { @@ -554,9 +552,7 @@ task Linx { -output_dir ~{outputDir} \ -fragile_site_file ~{fragileSiteCsv} \ -line_element_file ~{lineElementCsv} \ - -replication_origins_file ~{replicationOriginsBed} \ - -viral_hosts_file ~{viralHostsCsv} \ - -gene_transcripts_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ -check_fusions \ -known_fusion_file ~{knownFusionCsv} \ -check_drivers \ @@ -598,12 +594,10 @@ task Linx { svVcf: {description: "A VCF file containing structural variants, produced using GRIDSS, annotated for viral insertions and postprocessed with GRIPSS.", category: "required"} svVcfIndex: {description: "Index for the structural variants VCf file.", category: "required"} purpleOutput: {description: "The 
files produced by PURPLE.", category: "required"} - refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"HG19\" or \"HG38\".", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} outputDir: {description: "The directory the outputs will be written to.", category: "required"} fragileSiteCsv: {description: "A list of known fragile sites.", category: "required"} lineElementCsv: {description: "A list of known LINE source regions.", category: "required"} - replicationOriginsBed: {description: "Replication timing input in BED format with replication timing as the 4th column.", category: "required"} - viralHostsCsv: {description: "A list of the viruses which were used for annotation of the GRIDSS results.", category: "required"} knownFusionCsv: {description: "A CSV file describing known fusions.", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} From d6bfc449dfc6979511e746a52f6fddf0e30e7853 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 12:47:06 +0100 Subject: [PATCH 383/668] Speed up CI by using conda caching and only checking changed files Squashed commit of the following: commit 7fa743cc028b8e2c86bde49244834ee13c13e95b Author: Ruben Vorderman Date: Fri Feb 11 12:34:33 2022 +0100 Add comment about activate environment commit 2de7802e03f90cd6e26b3d8287fcb0c6b8b81d11 Author: Ruben Vorderman Date: Fri Feb 11 12:26:38 2022 +0100 Invalidate cache commit 8ca394d41361acf2511249e3e29688baf0705004 Author: Ruben Vorderman Date: Fri Feb 11 12:26:27 2022 +0100 Consolidate steps commit 31d09c6f0e86d4625bfa3a6e94a7ced910c7410c Author: Ruben Vorderman 
Date: Fri Feb 11 11:27:53 2022 +0100 Use correct path for caching commit 7e1374ed323bb38d674da09d7270def4a2192d00 Author: Ruben Vorderman Date: Fri Feb 11 11:22:10 2022 +0100 Do not cache conda packages commit deffd8a0776e15a4df58a1398fcbcb8b0f1430f0 Author: Ruben Vorderman Date: Fri Feb 11 11:20:59 2022 +0100 Remove unnecessary whitespace commit 8e97bcd4dfd8ee459a23f1931465875c0a41fd49 Author: Ruben Vorderman Date: Fri Feb 11 11:20:03 2022 +0100 Remove debugging task commit 8338cd4b843245d781d7028f1f1acad45c8c7d0d Author: Ruben Vorderman Date: Fri Feb 11 11:17:18 2022 +0100 Try to change path commit 6a75baa36eee340d7a6d766c89163e960a6203b0 Author: Ruben Vorderman Date: Fri Feb 11 11:12:18 2022 +0100 Delete path line in current github env commit cbbb9fe67cb796a010c01760ca2e05986f979ced Author: Ruben Vorderman Date: Fri Feb 11 11:05:50 2022 +0100 Properly activate commit 671568b7c8d79a5141429068a32b72814110b361 Author: Ruben Vorderman Date: Fri Feb 11 10:59:46 2022 +0100 Also printenv commit 4c8945e8d5305753482538389ddc8af892f493f9 Author: Ruben Vorderman Date: Fri Feb 11 10:56:45 2022 +0100 Manual activate commit a925c53a99836e81eb0e2b21075356370906c641 Author: Ruben Vorderman Date: Fri Feb 11 10:53:10 2022 +0100 Reset cache number commit 645ed2b4504d067ea1b26a0922943ef3d5c34622 Author: Ruben Vorderman Date: Fri Feb 11 10:51:09 2022 +0100 Activate environment path commit 5852d29fb538b80f06a738677e7ae271c6c57fa3 Author: Ruben Vorderman Date: Fri Feb 11 10:31:07 2022 +0100 Proper setting for cache commit 83f14a939d662d628ca47dc7b82bbc114f164541 Author: Ruben Vorderman Date: Fri Feb 11 10:03:45 2022 +0100 List environments commit 59267fbba267c0b1726733e390ff471d7012cefa Author: Ruben Vorderman Date: Fri Feb 11 10:01:58 2022 +0100 Activate environment manually commit 0a4d2cd5644407308fcc78356a8aef55de86c0c6 Author: Ruben Vorderman Date: Fri Feb 11 09:57:32 2022 +0100 List environments commit 0bc8fa939eb35a6eb352bb58b1235efecd34056f Author: Ruben Vorderman Date: Fri Feb 
11 09:52:02 2022 +0100 Add mambaforge comment commit 719d92a0b5245be891d1b5c0eb38d8048abdc5a1 Author: Ruben Vorderman Date: Fri Feb 11 09:44:18 2022 +0100 Use normal conda, since environment is cached commit e5efbb75109f40cfa8b7b33280ec9707a31970d1 Author: Ruben Vorderman Date: Fri Feb 11 09:38:21 2022 +0100 Also cache environments.txt commit 4fa66afb6606ceeb7be577df9f20704d96fc3af0 Author: Ruben Vorderman Date: Fri Feb 11 09:34:46 2022 +0100 Check home commit 2ac42e42829141650585780d27f39d06ebaf8f75 Author: Ruben Vorderman Date: Wed Feb 9 17:00:27 2022 +0100 Add an annoying but effective manual check commit 78d88eae8cb3d1ca44709ce90bcffeb7c5786c1b Author: Ruben Vorderman Date: Wed Feb 9 16:54:29 2022 +0100 Cache correct path commit c05c94561785b1d5e198588dc210313014f3913d Author: Ruben Vorderman Date: Wed Feb 9 16:45:51 2022 +0100 Rename workflow commit 1c67f010c589c1c1fb407ac32e8ed74afdb3ddfd Author: Ruben Vorderman Date: Wed Feb 9 16:45:05 2022 +0100 Use correct quotes commit 7f9d2e559697e9d9d1f6df3514c8269612e7bcee Author: Ruben Vorderman Date: Wed Feb 9 16:42:25 2022 +0100 Only check changed wdl files commit 0e2a15b38e206fdb96d2d8b225999d6e5c9e6e73 Author: Ruben Vorderman Date: Wed Feb 9 16:34:35 2022 +0100 remove v parameter commit 89348dde8a84cd1d935999255c64428c99db7042 Author: Ruben Vorderman Date: Wed Feb 9 16:19:02 2022 +0100 Remove newline commit 752b8cb4a8407908348d8424fdc4b89d3219fdad Author: Ruben Vorderman Date: Wed Feb 9 16:17:33 2022 +0100 Git fetch develop commit 9216a3f846268ba00d0fe922055536b06dc975b3 Author: Ruben Vorderman Date: Wed Feb 9 15:53:13 2022 +0100 Specifically check origin commit b54c140de4fc0bf31d7c95384831aedb253f35a3 Author: Ruben Vorderman Date: Wed Feb 9 15:44:50 2022 +0100 Only chek files that are different from the base with womtool validate commit d963818753272aa18311d3d29276c3db6241e85d Author: Ruben Vorderman Date: Wed Feb 9 15:33:50 2022 +0100 Correctly use data commit 8113bfdd2e1feda6047e13da79885a3131c000e6 Author: 
Ruben Vorderman Date: Wed Feb 9 15:32:48 2022 +0100 Set correct env cache param commit 4f7af2ed0365887be9147954290c4b807673afdd Author: Ruben Vorderman Date: Wed Feb 9 15:30:23 2022 +0100 Add lint-evnironment commit b026b5a8a77ea131b229a50cb28e0d301915cfb8 Author: Ruben Vorderman Date: Wed Feb 9 15:28:50 2022 +0100 Use mamba env update commit 41fda1a9f52d56578a76f8bf185db86da2128a0e Author: Ruben Vorderman Date: Wed Feb 9 15:25:21 2022 +0100 Use cache commit fd1a64261bea956b6b31a26f5eaa38ce4a63121c Author: Ruben Vorderman Date: Wed Feb 9 14:31:17 2022 +0100 Add missing done statement commit 4a64eb43535f48e0558ba6c5dc408178784ef207 Merge: a36a227 f234b0e Author: Ruben Vorderman Date: Wed Feb 9 14:23:48 2022 +0100 Merge branch 'develop' into BIOWDL-583 commit a36a2274116732bc8e3229a267fe35ee4d61e7da Author: Ruben Vorderman Date: Wed Feb 9 14:23:26 2022 +0100 Implement all checks in lint.yml directly commit 391bb0de9619e75293599a1be1d24322fd466f4c Author: Ruben Vorderman Date: Wed Feb 9 14:11:33 2022 +0100 Use a separate lint file commit 832a131cee403ec0ac7d983d6e82fd567ce1b246 Author: Ruben Vorderman Date: Tue Dec 14 16:32:30 2021 +0100 Use mamba-forge and mamba to install dependencies --- .github/PULL_REQUEST_TEMPLATE.md | 1 + .github/lint-environment.yml | 9 ++++ .github/workflows/ci.yml | 30 ----------- .github/workflows/lint.yml | 93 ++++++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+), 30 deletions(-) create mode 100644 .github/lint-environment.yml delete mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/lint.yml diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 3b4ec9ac..372071ee 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -2,3 +2,4 @@ - [ ] Pull request details were added to CHANGELOG.md. - [ ] Documentation was updated (if required). - [ ] `parameter_meta` was added/updated (if required). 
+- [ ] Submodule branches are on develop or a tagged commit. diff --git a/.github/lint-environment.yml b/.github/lint-environment.yml new file mode 100644 index 00000000..63b538fc --- /dev/null +++ b/.github/lint-environment.yml @@ -0,0 +1,9 @@ +name: biowdl-lint +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - cromwell + - wdl-aid + - miniwdl diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 78566111..00000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: Continuous integration - -on: - pull_request: - paths_ignore: - - "docs/**" - -defaults: - run: - # This is needed for miniconda, see: - # https://github.com/marketplace/actions/setup-miniconda#important - shell: bash -l {0} - -jobs: - lint: - runs-on: ubuntu-latest - name: Womtool validate and submodule up to date. - steps: - - uses: actions/checkout@v2.3.4 - with: - submodules: recursive - - name: install miniconda - uses: conda-incubator/setup-miniconda@v2.0.1 - with: - channels: conda-forge,bioconda,defaults - # Conda-incubator uses 'test' environment by default. 
- - name: install requirements - run: conda install -n test cromwell miniwdl wdl-aid - - name: run linting - run: bash scripts/biowdl_lint.sh diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..e6edbbab --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,93 @@ +name: Linting + +on: + pull_request: + paths_ignore: + - "docs/**" + +defaults: + run: + # This is needed for miniconda, see: + # https://github.com/marketplace/actions/setup-miniconda#important + shell: bash -l {0} + +jobs: + lint: + runs-on: ubuntu-latest + name: Linting checks + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + + - name: Set cache date + run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV + + - name: Cache conda environment + uses: actions/cache@v2.1.7 + env: + # Increase this value to manually invalidate the cache + CACHE_NUMBER: 0 + with: + path: /usr/share/miniconda/envs/biowdl-lint + key: + ${{runner.os}}-biowdl-lint-${{ env.CACHE_NUMBER }}-${{env.DATE}}-${{ hashFiles('.github/lint-environment.yml') }} + id: env_cache + + # Use the builtin conda. This is the fastest installation. It may not be + # the fastest for resolving, but the package cache mitigates that problem. + # Since this installs fastest, it is fastest for all runs where a cache + # hit occurs. + - name: install miniconda + uses: conda-incubator/setup-miniconda@v2.1.1 + with: + channels: conda-forge,bioconda,defaults + channel-priority: strict + auto-activate-base: false + use-only-tar-bz2: true # Needed for proper caching according to the documentation. + # activate-environment is broken! This always seems to create a new environment. + # Activation is therefore done separately. 
+ + - name: Create test environment if no cache is present + run: conda env create -n biowdl-lint -f .github/lint-environment.yml + if: steps.env_cache.outputs.cache-hit != 'true' + + - name: Activate test environment + # The new PATH should be passed to the environment, otherwise it won't register. + run: | + conda activate biowdl-lint + echo "PATH=$PATH" >> $GITHUB_ENV + + - name: Fetch develop branch for comparisons + run: git fetch --depth=1 origin develop + + - name: run womtool validate + # Only check files that have changed from the base reference. + # Womtool validate checks very slowly, so this saves a lot of time. + run: | + bash -c " + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + womtool validate $WDL_FILE + done + " + - name: run miniwdl check + run: bash -c 'miniwdl check $(git ls-files *.wdl)' + + - name: Check copyright headers + run: | + bash -c ' + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" + done + ' + - name: Check parameter_meta for inputs + run: | + bash -c " + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || + if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr + then + exit 1 + fi + done + " From 54337a3c99596e48149d0d2522cc79c0a7b379e9 Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 11 Feb 2022 13:38:34 +0100 Subject: [PATCH 384/668] update peach to 1.5 --- peach.wdl | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/peach.wdl b/peach.wdl index af44daec..6a5770f4 100644 --- a/peach.wdl +++ b/peach.wdl @@ -22,7 +22,6 @@ version 1.0 task Peach { input { - File transcriptTsv File germlineVcf File germlineVcfIndex String tumorName @@ -31,28 +30,26 @@ task Peach { File panelJson String memory = 
"2G" - String dockerImage = "quay.io/biowdl/peach:v1.0" + String dockerImage = "quay.io/biowdl/peach:v1.5" Int timeMinutes = 5 } command { + set -e + mkdir -p ~{outputDir} peach \ - --recreate_bed \ - --transcript_tsv ~{transcriptTsv} \ - ~{germlineVcf} \ - ~{tumorName} \ - ~{normalName} \ - 1.0 \ - ~{outputDir} \ - ~{panelJson} \ - vcftools + -vcf ~{germlineVcf} \ + --sample_t_id ~{tumorName} \ + --sample_r_id ~{normalName} \ + --tool_version 1.5 \ + --outputDir ~{outputDir} \ + --panel } output { File callsTsv = "~{outputDir}/~{tumorName}.peach.calls.tsv" - File filteredVcf = "~{outputDir}/~{tumorName}.peach.filtered.vcf" File genotypeTsv = "~{outputDir}/~{tumorName}.peach.genotype.tsv" - Array[File] outputs = [callsTsv, filteredVcf, genotypeTsv] + Array[File] outputs = [callsTsv, genotypeTsv] } runtime { @@ -62,7 +59,6 @@ task Peach { } parameter_meta { - transcriptTsv: {description: "A tsv file describing transcripts.", category: "required"} germlineVcf: {description: "The germline VCF file from hmftools' purple.", category: "required"} germlineVcfIndex: {description: "The germline VCF's index.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} @@ -74,4 +70,4 @@ task Peach { memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} From adb8a68ce8fff78613ee95451db821363b74353b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 16:42:36 +0100 Subject: [PATCH 385/668] Debug task --- .github/workflows/lint.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index e6edbbab..622e0581 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -66,6 +66,7 @@ jobs: # Womtool validate checks very slowly, so this saves a lot of time. run: | bash -c " + set -x for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do womtool validate $WDL_FILE done From 37faa1b46883bb93c6e926141d6145b3ead9fafd Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 16:50:07 +0100 Subject: [PATCH 386/668] Use heredoc script --- .github/workflows/lint.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 622e0581..7eb6fe5d 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -65,25 +65,25 @@ jobs: # Only check files that have changed from the base reference. # Womtool validate checks very slowly, so this saves a lot of time. 
run: | - bash -c " + bash <<- SCRIPT set -x for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do womtool validate $WDL_FILE done - " + SCRIPT - name: run miniwdl check run: bash -c 'miniwdl check $(git ls-files *.wdl)' - name: Check copyright headers run: | - bash -c ' + bash <<- SCRIPT for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" done - ' + SCRIPT - name: Check parameter_meta for inputs run: | - bash -c " + bash <<- SCRIPT for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr @@ -91,4 +91,4 @@ jobs: exit 1 fi done - " + SCRIPT From 7d8cadf598e9359e6ea6d9822fe63210f026acfe Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 17:06:44 +0100 Subject: [PATCH 387/668] Use always upload cache --- .github/workflows/lint.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 7eb6fe5d..11bf7a40 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -24,7 +24,8 @@ jobs: run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV - name: Cache conda environment - uses: actions/cache@v2.1.7 + # Use an always upload cache to prevent solving conda environment again and again on failing linting. 
+ uses: pat-s/always-upload-cache@v2.1.5 env: # Increase this value to manually invalidate the cache CACHE_NUMBER: 0 From eba0865e6865217ed34de9e04ac0f4c1b86f9435 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 17:11:34 +0100 Subject: [PATCH 388/668] Run stuff directly in bash --- .github/workflows/lint.yml | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 11bf7a40..61e3d99f 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -66,30 +66,24 @@ jobs: # Only check files that have changed from the base reference. # Womtool validate checks very slowly, so this saves a lot of time. run: | - bash <<- SCRIPT - set -x - for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do - womtool validate $WDL_FILE - done - SCRIPT + set -x + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + womtool validate $WDL_FILE + done - name: run miniwdl check run: bash -c 'miniwdl check $(git ls-files *.wdl)' - name: Check copyright headers run: | - bash <<- SCRIPT - for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do - grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" - done - SCRIPT + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" + done - name: Check parameter_meta for inputs run: | - bash <<- SCRIPT - for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do - wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || - if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr - then - exit 1 - fi - done - SCRIPT + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do 
+ wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || + if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr + then + exit 1 + fi + done From e72270755a25b5259f99d6e1855bf10926a2dc5d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 17:14:27 +0100 Subject: [PATCH 389/668] Use set -x to better see what happens --- .github/workflows/lint.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 61e3d99f..7ef19e58 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -71,15 +71,19 @@ jobs: womtool validate $WDL_FILE done - name: run miniwdl check - run: bash -c 'miniwdl check $(git ls-files *.wdl)' + run: | + set -x + bash -c 'miniwdl check $(git ls-files *.wdl)' - name: Check copyright headers run: | + set -x for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" done - name: Check parameter_meta for inputs run: | + set -x for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr From 868f3617f22d28ae6855ed8c5d75fd76c967a5db Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 14 Feb 2022 10:51:20 +0100 Subject: [PATCH 390/668] Add format parameter to parameter_meta --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index e17d613b..2afe3bbe 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -67,6 +67,6 @@ task PeakCalling { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - + format: {description: "Which format to use. Use BAMPE for paired-end reads.", category: "common"} } } From 0f6d75c76ed78cc1847acc732fd78ca44b2646a6 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 14 Feb 2022 17:04:49 +0100 Subject: [PATCH 391/668] fix some issues --- hmftools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index c0c835b5..27badc9b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -312,8 +312,8 @@ task Gripss { output { File fullVcf = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz" File fullVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz.tbi" - File filteredVcf = "~{outputDir}/~{tumorName}.gripss.somatic.filtered.vcf.gz" - File filteredVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.filtered.vcf.gz.tbi" + File filteredVcf = "~{outputDir}/~{tumorName}.gripss.filtered.somatic.vcf.gz" + File filteredVcfIndex = "~{outputDir}/~{tumorName}.gripss.filtered.somatic.vcf.gz.tbi" } runtime { @@ -629,7 +629,7 @@ task Pave { File referenceFasta File referenceFastaFai File referenceFastaDict - File refGenomeVersion + String refGenomeVersion File driverGenePanel #The following should be in the same directory. 
File geneDataCsv From b72d2fcff910a8a7cf3c1103f90bcf2974b75b4c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 09:16:17 +0100 Subject: [PATCH 392/668] fix Pave output --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index b349038d..36909ee4 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -652,7 +652,7 @@ task Pave { output { File outputVcf = "~{outputDir}/~{sub(basename(geneDataCsv), 'vcf.gz$', 'pave.vcf.gz')}" - File outputVcfIndex = "~{outputVcf}.tbi" + File outputVcfIndex = "~{outputDir}/~{sub(basename(geneDataCsv), 'vcf.gz$', 'pave.vcf.gz.tbi')}" } runtime { From 0554cfe785f39b9e1ebfef4a2dda7450a4ed749b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 09:36:33 +0100 Subject: [PATCH 393/668] fix copy paste error --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 36909ee4..c9745b57 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -651,8 +651,8 @@ task Pave { } output { - File outputVcf = "~{outputDir}/~{sub(basename(geneDataCsv), 'vcf.gz$', 'pave.vcf.gz')}" - File outputVcfIndex = "~{outputDir}/~{sub(basename(geneDataCsv), 'vcf.gz$', 'pave.vcf.gz.tbi')}" + File outputVcf = "~{outputDir}/~{sub(basename(vcfFile), 'vcf.gz$', 'pave.vcf.gz')}" + File outputVcfIndex = "~{outputDir}/~{sub(basename(vcfFile), 'vcf.gz$', 'pave.vcf.gz.tbi')}" } runtime { From 92d964d52ea3d64f7f927f6b41933098c4ec3678 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 12:33:02 +0100 Subject: [PATCH 394/668] fix purple outputs --- hmftools.wdl | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index c9745b57..2015c125 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -838,11 +838,13 @@ task Purple { } output { - File driverCatalogSomaticTsv = "~{outputDir}/~{tumorName}.driver.catalog.somatic.tsv" File driverCatalogGermlineTsv = 
"~{outputDir}/~{tumorName}.driver.catalog.germline.tsv" + File driverCatalogSomaticTsv = "~{outputDir}/~{tumorName}.driver.catalog.somatic.tsv" File purpleCnvGeneTsv = "~{outputDir}/~{tumorName}.purple.cnv.gene.tsv" - File purpleCnvGermlineTsv = "~{outputDir}/~{tumorName}.purple.cnv.germline.tsv" File purpleCnvSomaticTsv = "~{outputDir}/~{tumorName}.purple.cnv.somatic.tsv" + File purpleGermlineDeletionTsv = "~{outputDir}/~{tumorName}.purple.germline.deletion.tsv" + File purpleGermlineVcf = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz" + File purpleGermlineVcfIndex = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz.tbi" File purplePurityRangeTsv = "~{outputDir}/~{tumorName}.purple.purity.range.tsv" File purplePurityTsv = "~{outputDir}/~{tumorName}.purple.purity.tsv" File purpleQc = "~{outputDir}/~{tumorName}.purple.qc" @@ -851,10 +853,9 @@ task Purple { File purpleSomaticHistTsv = "~{outputDir}/~{tumorName}.purple.somatic.hist.tsv" File purpleSomaticVcf = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz" File purpleSomaticVcfIndex = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz.tbi" - File purpleGermlineVcf = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz" - File purpleGermlineVcfIndex = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz.tbi" File purpleSvVcf = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz" File purpleSvVcfIndex = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz.tbi" + File purpleVersion = "~{outputDir}/purple.version" File circosPlot = "~{outputDir}/plot/~{tumorName}.circos.png" File copynumberPlot = "~{outputDir}/plot/~{tumorName}.copynumber.png" File inputPlot = "~{outputDir}/plot/~{tumorName}.input.png" @@ -863,19 +864,19 @@ task Purple { File segmentPlot = "~{outputDir}/plot/~{tumorName}.segment.png" File somaticClonalityPlot = "~{outputDir}/plot/~{tumorName}.somatic.clonality.png" File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" - File purpleVersion = "~{outputDir}/purple.version" + File somaticRainfallPlot = 
"~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File circosNormalRatio = "~{outputDir}/circos/~{referenceName}.ratio.circos" - File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" - File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" - File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" - File circosTumorRatio = "~{outputDir}/circos/~{tumorName}.ratio.circos" - File circosGaps = "~{outputDir}/circos/gaps.txt" File circosBaf = "~{outputDir}/circos/~{tumorName}.baf.circos" + File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" File circosCnv = "~{outputDir}/circos/~{tumorName}.cnv.circos" + File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" File circosInputConf = "~{outputDir}/circos/~{tumorName}.input.conf" + File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" File circosMap = "~{outputDir}/circos/~{tumorName}.map.circos" + File circosTumorRatio = "~{outputDir}/circos/~{tumorName}.ratio.circos" File circosSnp = "~{outputDir}/circos/~{tumorName}.snp.circos" - Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, + File circosGaps = "~{outputDir}/circos/gaps.txt" + Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, From d554e60c08dee3597680cb18d9eee67201aba5ac Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 13:16:33 +0100 Subject: [PATCH 395/668] fix peach command --- peach.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/peach.wdl b/peach.wdl index 6a5770f4..bd8375d7 100644 --- a/peach.wdl +++ b/peach.wdl @@ -43,7 +43,7 @@ task Peach { --sample_r_id ~{normalName} \ --tool_version 1.5 \ --outputDir ~{outputDir} \ - --panel + --panel ~{panelJson} } output { From 
54f323f52f7ac0d0fbbab1f893b5f8583d504791 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 13:20:54 +0100 Subject: [PATCH 396/668] fix some typos --- peach.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/peach.wdl b/peach.wdl index bd8375d7..d1bc17f8 100644 --- a/peach.wdl +++ b/peach.wdl @@ -38,11 +38,11 @@ task Peach { set -e mkdir -p ~{outputDir} peach \ - -vcf ~{germlineVcf} \ + --vcf ~{germlineVcf} \ --sample_t_id ~{tumorName} \ --sample_r_id ~{normalName} \ --tool_version 1.5 \ - --outputDir ~{outputDir} \ + --outputdir ~{outputDir} \ --panel ~{panelJson} } From c675c91fbc91f932c6f5018986d025993611f8a3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 24 Feb 2022 15:14:35 +0100 Subject: [PATCH 397/668] fix linx output and health-checker command --- hmftools.wdl | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 2015c125..3ab203fb 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -484,8 +484,14 @@ task HealthChecker { -tum_wgs_metrics_file ~{tumorMetrics} \ -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ -output_dir ~{outputDir} - test -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' && echo 'true' > '~{outputDir}/succeeded' - test -e '~{outputDir}/~{tumorName}.HealthCheckFailed' && echo 'false' > '~{outputDir}/succeeded' + if [ -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' ] + then + echo 'true' > '~{outputDir}/succeeded' + fi + if [ -e '~{outputDir}/~{tumorName}.HealthCheckFailed' ] + then + echo 'false' > '~{outputDir}/succeeded' + fi } output { @@ -531,6 +537,7 @@ task Linx { File lineElementCsv File knownFusionCsv File driverGenePanel + Boolean writeAllVisFusions = false #The following should be in the same directory. 
File geneDataCsv File proteinFeaturesCsv @@ -540,7 +547,7 @@ task Linx { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.17--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" } command { @@ -558,7 +565,8 @@ task Linx { -check_drivers \ -driver_gene_panel ~{driverGenePanel} \ -chaining_sv_limit 0 \ - -write_vis_data + -write_vis_data \ + ~{if writeAllVisFusions then "-write_all_vis_fusions" else ""} } output { @@ -569,7 +577,6 @@ task Linx { File linxFusion = "~{outputDir}/~{sampleName}.linx.fusion.tsv" File linxLinks = "~{outputDir}/~{sampleName}.linx.links.tsv" File linxSvs = "~{outputDir}/~{sampleName}.linx.svs.tsv" - File linxViralInserts = "~{outputDir}/~{sampleName}.linx.viral_inserts.tsv" File linxVisCopyNumber = "~{outputDir}/~{sampleName}.linx.vis_copy_number.tsv" File linxVisFusion = "~{outputDir}/~{sampleName}.linx.vis_fusion.tsv" File linxVisGeneExon = "~{outputDir}/~{sampleName}.linx.vis_gene_exon.tsv" @@ -578,9 +585,9 @@ task Linx { File linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv" File linxVersion = "~{outputDir}/linx.version" Array[File] outputs = [driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion, - linxLinks, linxSvs, linxViralInserts, linxVisCopyNumber, - linxVisFusion, linxVisGeneExon, linxVisProteinDomain, - linxVisSegments, linxVisSvData, linxVersion] + linxLinks, linxSvs, linxVisCopyNumber, linxVisFusion, + linxVisGeneExon, linxVisProteinDomain, linxVisSegments, linxVisSvData, + linxVersion] } runtime { @@ -600,6 +607,7 @@ task Linx { lineElementCsv: {description: "A list of known LINE source regions.", category: "required"} knownFusionCsv: {description: "A CSV file describing known fusions.", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} + writeAllVisFusions: {description: "Equivalent to the -write_all_vis_fusions 
flag.", category: "advanced"} geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} From 5e821d51571d91727357e324cc9283eafce5e427 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 24 Feb 2022 16:26:29 +0100 Subject: [PATCH 398/668] fix health checker output --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 3ab203fb..9a3bd437 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -495,7 +495,7 @@ task HealthChecker { } output { - Boolean succeeded = read_boolean("result") + Boolean succeeded = read_boolean("succeeded") File outputFile = if succeeded then "~{outputDir}/~{tumorName}.HealthCheckSucceeded" else "~{outputDir}/~{tumorName}.HealthCheckFailed" From f2cc5cc02fb5ed2376969ff745ce0d6741fc32ff Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Feb 2022 11:43:22 +0100 Subject: [PATCH 399/668] add LinxVisualisations --- hmftools.wdl | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 9a3bd437..c852b520 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -622,6 +622,61 @@ task Linx { } } +task LinxVisualisations { + input { + String outputDir = "./linx_visualisation" + String sample + String refGenomeVersion + Array[File]+ linxOutput + Boolean plotReportable = true + + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 10 + String dockerImage = 
"quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" + } + + command { + set -e + mkdir -p ~{outputDir} + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-linx-1.18-0/sv-linx.jar \ + com.hartwig.hmftools.linx.visualiser.SvVisualiser \ + -sample ~{sample} \ + -ref_genome_version ~{refGenomeVersion} \ + -circos /usr/local/bin/circos \ + -vis_file_dir ~{sub(linxOutput[0], basename(linxOutput[0]), "")} \ + -data_out ~{outputDir}/circos \ + -plot_out ~{outputDir}/plot \ + ~{if plotReportable then "-plot_reportable" else ""} + } + + output { + + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + sample: {description: "The sample's name.", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} + linxOutput: {description: "The directory containing the linx output.", category: "required"} + plotReportable: {description: "Equivalent to the -plot_reportable flag.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Pave { input { String outputDir = "./" From 8fcd2e2598fbc340abdda2b3a3d56dae04cb6bdf Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Feb 2022 12:41:51 +0100 Subject: [PATCH 400/668] add linx visualisation output --- hmftools.wdl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index c852b520..c1a824c6 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -632,7 +632,7 @@ task LinxVisualisations { String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 10 + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" } @@ -647,12 +647,13 @@ task LinxVisualisations { -circos /usr/local/bin/circos \ -vis_file_dir ~{sub(linxOutput[0], basename(linxOutput[0]), "")} \ -data_out ~{outputDir}/circos \ - -plot_out ~{outputDir}/plot \ + -plot_out ~{outputDir}/plots \ ~{if plotReportable then "-plot_reportable" else ""} } output { - + Array[File] circos = glob("~{outputDir}/circos/*") + Array[File] plots = glob("~{outputDir}/plots/*" } runtime { From 97c9681b4d10a9fc5d7c2b930df9e69cba85d07c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Feb 2022 12:50:28 +0100 Subject: [PATCH 401/668] typo --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index c1a824c6..7f739311 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -653,7 +653,7 @@ task LinxVisualisations { output { Array[File] circos = glob("~{outputDir}/circos/*") - Array[File] plots = glob("~{outputDir}/plots/*" + Array[File] plots = glob("~{outputDir}/plots/*") } runtime { From 2467174555e85c5b4cf819018afd44a8b5f24af8 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Feb 2022 14:19:08 +0100 Subject: [PATCH 402/668] update virus-interpreter to 1.2 --- hmftools.wdl | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) 
diff --git a/hmftools.wdl b/hmftools.wdl index 7f739311..65187f44 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1099,25 +1099,29 @@ task Sage { task VirusInterpreter { input { String sampleId + File purplePurityTsv + File prupleQcFile + File tumorSampleWgsMetricsFile File virusBreakendTsv File taxonomyDbTsv - File virusInterpretationTsv - File virusBlacklistTsv + File virusReportingDbTsv String outputDir = "." String memory = "3G" String javaXmx = "2G" Int timeMinutes = 15 - String dockerImage = "quay.io/biowdl/virus-interpreter:1.0" + String dockerImage = "quay.io/biowdl/virus-interpreter:1.2" } command { - virus-interpreter -Xmx~{javaXmx} \ + virus-interpreter -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -sample_id ~{sampleId} \ + -purple_purity_tsv ~{purplePurityTsv} \ + -purple_qc_file ~{prupleQcFile} \ + -tumor_sample_wgs_metrics_file ~{tumorSampleWgsMetricsFile} \ -virus_breakend_tsv ~{virusBreakendTsv} \ -taxonomy_db_tsv ~{taxonomyDbTsv} \ - -virus_interpretation_tsv ~{virusInterpretationTsv} \ - -virus_blacklist_tsv ~{virusBlacklistTsv} \ + -virus_reporting_db_tsv ~{virusReportingDbTsv} \ -output_dir ~{outputDir} } @@ -1133,10 +1137,12 @@ task VirusInterpreter { parameter_meta { sampleId: {description: "The name of the sample.", category: "required"} + purplePurityTsv: {description: "The purity file produced by purple.", category: "required"} + prupleQcFile: {description: "The QC file produced by purple.", category: "required"} + tumorSampleWgsMetricsFile: {description: "The picard WGS metrics file for this sample.", category: "required"} virusBreakendTsv: {description: "The TSV output from virusbreakend.", category: "required"} taxonomyDbTsv: {description: "A taxonomy database tsv.", category: "required"} - virusInterpretationTsv: {description: "A virus interpretation tsv.", category: "required"} - virusBlacklistTsv: {description: "A virus blacklist tsv.", category: "required"} + virusReportingDbTsv: {description: "A virus reporting tsv.", category: 
"required"} outputDir: {description: "The directory the output will be written to.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", From b76866a2fbe5c23961f63dfa6b68697cf3c23126 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 1 Mar 2022 15:37:19 +0100 Subject: [PATCH 403/668] update protect to 2.0 --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 65187f44..da9c6fd5 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -632,7 +632,7 @@ task LinxVisualisations { String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 60 + Int timeMinutes = 1440 #FIXME String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" } @@ -777,7 +777,7 @@ task Protect { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biowdl/protect:v1.4" + String dockerImage = "quay.io/biowdl/protect:v2.0" } command { From 513e64560afa2a532a791289e5ef77a90006aa50 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 2 Mar 2022 10:23:07 +0100 Subject: [PATCH 404/668] fix health-checker --- hmftools.wdl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 76620e3c..27b31bca 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -409,13 +409,19 @@ task HealthChecker { -tum_wgs_metrics_file ~{tumorMetrics} \ -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ -output_dir ~{outputDir} - test -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' && echo 'true' > '~{outputDir}/succeeded' - test -e '~{outputDir}/~{tumorName}.HealthCheckFailed' && echo 'false' > '~{outputDir}/succeeded' + if [ -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' ] + then + echo 'true' > '~{outputDir}/succeeded' + fi + if [ -e 
'~{outputDir}/~{tumorName}.HealthCheckFailed' ] + then + echo 'false' > '~{outputDir}/succeeded' + fi } output { - Boolean succeeded = read_boolean("result") - File outputFile = if succeeded + Boolean succeeded = read_boolean("succeeded") + File outputFile = if succeeded then "~{outputDir}/~{tumorName}.HealthCheckSucceeded" else "~{outputDir}/~{tumorName}.HealthCheckFailed" } From 652735023d7a71738b0ccea450e4fedd27e41830 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 2 Mar 2022 12:42:00 +0100 Subject: [PATCH 405/668] update cuppa to 1.6 --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index da9c6fd5..277c8dd4 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -184,7 +184,7 @@ task Cuppa { String javaXmx = "4G" String memory = "5G" Int timeMinutes = 10 - String dockerImage = "quay.io/biowdl/cuppa:1.4" + String dockerImage = "quay.io/biowdl/cuppa:1.6" } command { @@ -240,7 +240,7 @@ task CuppaChart { String memory = "4G" Int timeMinutes = 5 - String dockerImage = "quay.io/biowdl/cuppa:1.4" + String dockerImage = "quay.io/biowdl/cuppa:1.6" } command { From d5294222e69c6e793ea0d13e448e67b9482e5a10 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 3 Mar 2022 15:50:52 +0100 Subject: [PATCH 406/668] add orange, cupGenerateReport and (hopefully) fix sage plots --- hmftools.wdl | 212 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 208 insertions(+), 4 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 277c8dd4..75fd2d19 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -168,6 +168,62 @@ task Cobalt { } } +task CupGenerateReport { + input { + String sampleName + File cupData + String outputDir = "./cuppa" + + String memory = "5G" + Int timeMinutes = 10 + String dockerImage = "quay.io/biowdl/cuppa:1.6" + } + + # This script writes to the directory that the input is located in. 
+ # Giving the input directly will cause the script to write in the + # locallized input dir, which may cause issues with write permissions + # in certain execution engines or backends. We, therefore, make links + # to a working directory, and give that directory as input instead. + # We can't just use the outputDir directly. This could be an + # absolute path in which case the linking might fail due to name + # collisions. Outputs are copied to the given output dir afterwards. + command { + set -e + mkdir -p ./workdir ~{outputDir} + ln -s -t workdir ~{sep=" " cupData} + CupGenerateReport \ + ~{sampleName} \ + workdir + mv -t ~{outputDir} \ + ./workdir/~{sampleName}.cup.report.summry.png \ + ./workdir/~{sampleName}.cup.report.features.png \ + ./workdir/~{sampleName}_cup.report.pdf + } + + output { + File summaryPng = "~{outputDir}/~{sampleName}.cup.report.summry.png" + File featuresPng = "~{outputDir}/~{sampleName}.cup.report.features.png" + File reportPdf = "~{outputDir}/~{sampleName}_cup.report.pdf" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + sampleName: {description: "The sample id.", category: "required"} + cupData: {description: "The output produced by cuppa.", category: "required"} + outputDir: {description: "The directory the ouput will be placed in.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Cuppa { input { Array[File]+ linxOutput @@ -632,7 +688,7 @@ task LinxVisualisations { String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 1440 #FIXME + Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" } @@ -678,6 +734,151 @@ task LinxVisualisations { } } +task Orange { + input { + String outputDir = "./orange" + File doidJson + Array[String] sampleDoids + String tumorName + String referenceName + File referenceMetrics + File tumorMetrics + File referenceFlagstats + File tumorFlagstats + File sageGermlineGeneCoverageTsv + File sageSomaticRefSampleBqrPlot + File sageSomaticTumorSampleBqrPlot + File purpleGeneCopyNumberTsv + File purpleGermlineDriverCatalogTsv + File purpleGermlineVariantVcf + File purpleGermlineVariantVcfIndex + Array[File]+ purplePlots + File purplePurityTsv + File purpleQcFile + File purpleSomaticDriverCatalogTsv + File purpleSomaticVariantVcf + File purpleSomaticVariantVcfIndex + File linxFusionTsv + File linxBreakendTsv + File linxDriverCatalogTsv + File linxDriverTsv + Array[File]+ linxPlots + File cuppaResultCsv + File cuppaSummaryPlot + File cuppaFeaturePlot + File chordPredictionTxt + File peachGenotypeTsv + File protectEvidenceTsv + File annotatedVirusTsv + #File pipelineVersionFile + File cohortMappingTsv + File cohortPercentilesTsv + + String memory = "17G" + String javaXmx = "16G" + Int timeMinutes = 1440 #FIXME + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" + } + + command { + set -e + mkdir -p ~{outputDir} + orange -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -output_dir ~{outputDir} \ + -doid_json ~{doidJson} \ + -primary_tumor_doids '~{sep=";" sampleDoids}' \ + -max_evidence_level C \ + -tumor_sample_id ~{tumorName} \ + -reference_sample_id ~{referenceName} \ + -ref_sample_wgs_metrics_file ~{referenceMetrics} \ + 
-tumor_sample_wgs_metrics_file ~{tumorMetrics} \ + -ref_sample_flagstat_file ~{referenceFlagstats} \ + -tumor_sample_flagstat_file ~{tumorFlagstats} \ + -sage_germline_gene_coverage_tsv ~{sageGermlineGeneCoverageTsv} \ + -sage_somatic_ref_sample_bqr_plot ~{sageSomaticRefSampleBqrPlot} \ + -sage_somatic_tumor_sample_bqr_plot ~{sageSomaticTumorSampleBqrPlot} \ + -purple_gene_copy_number_tsv ~{purpleGeneCopyNumberTsv} \ + -purple_germline_driver_catalog_tsv ~{purpleGermlineDriverCatalogTsv} \ + -purple_germline_variant_vcf ~{purpleGermlineVariantVcf} \ + -purple_plot_directory ~{sub(purplePlots[0], basename(purplePlots[0]), "")} \ + -purple_purity_tsv ~{purplePurityTsv} \ + -purple_qc_file ~{purpleQcFile} \ + -purple_somatic_driver_catalog_tsv ~{purpleSomaticDriverCatalogTsv} \ + -purple_somatic_variant_vcf ~{purpleSomaticVariantVcf} \ + -linx_fusion_tsv ~{linxFusionTsv} \ + -linx_breakend_tsv ~{linxBreakendTsv} \ + -linx_driver_catalog_tsv ~{linxDriverCatalogTsv} \ + -linx_driver_tsv ~{linxDriverTsv} \ + -linx_plot_directory ~{sub(linxPlots[0], basename(linxPlots[0]), "")} \ + -cuppa_result_csv ~{cuppaResultCsv} \ + -cuppa_summary_plot ~{cuppaSummaryPlot} \ + -cuppa_feature_plot ~{cuppaFeaturePlot} \ + -chord_prediction_txt ~{chordPredictionTxt} \ + -peach_genotype_tsv ~{peachGenotypeTsv} \ + -protect_evidence_tsv ~{protectEvidenceTsv} \ + -annotated_virus_tsv ~{annotatedVirusTsv} \ + -cohort_mapping_tsv ~{cohortMappingTsv} \ + -cohort_percentiles_tsv ~{cohortPercentilesTsv} + } + #TODO may need to be added: -pipeline_version_file ~{pipelineVersionFile} + + output { + File orangeJson = "~{outputDir}/~{tumorName}.orange.json" + File orangePdf = "~{outputDir}/~{tumorName}.orange.pdf" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + outputDir: {description: "The directory the outputs will be written to.", category: "common"} + doidJson: {description: "A json with the DOID (Human Disease 
Ontology) tree.", category: "required"} + sampleDoids: {description: "The DOIDs (Human Disease Ontology) for the primary tumor.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + referenceName: {description: "The name of the normal sample.", category: "required"} + referenceMetrics: {description: "The picard WGS metrics for the normal sample.", category: "required"} + tumorMetrics: {description: "The picard WGS metrics for the tumor sample.", category: "required"} + referenceFlagstats: {description: "The flagstats for the normal sample.", category: "required"} + tumorFlagstats: {description: "The flagstats for the tumor sample.", category: "required"} + sageGermlineGeneCoverageTsv: {description: "Gene coverage file produced by the germline sage run.", category: "required"} + sageSomaticRefSampleBqrPlot: {description: "The reference bqr plot produced by the somatic sage run.", category: "required"} + sageSomaticTumorSampleBqrPlot: {description: "The reference bqr plot produced by the somatic sage run.", category: "required"} + purpleGeneCopyNumberTsv: {description: "Copy number tsv produced by purple.", category: "required"} + purpleGermlineDriverCatalogTsv: {description: "Germline driver catalog produced by purple.", category: "required"} + purpleGermlineVariantVcf: {description: "Germline variant vcf produced by purple.", category: "required"} + purplePlots: {description: "The plots generated by purple.", category: "required"} + purplePurityTsv: {description: "The purity file produced by purple.", category: "required"} + purpleQcFile: {description: "The qc file produced by purple.", category: "required"} + purpleSomaticDriverCatalogTsv: {description: "Somatic driver catalog produced by purple.", category: "required"} + purpleSomaticVariantVcf: {description: "Somatic variant vcf produced by purple.", category: "required"} + linxFusionTsv: {description: "The fusions tsv produced by linx.", category: 
"required"} + linxBreakendTsv: {description: "The breakend tsv produced by linx.", category: "required"} + linxDriverCatalogTsv: {description: "The driver catalog produced by linx.", category: "required"} + linxDriverTsv: {description: "The driver tsv produced by linx.", category: "required"} + linxPlots: {description: "The plots generated by linx.", category: "required"} + cuppaResultCsv: {description: "The cuppa results csv.", category: "required"} + cuppaSummaryPlot: {description: "The cuppa summary plot.", category: "required"} + cuppaFeaturePlot: {description: "The cuppa feature plot.", category: "required"} + chordPredictionTxt: {description: "Chord prediction results.", category: "required"} + peachGenotypeTsv: {description: "Genotype tsv produced by peach.", category: "required"} + protectEvidenceTsv: {description: "Evidence tsv produced by protect.", category: "required"} + annotatedVirusTsv: {description: "Annotated virus tsv produced by virus-interpreter.", category: "required"} + #pipelineVersionFile: {description: "", category: "required"} + cohortMappingTsv: {description: "Cohort mapping file from the HMFTools resources.", category: "required"} + cohortPercentilesTsv: {description: "Cohort percentile file from the HMFTools resources.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Pave { input { String outputDir = "./" @@ -1024,7 +1225,7 @@ task Sage { String javaXmx = "50G" String memory = "51G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, referenceBam]), "G") * 9 / threads) - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } command { @@ -1054,8 +1255,11 @@ task Sage { output { File outputVcf = outputPath File outputVcfIndex = outputPath + ".tbi" - # There is some plots as well, but in the current container the labels in the plots are just series of `□`s. - # This seems to be a systemic issue with R generated plots in biocontainers... + File? referenceSageBqrPng = "~{referenceName}.sage.bqr.png" + File? referenceSageBqrTsv = "~{referenceName}.sage.bqr.tsv" + File tumorSageBqrPng = "~{tumorName}.sage.bqr.png" + File tumorSageBqrTsv = "~{tumorName}.sage.bqr.tsv" + File sageGeneCoverageTsv = "~{tumorName}.sage.gene.coverage.tsv" } runtime { From 960aa3cf0a713b6d7870b33c529e22b98b711aea Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 4 Mar 2022 15:06:49 +0100 Subject: [PATCH 407/668] Slightly less records in RAM --- picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index e81cd4e3..436369d7 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1038,7 +1038,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String outputPath String outputPathMetrics = outputPath + ".metrics" String outputPathUmiMetrics = outputPath + ".umi-metrics" - Int maxRecordsInRam = 3000000 # Default is 500_000 but that will lead to very small files on disk. + Int maxRecordsInRam = 1500000 # Default is 500_000 but that will lead to very small files on disk. String? 
assumeSortOrder String tempdir = "temp" Boolean removeDuplicates = true From cf0b105cdf0a2ad7a2c1354857c281c18150a36b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 7 Mar 2022 10:32:35 +0100 Subject: [PATCH 408/668] Add missing whitespace. Co-authored-by: Davy Cats --- umi.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/umi.wdl b/umi.wdl index a32d646a..0dc5c55e 100644 --- a/umi.wdl +++ b/umi.wdl @@ -34,7 +34,9 @@ task BamReadNameToUmiTag { Int timeMinutes = 1 + ceil(size([inputBam], "G") * 10) String dockerImage = "quay.io/biocontainers/pysam:0.17.0--py39h051187c_0" } + String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") + command <<< python < Date: Mon, 7 Mar 2022 12:15:51 +0100 Subject: [PATCH 409/668] Add parameter_meta for useSoftclippingforSupplementary --- bwa.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/bwa.wdl b/bwa.wdl index 1cb170b7..373de628 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -94,6 +94,7 @@ task Mem { outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} usePostalt: {description: "Whether to use the postalt script from bwa kit."} + useSoftclippingForSupplementary: {description: "Use soft-clipping for supplementary alignments instead of hard-clipping", category: "common"} sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} readgroup: {description: "A readgroup identifier.", category: "common"} From b070d3efbfcbd41ca3545a2eec0e5bd1a6dc2a3d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 7 Mar 2022 12:19:13 +0100 Subject: [PATCH 410/668] Add parameter_meta for Picard UmiAwareMarkDuplicatesWithMateCigar --- picard.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/picard.wdl b/picard.wdl index b6d9fadf..eea8d42f 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1160,8 +1160,10 @@ task UmiAwareMarkDuplicatesWithMateCigar { assumeSortOrder: {description: "Assume a certain sort order even though the header might say otherwise.", category: "common"} tempdir: {description: "Temporary directory.", category: "advanced"} compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + maxRecordsInRam: {description: "This will specify the number of records stored in RAM before spilling to disk.", category: "advanced"} useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 8ccfb0e0d3b3e31ad5aa08fc527ecaa46e77c589 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 7 Mar 2022 13:46:17 +0100 Subject: [PATCH 411/668] fix CupGenerateReport --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 75fd2d19..d9dea387 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -193,7 +193,7 @@ task CupGenerateReport { ln -s -t workdir ~{sep=" " cupData} CupGenerateReport \ ~{sampleName} \ - workdir + workdir/ mv -t ~{outputDir} \ ./workdir/~{sampleName}.cup.report.summry.png \ ./workdir/~{sampleName}.cup.report.features.png \ From 799811db76b369b057aa54555e08c3025c6905a0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 8 Mar 2022 10:38:40 +0100 Subject: [PATCH 412/668] fix cupGenerateReport --- hmftools.wdl | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index d9dea387..2e294ecd 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -195,15 +195,19 @@ task CupGenerateReport { ~{sampleName} \ workdir/ mv -t ~{outputDir} \ - ./workdir/~{sampleName}.cup.report.summry.png \ - ./workdir/~{sampleName}.cup.report.features.png \ - ./workdir/~{sampleName}_cup.report.pdf + ./workdir/~{sampleName}.cup.report.summary.png \ + ./workdir/~{sampleName}_cup_report.pdf + if [ -f ./workdir/~{sampleName}.cup.report.features.png ] + then + mv -t ~{outputDir} \ + ./workdir/~{sampleName}.cup.report.features.png + fi } output { - File summaryPng = "~{outputDir}/~{sampleName}.cup.report.summry.png" - File featuresPng = "~{outputDir}/~{sampleName}.cup.report.features.png" - File reportPdf = "~{outputDir}/~{sampleName}_cup.report.pdf" + File summaryPng = "~{outputDir}/~{sampleName}.cup.report.summary.png" + File? 
featuresPng = "~{outputDir}/~{sampleName}.cup.report.features.png" + File reportPdf = "~{outputDir}/~{sampleName}_cup_report.pdf" } runtime { @@ -765,7 +769,7 @@ task Orange { Array[File]+ linxPlots File cuppaResultCsv File cuppaSummaryPlot - File cuppaFeaturePlot + File? cuppaFeaturePlot File chordPredictionTxt File peachGenotypeTsv File protectEvidenceTsv @@ -812,7 +816,7 @@ task Orange { -linx_plot_directory ~{sub(linxPlots[0], basename(linxPlots[0]), "")} \ -cuppa_result_csv ~{cuppaResultCsv} \ -cuppa_summary_plot ~{cuppaSummaryPlot} \ - -cuppa_feature_plot ~{cuppaFeaturePlot} \ + ~{"-cuppa_feature_plot " + cuppaFeaturePlot} \ -chord_prediction_txt ~{chordPredictionTxt} \ -peach_genotype_tsv ~{peachGenotypeTsv} \ -protect_evidence_tsv ~{protectEvidenceTsv} \ @@ -861,7 +865,7 @@ task Orange { linxPlots: {description: "The plots generated by linx.", category: "required"} cuppaResultCsv: {description: "The cuppa results csv.", category: "required"} cuppaSummaryPlot: {description: "The cuppa summary plot.", category: "required"} - cuppaFeaturePlot: {description: "The cuppa feature plot.", category: "required"} + cuppaFeaturePlot: {description: "The cuppa feature plot.", category: "common"} chordPredictionTxt: {description: "Chord prediction results.", category: "required"} peachGenotypeTsv: {description: "Genotype tsv produced by peach.", category: "required"} protectEvidenceTsv: {description: "Evidence tsv produced by protect.", category: "required"} From 5ae1f6de5c3c4efe38a792e3be1104bbacacea3b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 8 Mar 2022 10:47:30 +0100 Subject: [PATCH 413/668] fix copy-paste error (orange docker image) --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 2e294ecd..34941059 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -781,7 +781,7 @@ task Orange { String memory = "17G" String javaXmx = "16G" Int timeMinutes = 1440 #FIXME - String dockerImage = 
"quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" + String dockerImage = "quay.io/quay.io/biowdl/orange:v1.6" } command { From 54d70a6b508f4a8360ce995a4bda5f6094225826 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 8 Mar 2022 10:55:02 +0100 Subject: [PATCH 414/668] fix copy-paste error --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 34941059..5a480f93 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -781,7 +781,7 @@ task Orange { String memory = "17G" String javaXmx = "16G" Int timeMinutes = 1440 #FIXME - String dockerImage = "quay.io/quay.io/biowdl/orange:v1.6" + String dockerImage = "quay.io/biowdl/orange:v1.6" } command { From 9ca13a0a999ff874d041d26c4860c8c07edbe92d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Mar 2022 10:16:59 +0100 Subject: [PATCH 415/668] Remove duplicate options for markduplicates --- picard.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/picard.wdl b/picard.wdl index eea8d42f..3d835829 100644 --- a/picard.wdl +++ b/picard.wdl @@ -726,8 +726,6 @@ task MarkDuplicates { CREATE_INDEX=true \ ADD_PG_TAG_TO_READS=false \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} \ - USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ - USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} } output { From ac55982a7acf3c06460ae0b8ac2c394865eeaa4c Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 18 Mar 2022 12:46:53 +0100 Subject: [PATCH 416/668] run tabix if vcf index is missing in gridss --- gridss.wdl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index d3d251a5..92d7df1e 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -116,6 +116,12 @@ task GRIDSS { ~{normalBam} \ ~{tumorBam} samtools index ~{outputPrefix}_assembly.bam ~{outputPrefix}_assembly.bai + + # For some reason the VCF index is sometimes missing + if [ ! 
-e ~{outputPrefix}.vcf.gz.tbi ] + then + tabix ~{outputPrefix}.vcf.gz + fi } output { From 173bb2e6547c1fa4ee20ec5da98368522e18b887 Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 18 Mar 2022 12:49:26 +0100 Subject: [PATCH 417/668] update changelog --- CHANGELOG.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c0db947..b028b60a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ The GRIDSS task will now run tabix separately if GRIDSS doesn't + produce a vcf index. + Added a task for SnpEff. + Adjusted runtime settings for sambamba Markdup. + Added a task for sambamba Flagstat. @@ -28,7 +30,7 @@ version 5.1.0-dev + Sage + VirusInterpreter + Added a task for VirusBreakend. -+ Added a task for GridssAnnotateVcfRepeatmasker. ++ Added a task for GridssAnnotateVcfRepeatmasker. + Bumped GRIDSS version to 2.12.2. + Adjusted GRIDSS runtime settings. + Added optional inputs to GRIDSS: @@ -147,7 +149,7 @@ version 4.0.0 + Picard MergeVcf now uses compression level 1 by default. + bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The number of threads is now related to the number of threads on the aligner. - Using more threads reduces the chance of the samtools sort pipe getting + Using more threads reduces the chance of the samtools sort pipe getting blocked if it's full. + Renamed a few inputs in centrifuge.wdl, isoseq3.wdl, talon.wdl, transcriptclean.wdl to be more descriptive. 
From 1c02ce1ea5464c11491f9dc67802ab71cb46dbcb Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 8 Apr 2022 12:44:01 +0200 Subject: [PATCH 418/668] add task for sv type annotation of gridss results --- gridss.wdl | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 92d7df1e..f771ebe4 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -79,6 +79,69 @@ task AnnotateInsertedSequence { } } +task AnnotateSvType { + input { + File gridssVcf + File gridssVcfIndex + String outputPath = "./gridss.svtyped.vcf" + + String memory = "32G" + String dockerImage = "quay.io/biocontainers/bioconductor-structuralvariantannotation:1.10.0--r41hdfd78af_0" + Int timeMinutes = 240 + } + + # Based on https://github.com/PapenfussLab/gridss/issues/74 + command <<< + set -e + mkdir -p "$(dirname ~{outputPath})" + R --vanilla << EOF + library(VariantAnnotation) + library(StructuralVariantAnnotation) + + vcf_path <- "~{gridssVcf}" + out_path <- "~{outputPath}" + + # Simple SV type classifier + simpleEventType <- function(gr) { + return(ifelse(seqnames(gr) != seqnames(partner(gr)), "BND", # inter-chromosomosal + ifelse(gr$insLen >= abs(gr$svLen) * 0.7, "INS", + ifelse(strand(gr) == strand(partner(gr)), "INV", + ifelse(xor(start(gr) < start(partner(gr)), strand(gr) == "-"), "DEL", + "DUP"))))) + } + + header <- scanVcfHeader(vcf_path) + vcf <- readVcf(vcf_path, seqinfo(header)) + gr <- breakpointRanges(vcf) + svtype <- simpleEventType(gr) + info(vcf[gr$sourceId])$SVTYPE <- svtype + writeVcf(vcf, out_path) + EOF + >>> + + output { + File vcf = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + gridssVcf: {description: "The VCF produced by GRIDSS.", category: "required"} + gridssVcfIndex: {description: "The index for the VCF produced by GRIDSS.", category: "required"} + outputPath: {description: "The path the output should be written 
to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GRIDSS { input { File tumorBam From 87bb3c4f2104cb3c8a020aa0abfb7f5a4faa387a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 8 Apr 2022 13:32:48 +0200 Subject: [PATCH 419/668] copy paste error --- gridss.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index f771ebe4..b38f344e 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -134,8 +134,6 @@ task AnnotateSvType { gridssVcfIndex: {description: "The index for the VCF produced by GRIDSS.", category: "required"} outputPath: {description: "The path the output should be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 1b4238c66c6150e57e128086d16d6939a1198406 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 8 Apr 2022 14:14:33 +0200 Subject: [PATCH 420/668] typo --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index b38f344e..00705392 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -79,7 +79,7 @@ task AnnotateInsertedSequence { } } -task AnnotateSvType { +task AnnotateSvTypes { input { File gridssVcf File gridssVcfIndex From bd153caa313e5fad73d2716813f7eb02c36b963c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Jun 2022 14:56:45 +0200 Subject: [PATCH 421/668] adjust gridss threads --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index d3d251a5..b118af9d 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -95,7 +95,7 @@ task GRIDSS { Int jvmHeapSizeGb = 300 Int nonJvmMemoryGb = 50 - Int threads = 4 + Int threads = 16 Int timeMinutes = ceil(7200 / threads) + 1800 String dockerImage = "quay.io/biowdl/gridss:2.12.2" } From 873ece6f64e85bea10c28754f3260de155cc8d80 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 17 Jun 2022 14:59:35 +0200 Subject: [PATCH 422/668] adjust some runtime settings --- bedtools.wdl | 2 +- gridss.wdl | 10 +++++----- hmftools.wdl | 8 ++++---- sambamba.wdl | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/bedtools.wdl b/bedtools.wdl index 1d956cab..80a281d6 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -76,7 +76,7 @@ task Coverage { String outputPath = "./coverage.tsv" String memory = "8G" - Int timeMinutes = 120 + Int timeMinutes = 320 String dockerImage = "quay.io/biocontainers/bedtools:2.30.0--h7d7f7ad_2" } diff --git a/gridss.wdl b/gridss.wdl index b118af9d..c1a41a25 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,9 +93,9 @@ task GRIDSS { File? blacklistBed File? 
gridssProperties - Int jvmHeapSizeGb = 300 - Int nonJvmMemoryGb = 50 - Int threads = 16 + Int jvmHeapSizeGb = 64 + Int nonJvmMemoryGb = 10 + Int threads = 12 Int timeMinutes = ceil(7200 / threads) + 1800 String dockerImage = "quay.io/biowdl/gridss:2.12.2" } @@ -216,9 +216,9 @@ task Virusbreakend { String outputPath = "./virusbreakend.vcf" String memory = "75G" - Int threads = 8 + Int threads = 12 String dockerImage = "quay.io/biowdl/gridss:2.12.2" - Int timeMinutes = 180 + Int timeMinutes = 320 } command { diff --git a/hmftools.wdl b/hmftools.wdl index 5a480f93..ef6355c4 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -780,7 +780,7 @@ task Orange { String memory = "17G" String javaXmx = "16G" - Int timeMinutes = 1440 #FIXME + Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/orange:v1.6" } @@ -1225,9 +1225,9 @@ task Sage { String? mnvFilterEnabled File? coverageBed - Int threads = 4 - String javaXmx = "50G" - String memory = "51G" + Int threads = 32 + String javaXmx = "120G" + String memory = "121G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, referenceBam]), "G") * 9 / threads) String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } diff --git a/sambamba.wdl b/sambamba.wdl index 4c2115e0..6696668a 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -28,7 +28,7 @@ task Flagstat { Int threads = 2 String memory = "8G" - Int timeMinutes = 120 + Int timeMinutes = 320 String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } From af5cf337f77dff48e4526e1da9ca6688a1fbe56c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 22 Jun 2022 12:48:03 +0200 Subject: [PATCH 423/668] adjust sage memory and time --- hmftools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index ef6355c4..6c6ef045 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1226,9 +1226,9 @@ task Sage { File? 
coverageBed Int threads = 32 - String javaXmx = "120G" - String memory = "121G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, referenceBam]), "G") * 9 / threads) + String javaXmx = "8G" + String memory = "9G" + Int timeMinutes = 720 String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } From 4608518f1afa3159658731aaac2dbfc32bedd8b8 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 23 Jun 2022 11:09:25 +0200 Subject: [PATCH 424/668] increase sage memory --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 6c6ef045..32bc24fd 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1226,8 +1226,8 @@ task Sage { File? coverageBed Int threads = 32 - String javaXmx = "8G" - String memory = "9G" + String javaXmx = "16G" + String memory = "20G" Int timeMinutes = 720 String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } From 01aa41d21addca2002f1269ba41e165c33e9e03e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 30 Jun 2022 14:09:40 +0200 Subject: [PATCH 425/668] fix heredoc --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 00705392..0e8fd434 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -94,7 +94,7 @@ task AnnotateSvTypes { command <<< set -e mkdir -p "$(dirname ~{outputPath})" - R --vanilla << EOF + R --vanilla << "EOF" library(VariantAnnotation) library(StructuralVariantAnnotation) @@ -115,7 +115,7 @@ task AnnotateSvTypes { gr <- breakpointRanges(vcf) svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype - writeVcf(vcf, out_path) + writeVcf(vcf, out_path, index=T) EOF >>> From 39af0ad74c6296b2f9aa536ecb2ba123a156670e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 30 Jun 2022 14:12:14 +0200 Subject: [PATCH 426/668] fix output name --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 0e8fd434..d0428e59 100644 --- 
a/gridss.wdl +++ b/gridss.wdl @@ -83,7 +83,7 @@ task AnnotateSvTypes { input { File gridssVcf File gridssVcfIndex - String outputPath = "./gridss.svtyped.vcf" + String outputPath = "./gridss.svtyped.vcf.bgz" String memory = "32G" String dockerImage = "quay.io/biocontainers/bioconductor-structuralvariantannotation:1.10.0--r41hdfd78af_0" From 4e2a09e11c36a69b84451c44bf70c50825d67746 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 30 Jun 2022 15:57:39 +0200 Subject: [PATCH 427/668] detect if compressed --- gridss.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index d0428e59..c12c24d6 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -90,6 +90,8 @@ task AnnotateSvTypes { Int timeMinutes = 240 } + String index = if sub(outputPath, "\\.bgz", "") != outputPath then "T" else "F" + # Based on https://github.com/PapenfussLab/gridss/issues/74 command <<< set -e @@ -115,7 +117,7 @@ task AnnotateSvTypes { gr <- breakpointRanges(vcf) svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype - writeVcf(vcf, out_path, index=T) + writeVcf(vcf, out_path, index=~{index}) EOF >>> From 358c946dc86024324455193032d53873b8361d33 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 30 Jun 2022 16:36:09 +0200 Subject: [PATCH 428/668] fix duoble .bgz and and index to output --- gridss.wdl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index c12c24d6..38daa029 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -90,7 +90,9 @@ task AnnotateSvTypes { Int timeMinutes = 240 } - String index = if sub(outputPath, "\\.bgz", "") != outputPath then "T" else "F" + String effectiveOutputPath = sub(outputPath, "\\.bgz", "") + String index = if effectiveOutputPath != outputPath then "T" else "F" + # Based on https://github.com/PapenfussLab/gridss/issues/74 command <<< @@ -101,7 +103,7 @@ task AnnotateSvTypes { library(StructuralVariantAnnotation) vcf_path <- "~{gridssVcf}" - out_path <- "~{outputPath}" + 
out_path <- "~{effectiveOutputPath}" # Simple SV type classifier simpleEventType <- function(gr) { @@ -123,6 +125,7 @@ task AnnotateSvTypes { output { File vcf = outputPath + File? vcfIndex = outputPath + ".tbi" } runtime { From 760f89e95596cb55ef2b78c27bb61c85cadedcc2 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 1 Jul 2022 10:13:48 +0200 Subject: [PATCH 429/668] give bcftools sort more time --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 88d97cd0..589cddea 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -187,7 +187,7 @@ task Sort { String tmpDir = "./sorting-tmp" String memory = "256M" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) + Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 5 String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 8e7ca0ce64ef97b3ba7859b245377294754edbd0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 1 Jul 2022 14:07:19 +0200 Subject: [PATCH 430/668] increase memory for bcftools sort --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 589cddea..2bf1c732 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -186,7 +186,7 @@ task Sort { String outputPath = "output.vcf.gz" String tmpDir = "./sorting-tmp" - String memory = "256M" + String memory = "5G" Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 5 String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 3af704d65bf0ced2b0a76e049e1019031e2d1941 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 4 Jul 2022 13:04:22 +0200 Subject: [PATCH 431/668] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71309ae8..986582dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. 
--> version 5.1.0-dev --------------------------- ++ Added a task to add SVTYPE annotations to GRIDSS results + (`AnnotateSvTypes`). + The GRIDSS task will now run tabix separately if GRIDSS doesn't produce a vcf index. + Add a script to subtract UMI's from the read name and add them as From 0f3cb30df3276150f6b168ebfc43ed596d9f140b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 4 Jul 2022 16:10:59 +0200 Subject: [PATCH 432/668] Add GT to gridss results in AnnotateSvTypes --- gridss.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 38daa029..35e41d21 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -119,6 +119,8 @@ task AnnotateSvTypes { gr <- breakpointRanges(vcf) svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype + # GRIDSS doesn't supply a GT, so we estimate GT based on AF (assuming CN of 2, might be inaccurate) + geno(vcf)$GT <- ifelse(geno(vcf)$AF > 0.75, "1/1", ifelse(geno(vcf)$AF < 0.25, "0/0", "0/1")) writeVcf(vcf, out_path, index=~{index}) EOF >>> From cbd6de84edb3776aef10e774f2d15f8c29902490 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 18 Jul 2022 13:20:33 +0200 Subject: [PATCH 433/668] fix typo in star GenomeGenerate parameter_meta --- star.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/star.wdl b/star.wdl index aa1fd608..6a123c86 100644 --- a/star.wdl +++ b/star.wdl @@ -78,7 +78,7 @@ task GenomeGenerate { parameter_meta { # inputs - genomeDir: {description:"The directory the STAR index should be written to.", categroy: "common"} + genomeDir: {description:"The directory the STAR index should be written to.", category: "common"} referenceFasta: {description: "The reference Fasta file.", category: "required"} referenceGtf: {description: "The reference GTF file.", category: "common"} sjdbOverhang: {description: "Equivalent to STAR's `--sjdbOverhang` option.", category: "advanced"} From 9625c84b6749aa6b93f933d8a9bf307231dd73e7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 
18 Jul 2022 15:12:44 +0200 Subject: [PATCH 434/668] update changelog --- CHANGELOG.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 986582dd..afd115c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,19 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Added a task for CupGenerateReport. ++ Updated Cuppa to version 1.6. ++ Added a task for Gripss. ++ Fixed the HealthChecker task's determination of the `succeeded` output + value. ++ Updated Linx to version 1.18. ++ Added a task for LinxVisualization. ++ Added a task for HMFtools Orange. ++ Added a task for HMFtools Pave. ++ Updated Purple to version 3.2. ++ Added plot and table outputs of Sage to task outputs. ++ Updated virus-interpreter to version 1.2. ++ Updated Peach to version 1.5. + Added a task to add SVTYPE annotations to GRIDSS results (`AnnotateSvTypes`). + The GRIDSS task will now run tabix separately if GRIDSS doesn't From 743e4e0615aa3568f391e65b3fc064e188a6f12e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 18 Jul 2022 15:35:42 +0200 Subject: [PATCH 435/668] fix linting issue --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 5a480f93..628e2f9b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -190,7 +190,7 @@ task CupGenerateReport { command { set -e mkdir -p ./workdir ~{outputDir} - ln -s -t workdir ~{sep=" " cupData} + ln -s -t workdir ~{cupData} CupGenerateReport \ ~{sampleName} \ workdir/ From e996b7930959027c31a1f7a2fd4683692a13a8a3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 2 Aug 2022 10:00:49 +0200 Subject: [PATCH 436/668] increase time for cobalt --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 32bc24fd..a59b3897 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -114,7 +114,7 @@ task Cobalt { Int threads = 1 String memory = "5G" 
String javaXmx = "4G" - Int timeMinutes = 240 + Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0" } From e43bf3e4364a919cd3b380c58bb347d6be3a8069 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 2 Aug 2022 10:38:48 +0200 Subject: [PATCH 437/668] update changelog --- CHANGELOG.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index afd115c8..f750b212 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,22 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Bedtools coverage's timeMinutes now defaults to `320`. ++ Gridss' runtime attribute defaults were changed to: + + jvmHeapSizeGb: `64` + + nonJvmMemoryGb: `10` + + threads: `12` ++ Virusbreakend's runtime attribute defaults were changed to: + + threads: `12` + + timeMinutes: `320` ++ Cobalt's timeMinutes now defaults to `480`. ++ Orange's timeMinutes now defaults to 10. ++ Sage's runtime attributes were changed to: + + threads: `32` + + javaXmx: `"16G"` + + memory: `"20G"` + + timeMinutes: `720` ++ Sambamba's runtimeMinutes nor defaults to `320`. + Added a task for CupGenerateReport. + Updated Cuppa to version 1.6. + Added a task for Gripss. From 24cc6213026dbe1de017ebeabc2de7fbfad912ae Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 8 Aug 2022 11:11:48 +0200 Subject: [PATCH 438/668] make purple's somaticRainfallPlot output optional --- CHANGELOG.md | 2 ++ hmftools.wdl | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f750b212..be0e5a7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Purple's `somaticRainfallPlot` output is now optional and included in + the `plots` output as well. + Bedtools coverage's timeMinutes now defaults to `320`. 
+ Gridss' runtime attribute defaults were changed to: + jvmHeapSizeGb: `64` diff --git a/hmftools.wdl b/hmftools.wdl index 1542bdfc..f878181a 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1133,7 +1133,7 @@ task Purple { File segmentPlot = "~{outputDir}/plot/~{tumorName}.segment.png" File somaticClonalityPlot = "~{outputDir}/plot/~{tumorName}.somatic.clonality.png" File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" - File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" + File? somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File circosNormalRatio = "~{outputDir}/circos/~{referenceName}.ratio.circos" File circosBaf = "~{outputDir}/circos/~{tumorName}.baf.circos" File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" @@ -1150,8 +1150,8 @@ task Purple { purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv] - Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, - segmentPlot, somaticClonalityPlot, somaticPlot] + Array[File] plots = select_all([circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, + segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot]) Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink, circosTumorRatio, circosGaps, circosBaf, circosCnv, circosInputConf, circosMap, circosSnp] From 8993b5c662428a0bcdc5d2fd4806812b061db529 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 22 Aug 2022 14:25:59 +0200 Subject: [PATCH 439/668] Use gebibytes instead of gigabytes --- CPAT.wdl | 4 +-- bam2fastx.wdl | 4 +-- bcftools.wdl | 10 +++---- bedtools.wdl | 16 +++++------ biowdl.wdl | 2 +- bowtie.wdl | 2 +- bwa-mem2.wdl | 4 +-- bwa.wdl | 6 ++-- ccs.wdl | 2 +- centrifuge.wdl | 10 +++---- chunked-scatter.wdl | 4 +-- clever.wdl | 4 +-- 
collect-columns.wdl | 2 +- common.wdl | 20 +++++++------- cutadapt.wdl | 2 +- deconstructsigs.wdl | 2 +- deepvariant.wdl | 2 +- delly.wdl | 2 +- duphold.wdl | 2 +- extractSigPredictHRD.wdl | 2 +- fastqc.wdl | 2 +- fastqsplitter.wdl | 2 +- fgbio.wdl | 2 +- flash.wdl | 2 +- gatk.wdl | 56 ++++++++++++++++++------------------- gffcompare.wdl | 4 +-- gffread.wdl | 4 +-- gridss.wdl | 10 +++---- hisat2.wdl | 2 +- hmftools.wdl | 34 +++++++++++------------ htseq.wdl | 4 +-- isoseq3.wdl | 2 +- lima.wdl | 2 +- macs2.wdl | 2 +- manta.wdl | 4 +-- minimap2.wdl | 4 +-- multiqc.wdl | 6 ++-- nanopack.wdl | 4 +-- pacbio.wdl | 4 +-- pbbam.wdl | 2 +- pbmm2.wdl | 2 +- peach.wdl | 2 +- picard.wdl | 60 ++++++++++++++++++++-------------------- prepareShiny.wdl | 4 +-- rtg.wdl | 8 +++--- sambamba.wdl | 10 +++---- samtools.wdl | 34 +++++++++++------------ scripts | 2 +- smoove.wdl | 2 +- snpeff.wdl | 2 +- somaticseq.wdl | 10 +++---- spades.wdl | 2 +- star.wdl | 10 +++---- strelka.wdl | 4 +-- stringtie.wdl | 4 +-- survivor.wdl | 2 +- talon.wdl | 20 +++++++------- transcriptclean.wdl | 6 ++-- umi-tools.wdl | 6 ++-- umi.wdl | 4 +-- unicycler.wdl | 2 +- vardict.wdl | 2 +- vt.wdl | 2 +- whatshap.wdl | 6 ++-- wisestork.wdl | 8 +++--- 65 files changed, 234 insertions(+), 234 deletions(-) diff --git a/CPAT.wdl b/CPAT.wdl index e6cef3ea..b96ea0d7 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -34,8 +34,8 @@ task CPAT { Array[String]? startCodons Array[String]? stopCodons - String memory = "4G" - Int timeMinutes = 10 + ceil(size(gene, "G") * 30) + String memory = "4GiB" + Int timeMinutes = 10 + ceil(size(gene, "GiB") * 30) String dockerImage = "quay.io/biocontainers/cpat:3.0.4--py39hcbe4a3b_0" } diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 0bdccca8..62827fd9 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -30,7 +30,7 @@ task Bam2Fasta { String? 
seqIdPrefix - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } @@ -98,7 +98,7 @@ task Bam2Fastq { String? seqIdPrefix - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } diff --git a/bcftools.wdl b/bcftools.wdl index 2bf1c732..726d2e37 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,7 +47,7 @@ task Annotate { File? samplesFile Int threads = 0 - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 60 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -138,7 +138,7 @@ task Filter { String? softFilter String outputPath = "./filtered.vcf.gz" - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 1 + ceil(size(vcf, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -186,7 +186,7 @@ task Sort { String outputPath = "output.vcf.gz" String tmpDir = "./sorting-tmp" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 5 String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -261,7 +261,7 @@ task Stats { String? userTsTv Int threads = 0 - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 1 + 2* ceil(size(select_all([inputVcf, compareVcf]), "G")) # TODO: Estimate, 2 minutes per GB, refine later. String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -350,7 +350,7 @@ task View { String? exclude String? 
include - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } diff --git a/bedtools.wdl b/bedtools.wdl index 80a281d6..fe18ede6 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -26,7 +26,7 @@ task Complement { File inputBed String outputBed = basename(inputBed, "\.bed") + ".complement.bed" - String memory = "~{512 + ceil(size([inputBed, faidx], "M"))}M" + String memory = "~{512 + ceil(size([inputBed, faidx], "MiB"))}MiB" Int timeMinutes = 1 + ceil(size([inputBed, faidx], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -75,7 +75,7 @@ task Coverage { File? bIndex String outputPath = "./coverage.tsv" - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 320 String dockerImage = "quay.io/biocontainers/bedtools:2.30.0--h7d7f7ad_2" } @@ -120,7 +120,7 @@ task Merge { File inputBed String outputBed = "merged.bed" - String memory = "~{512 + ceil(size(inputBed, "M"))}M" + String memory = "~{512 + ceil(size(inputBed, "MiB"))}MiB" Int timeMinutes = 1 + ceil(size(inputBed, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -159,7 +159,7 @@ task MergeBedFiles { Array[File]+ bedFiles String outputBed = "merged.bed" - String memory = "~{512 + ceil(size(bedFiles, "M"))}M" + String memory = "~{512 + ceil(size(bedFiles, "MiB"))}MiB" Int timeMinutes = 1 + ceil(size(bedFiles, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -207,8 +207,8 @@ task Sort { File? genome File? faidx - String memory = "~{512 + ceil(size(inputBed, "M"))}M" - Int timeMinutes = 1 + ceil(size(inputBed, "G")) + String memory = "~{512 + ceil(size(inputBed, "MiB"))}MiB" + Int timeMinutes = 1 + ceil(size(inputBed, "GiB")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -267,8 +267,8 @@ task Intersect { File? 
faidx # Giving a faidx file will set the sorted option. - String memory = "~{512 + ceil(size([regionsA, regionsB], "M"))}M" - Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "G")) + String memory = "~{512 + ceil(size([regionsA, regionsB], "MiB"))}MiB" + Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "GiB")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } diff --git a/biowdl.wdl b/biowdl.wdl index dead8303..f891618e 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -32,7 +32,7 @@ task InputConverter { Boolean checkFileMd5sums=false Boolean old=false - String memory = "128M" + String memory = "128MiB" Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.3.0--pyhdfd78af_0" } diff --git a/bowtie.wdl b/bowtie.wdl index 87210dcd..7e817594 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -37,7 +37,7 @@ task Bowtie { String picardXmx = "4G" Int threads = 1 - String memory = "~{5 + ceil(size(indexFiles, "G"))}G" + String memory = "~{5 + ceil(size(indexFiles, "GiB"))}GiB" Int timeMinutes = 1 + ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads) # Image contains bowtie=1.2.2 and picard=2.9.2 String dockerImage = "quay.io/biocontainers/mulled-v2-bfe71839265127576d3cd749c056e7b168308d56:1d8bec77b352cdcf3e9ff3d20af238b33ed96eae-0" diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index 4566e68c..b3db0ad1 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -36,7 +36,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) + Int timeMinutes = 1 + ceil(size([read1, read2], "GiB") * 220 / threads) # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-6a15c99309c82b345497d24489bee67bbb76c2f6:1c9c3227b9bf825a8dc9726a25701aa23c0b1f12-0" } @@ -84,7 +84,7 @@ task Mem { # One extra thread for bwa-postalt + samtools is not needed. 
# These only use 5-10% of compute power and not always simultaneously. cpu: threads - memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" + memory: "~{select_first([memoryGb, estimatedMemoryGb])}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/bwa.wdl b/bwa.wdl index 373de628..d4f4495a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -37,7 +37,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 300 / threads) + Int timeMinutes = 10 + ceil(size([read1, read2], "GiB") * 300 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "G") * 2) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "GiB") * 2) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. @@ -81,7 +81,7 @@ task Mem { # One extra thread for bwa-postalt + samtools is not needed. # These only use 5-10% of compute power and not always simultaneously. cpu: threads - memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" + memory: "~{select_first([memoryGb, estimatedMemoryGb])}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/ccs.wdl b/ccs.wdl index 29f1a7f9..27db15ab 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -42,7 +42,7 @@ task CCS { String? 
chunkString Int threads = 2 - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/pbccs:6.0.0--h9ee0642_2" } diff --git a/centrifuge.wdl b/centrifuge.wdl index 07dc7f85..757af239 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -36,7 +36,7 @@ task Build { File? sizeTable Int threads = 5 - String memory = "20G" + String memory = "20GiB" Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -109,7 +109,7 @@ task Classify { String? excludeTaxIDs Int threads = 4 - String memory = "16G" + String memory = "16GiB" Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -186,7 +186,7 @@ task Inspect { Int? across - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -245,7 +245,7 @@ task KReport { Int? minimumScore Int? minimumLength - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -303,7 +303,7 @@ task KTimportTaxonomy { File inputFile String outputPrefix - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 1 String dockerImage = "biocontainers/krona:v2.7.1_cv1" } diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 66954c36..af24b139 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -30,7 +30,7 @@ task ChunkedScatter { Int? overlap Int? minimumBasesPerFile - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/chunked-scatter:1.0.0--py_0" } @@ -84,7 +84,7 @@ task ScatterRegions { Int? 
scatterSize - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/chunked-scatter:1.0.0--py_0" } diff --git a/clever.wdl b/clever.wdl index 186be514..791a0ba1 100644 --- a/clever.wdl +++ b/clever.wdl @@ -34,7 +34,7 @@ task Mateclever { Int maxOffset = 150 Int threads = 10 - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 600 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } @@ -94,7 +94,7 @@ task Prediction { String outputPath = "./clever" Int threads = 10 - String memory = "55G" + String memory = "55GiB" Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } diff --git a/collect-columns.wdl b/collect-columns.wdl index 3d65c7e7..03ccb6f7 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -62,7 +62,7 @@ task CollectColumns { } runtime { - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/common.wdl b/common.wdl index 1e4fc8cb..1ce2895f 100644 --- a/common.wdl +++ b/common.wdl @@ -25,7 +25,7 @@ task AppendToStringArray { Array[String] array String string - String memory = "1G" + String memory = "1GiB" } command { @@ -51,7 +51,7 @@ task CheckFileMD5 { # By default cromwell expects /bin/bash to be present in the container. # The 'bash' container does not fill this requirement. (It is in /usr/local/bin/bash) # Use a stable version of debian:stretch-slim for this. (Smaller than ubuntu) - String memory = "1G" + String memory = "1GiB" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -75,7 +75,7 @@ task ConcatenateTextFiles { Boolean unzip = false Boolean zip = false - String memory = "1G" + String memory = "1GiB" } # When input and output is both compressed decompression is not needed. 
@@ -104,7 +104,7 @@ task Copy { Boolean recursive = false # Version not that important as long as it is stable. - String memory = "1G" + String memory = "1GiB" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -132,7 +132,7 @@ task CreateLink { String inputFile String outputPath - String memory = "1G" + String memory = "1GiB" } command { @@ -170,7 +170,7 @@ task GetSamplePositionInArray { runtime { # 4 gigs of memory to be able to build the docker image in singularity. - memory: "4G" + memory: "4GiB" docker: dockerImage timeMinutes: 5 } @@ -190,7 +190,7 @@ task MapMd5 { input { Map[String,String] map - String memory = "1G" + String memory = "1GiB" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -214,7 +214,7 @@ task StringArrayMd5 { input { Array[String] stringArray - String memory = "1G" + String memory = "1GiB" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -238,7 +238,7 @@ task TextToFile { String text String outputFile = "out.txt" - String memory = "1G" + String memory = "1GiB" Int timeMinutes = 1 String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -274,7 +274,7 @@ task YamlToJson { File yaml String outputJson = basename(yaml, "\.ya?ml$") + ".json" - String memory = "128M" + String memory = "128MiB" Int timeMinutes = 1 # biowdl-input-converter has python and pyyaml. String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.3.0--pyhdfd78af_0" diff --git a/cutadapt.wdl b/cutadapt.wdl index b49a95d4..9a67692c 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -83,7 +83,7 @@ task Cutadapt { Boolean? 
noZeroCap Int cores = 4 - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" } diff --git a/deconstructsigs.wdl b/deconstructsigs.wdl index ef47e3e3..c44bf9c0 100644 --- a/deconstructsigs.wdl +++ b/deconstructsigs.wdl @@ -27,7 +27,7 @@ task DeconstructSigs { String outputPath = "./signatures.rds" Int timeMinutes = 15 - String memory = "4G" + String memory = "4GiB" String dockerImage = "quay.io/biocontainers/r-deconstructsigs:1.9.0--r41hdfd78af_1" } diff --git a/deepvariant.wdl b/deepvariant.wdl index 28aee813..25d05bd9 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -37,7 +37,7 @@ task RunDeepVariant { String? sampleName Boolean? VCFStatsReport = true - String memory = "3G" + String memory = "3GiB" Int timeMinutes = 5000 String dockerImage = "google/deepvariant:1.0.0" } diff --git a/delly.wdl b/delly.wdl index bf00ed36..7333c5ff 100644 --- a/delly.wdl +++ b/delly.wdl @@ -28,7 +28,7 @@ task CallSV { File referenceFastaFai String outputPath = "./delly/delly.bcf" - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 300 String dockerImage = "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" } diff --git a/duphold.wdl b/duphold.wdl index 80fe31d2..0426da56 100644 --- a/duphold.wdl +++ b/duphold.wdl @@ -30,7 +30,7 @@ task Duphold { String sample String outputPath = "./duphold.vcf" - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/duphold:0.2.1--h516909a_1" } diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl index 2b5d9781..1520b608 100644 --- a/extractSigPredictHRD.wdl +++ b/extractSigPredictHRD.wdl @@ -30,7 +30,7 @@ task ExtractSigPredictHRD { File svVcfIndex Boolean hg38 = false - String memory = "3G" + String memory = "3GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/chord-mutsigextractor:2.00_1.14" } diff --git 
a/fastqc.wdl b/fastqc.wdl index 3a07db4e..d821e531 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -45,7 +45,7 @@ task Fastqc { # weird edge case fastq's. String javaXmx="1750M" Int threads = 1 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" diff --git a/fastqsplitter.wdl b/fastqsplitter.wdl index 25a50954..4a02697c 100644 --- a/fastqsplitter.wdl +++ b/fastqsplitter.wdl @@ -63,7 +63,7 @@ task Fastqsplitter { runtime { cpu: cores - memory: "~{memory}G" + memory: "~{memory}GiB" docker: dockerImage } } diff --git a/fgbio.wdl b/fgbio.wdl index d50906d3..15fb0ea4 100644 --- a/fgbio.wdl +++ b/fgbio.wdl @@ -26,7 +26,7 @@ task AnnotateBamWithUmis { File inputUmi String outputPath - String memory = "120G" + String memory = "120GiB" Int timeMinutes = 360 String javaXmx="100G" String dockerImage = "quay.io/biocontainers/fgbio:1.4.0--hdfd78af_0" diff --git a/flash.wdl b/flash.wdl index c4554c50..7b50e0d7 100644 --- a/flash.wdl +++ b/flash.wdl @@ -34,7 +34,7 @@ task Flash { Int? maxOverlap Int threads = 2 - String memory = "2G" + String memory = "2GiB" } command { diff --git a/gatk.wdl b/gatk.wdl index 5cf7c673..0b93efe6 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -34,7 +34,7 @@ task AnnotateIntervals { File? segmentalDuplicationTrack String javaXmx = "2G" - String memory = "3G" + String memory = "3GiB" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -129,7 +129,7 @@ task ApplyBQSR { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -197,7 +197,7 @@ task BaseRecalibrator { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -232,7 +232,7 @@ task CalculateContamination { File? 
normalPileups String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -279,7 +279,7 @@ task CallCopyRatioSegments { File copyRatioSegments String javaXmx = "2G" - String memory = "3G" + String memory = "3GiB" Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -332,7 +332,7 @@ task CollectAllelicCounts { File? commonVariantSitesIndex String javaXmx = "10G" - String memory = "11G" + String memory = "11GiB" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -390,7 +390,7 @@ task CollectReadCounts { String intervalMergingRule = "OVERLAPPING_ONLY" String javaXmx = "7G" - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 5) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -449,7 +449,7 @@ task CombineGVCFs { File referenceFastaFai String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 1 + ceil(size(gvcfFiles, "G") * 8) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -509,7 +509,7 @@ task CombineVariants { String outputPath String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 180 String dockerImage = "broadinstitute/gatk3:3.8-1" } @@ -579,7 +579,7 @@ task CreateReadCountPanelOfNormals { File? annotatedIntervals String javaXmx = "7G" - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 5 # The biocontainer causes a spark related error for some reason. String dockerImage = "broadinstitute/gatk:4.1.8.0" @@ -629,7 +629,7 @@ task DenoiseReadCounts { File? 
annotatedIntervals String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -690,7 +690,7 @@ task FilterMutectCalls { File? artifactPriors String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -775,7 +775,7 @@ task GatherBqsrReports { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -805,7 +805,7 @@ task GenomicsDBImport { String? tmpDir String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -866,7 +866,7 @@ task GenotypeGVCFs { File? pedigree String javaXmx = "6G" - String memory = "7G" + String memory = "7GiB" Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -932,7 +932,7 @@ task GetPileupSummaries { String outputPrefix String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1032,7 +1032,7 @@ task HaplotypeCaller { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -1073,7 +1073,7 @@ task LearnReadOrientationModel { Array[File]+ f1r2TarGz String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1114,7 +1114,7 @@ task MergeStats { Array[File]+ stats String javaXmx = "14G" - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1162,7 +1162,7 @@ task ModelSegments { File? normalAllelicCounts String javaXmx = "10G" - String memory = "11G" + String memory = "11GiB" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1250,7 +1250,7 @@ task MuTect2 { File? panelOfNormalsIndex String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1325,7 +1325,7 @@ task PlotDenoisedCopyRatios { Int? minimumContigLength String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.8.0" } @@ -1393,7 +1393,7 @@ task PlotModeledSegments { Int? minimumContigLength String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.8.0" } @@ -1454,7 +1454,7 @@ task PreprocessIntervals { File? 
intervals String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 6) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1516,7 +1516,7 @@ task SelectVariants { String? selectTypeToInclude String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1576,7 +1576,7 @@ task SplitNCigarReads { Array[File] intervals = [] String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1645,7 +1645,7 @@ task VariantEval { File? dbsnpVCFIndex String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" # TODO: Refine estimate. For now 4 minutes per GB of input. Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs, select_all([referenceFasta, dbsnpVCF])]), "G") * 20) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" @@ -1722,7 +1722,7 @@ task VariantFiltration { Array[File] intervals = [] String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } diff --git a/gffcompare.wdl b/gffcompare.wdl index d06602bc..fe1db0a8 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -46,8 +46,8 @@ task GffCompare { Int? maxDistanceGroupingTranscriptStartSites String? namePrefix - String memory = "4G" - Int timeMinutes = 1 + ceil(size(inputGtfFiles, "G") * 30) + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(inputGtfFiles, "GiB") * 30) String dockerImage = "quay.io/biocontainers/gffcompare:0.10.6--h2d50403_0" # This workaround only works in the input section. 
diff --git a/gffread.wdl b/gffread.wdl index a04540f5..26a2773c 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -32,8 +32,8 @@ task GffRead { String? proteinFastaPath String? filteredGffPath - String memory = "4G" - Int timeMinutes = 1 + ceil(size(inputGff, "G") * 10) + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(inputGff, "GiB") * 10) String dockerImage = "quay.io/biocontainers/gffread:0.9.12--0" } diff --git a/gridss.wdl b/gridss.wdl index add3c08f..cfe53751 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -33,7 +33,7 @@ task AnnotateInsertedSequence { Int threads = 8 String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 120 } @@ -85,7 +85,7 @@ task AnnotateSvTypes { File gridssVcfIndex String outputPath = "./gridss.svtyped.vcf.bgz" - String memory = "32G" + String memory = "32GiB" String dockerImage = "quay.io/biocontainers/bioconductor-structuralvariantannotation:1.10.0--r41hdfd78af_0" Int timeMinutes = 240 } @@ -201,7 +201,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb + nonJvmMemoryGb}G" + memory: "~{jvmHeapSizeGb + nonJvmMemoryGb}GiB" time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } @@ -239,7 +239,7 @@ task GridssAnnotateVcfRepeatmasker { File gridssVcfIndex String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" - String memory = "25G" + String memory = "25GiB" Int threads = 8 String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 1440 @@ -289,7 +289,7 @@ task Virusbreakend { File virusbreakendDB String outputPath = "./virusbreakend.vcf" - String memory = "75G" + String memory = "75GiB" Int threads = 12 String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 320 diff --git a/hisat2.wdl b/hisat2.wdl index a2c0777c..50fabc9d 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -82,7 +82,7 @@ task Hisat2 { runtime { cpu: threads - memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" + memory: 
"~{select_first([memoryGb, estimatedMemoryGb])}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/hmftools.wdl b/hmftools.wdl index f878181a..26ab4e4a 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,7 +35,7 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = "70G" + String memory = "70GiB" String javaXmx = "64G" Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" @@ -112,7 +112,7 @@ task Cobalt { File gcProfile Int threads = 1 - String memory = "5G" + String memory = "5GiB" String javaXmx = "4G" Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0" @@ -174,7 +174,7 @@ task CupGenerateReport { File cupData String outputDir = "./cuppa" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/cuppa:1.6" } @@ -242,7 +242,7 @@ task Cuppa { String outputDir = "./cuppa" String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/cuppa:1.6" } @@ -298,7 +298,7 @@ task CuppaChart { File cupData String outputDir = "./cuppa" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 5 String dockerImage = "quay.io/biowdl/cuppa:1.6" } @@ -348,7 +348,7 @@ task Gripss { File vcfIndex String outputDir = "./" - String memory = "17G" + String memory = "17GiB" String javaXmx = "16G" Int timeMinutes = 50 String dockerImage = "quay.io/biocontainers/hmftools-gripss:2.0--hdfd78af_0" @@ -419,7 +419,7 @@ task GripssApplicationKt { File breakendPon File breakpointPon - String memory = "32G" + String memory = "32GiB" String javaXmx = "31G" Int timeMinutes = 45 String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" @@ -478,7 +478,7 @@ task GripssHardFilterApplicationKt { File inputVcf String outputPath = "gripss_hard_filter.vcf.gz" - String memory = "3G" + String memory = "3GiB" String javaXmx = "2G" Int timeMinutes = 15 String 
dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" @@ -527,7 +527,7 @@ task HealthChecker { Array[File]+ purpleOutput String javaXmx = "2G" - String memory = "1G" + String memory = "3GiB" Int timeMinutes = 1 String dockerImage = "quay.io/biowdl/health-checker:3.2" } @@ -604,7 +604,7 @@ task Linx { File transExonDataCsv File transSpliceDataCsv - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" @@ -690,7 +690,7 @@ task LinxVisualisations { Array[File]+ linxOutput Boolean plotReportable = true - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" @@ -778,7 +778,7 @@ task Orange { File cohortMappingTsv File cohortPercentilesTsv - String memory = "17G" + String memory = "17GiB" String javaXmx = "16G" Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/orange:v1.6" @@ -902,7 +902,7 @@ task Pave { Int timeMinutes = 50 String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" String dockerImage = "quay.io/biowdl/pave:v1.0" } @@ -979,7 +979,7 @@ task Protect { File chordPrediction File annotatedVirus - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 String dockerImage = "quay.io/biowdl/protect:v2.0" @@ -1078,7 +1078,7 @@ task Purple { Int threads = 1 Int timeMinutes = 30 - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" # clone of quay.io/biocontainers/hmftools-purple:3.2--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6' String dockerImage = "quay.io/biowdl/hmftools-purple:3.2" @@ -1227,7 +1227,7 @@ task Sage { Int threads = 32 String javaXmx = "16G" - String memory = "20G" + String memory = "20GiB" Int timeMinutes = 720 String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } @@ -1315,7 +1315,7 @@ 
task VirusInterpreter { File virusReportingDbTsv String outputDir = "." - String memory = "3G" + String memory = "3GiB" String javaXmx = "2G" Int timeMinutes = 15 String dockerImage = "quay.io/biowdl/virus-interpreter:1.2" diff --git a/htseq.wdl b/htseq.wdl index 76d3bb83..92bc4423 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -33,8 +33,8 @@ task HTSeqCount { String? idattr Int nprocesses = 1 - String memory = "8G" - Int timeMinutes = 1440 #10 + ceil(size(inputBams, "G") * 60) FIXME + String memory = "8GiB" + Int timeMinutes = 1440 #10 + ceil(size(inputBams, "GiB") * 60) FIXME String dockerImage = "quay.io/biocontainers/htseq:0.12.4--py37hb3f55d8_0" } diff --git a/isoseq3.wdl b/isoseq3.wdl index aacbfc60..77f19f80 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -32,7 +32,7 @@ task Refine { String outputNamePrefix Int threads = 2 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/isoseq3:3.4.0--0" } diff --git a/lima.wdl b/lima.wdl index 6b87ad4f..eece2b3f 100644 --- a/lima.wdl +++ b/lima.wdl @@ -49,7 +49,7 @@ task Lima { String outputPrefix Int threads = 2 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/lima:2.2.0--h9ee0642_0" } diff --git a/macs2.wdl b/macs2.wdl index 2afe3bbe..e6a011ad 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -31,7 +31,7 @@ task PeakCalling { String format = "AUTO" Boolean nomodel = false Int timeMinutes = 600 # Default to 10 hours - String memory = "8G" + String memory = "8GiB" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } diff --git a/manta.wdl b/manta.wdl index 1c949af2..6804f304 100644 --- a/manta.wdl +++ b/manta.wdl @@ -60,7 +60,7 @@ task Germline { runtime { cpu: cores - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" docker: dockerImage time_minutes: timeMinutes } @@ -138,7 +138,7 @@ task Somatic { runtime { cpu: cores - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" docker: dockerImage 
time_minutes: timeMinutes } diff --git a/minimap2.wdl b/minimap2.wdl index 50ff4db3..96cc7734 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -31,7 +31,7 @@ task Indexing { Int? splitIndex Int cores = 1 - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } @@ -98,7 +98,7 @@ task Mapping { String? howToFindGTAG Int cores = 4 - String memory = "30G" + String memory = "30GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } diff --git a/multiqc.wdl b/multiqc.wdl index a1662937..21fc8a7d 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -57,11 +57,11 @@ task MultiQC { String? clConfig String? memory - Int timeMinutes = 10 + ceil(size(reports, "G") * 8) + Int timeMinutes = 10 + ceil(size(reports, "GiB") * 8) String dockerImage = "quay.io/biocontainers/multiqc:1.9--py_1" } - Int memoryGb = 2 + ceil(size(reports, "G")) + Int memoryGb = 2 + ceil(size(reports, "GiB")) # This is where the reports end up. It does not need to be changed by the # user. It is full of symbolic links, so it is not of any use to the user @@ -139,7 +139,7 @@ task MultiQC { } runtime { - memory: select_first([memory, "~{memoryGb}G"]) + memory: select_first([memory, "~{memoryGb}GiB"]) time_minutes: timeMinutes docker: dockerImage } diff --git a/nanopack.wdl b/nanopack.wdl index e4c94a43..bd3f433e 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -40,7 +40,7 @@ task NanoPlot { String? readType Int threads = 2 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/nanoplot:1.38.0--pyhdfd78af_0" } @@ -130,7 +130,7 @@ task NanoQc { Int? 
minLength - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/nanoqc:0.9.4--py_0" } diff --git a/pacbio.wdl b/pacbio.wdl index b21c69bc..dcf0f69e 100644 --- a/pacbio.wdl +++ b/pacbio.wdl @@ -25,7 +25,7 @@ task mergePacBio { Array[File]+ reports String outputPathMergedReport - String memory = "4G" + String memory = "4GiB" String dockerImage = "quay.io/redmar_van_den_berg/pacbio-merge:0.2" } @@ -62,7 +62,7 @@ task ccsChunks { input { Int chunkCount - String memory = "4G" + String memory = "4GiB" String dockerImage = "python:3.7-slim" } diff --git a/pbbam.wdl b/pbbam.wdl index ae64b87c..d5cafed6 100644 --- a/pbbam.wdl +++ b/pbbam.wdl @@ -26,7 +26,7 @@ task Index { String? outputBamPath - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) String dockerImage = "quay.io/biocontainers/pbbam:1.6.0--h5b7e6e0_0" } diff --git a/pbmm2.wdl b/pbmm2.wdl index 5fda1c87..ea7c05df 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -29,7 +29,7 @@ task Mapping { File queryFile Int cores = 4 - String memory = "30G" + String memory = "30GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 2000 / cores) String dockerImage = "quay.io/biocontainers/pbmm2:1.3.0--h56fc30b_1" } diff --git a/peach.wdl b/peach.wdl index d1bc17f8..7da029d0 100644 --- a/peach.wdl +++ b/peach.wdl @@ -29,7 +29,7 @@ task Peach { String outputDir = "./peach" File panelJson - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biowdl/peach:v1.5" Int timeMinutes = 5 } diff --git a/picard.wdl b/picard.wdl index 3d835829..f762ecdd 100644 --- a/picard.wdl +++ b/picard.wdl @@ -27,7 +27,7 @@ task BedToIntervalList { String outputPath = "regions.interval_list" String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -88,7 +88,7 @@ task CollectHsMetrics { Int memoryMb = javaXmxMb + 512 # 
Additional * 2 because picard multiple metrics reads the # reference fasta twice. - Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(referenceFasta, "GiB") * 3 * 2) + ceil(size(inputBam, "GiB") * 6) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -109,7 +109,7 @@ task CollectHsMetrics { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -157,7 +157,7 @@ task CollectMultipleMetrics { Int javaXmxMb = 3072 Int memoryMb = javaXmxMb + 512 # Additional * 2 because picard multiple metrics reads the reference fasta twice. - Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(referenceFasta, "GiB") * 3 * 2) + ceil(size(inputBam, "GiB") * 6) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -223,7 +223,7 @@ task CollectMultipleMetrics { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -281,9 +281,9 @@ task CollectRnaSeqMetrics { String strandSpecificity = "NONE" String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" # With 6 minutes per G there were several timeouts. 
- Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 12) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -340,8 +340,8 @@ task CollectTargetedPcrMetrics { String basename String javaXmx = "3G" - String memory = "4G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 6) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -402,7 +402,7 @@ task CollectVariantCallingMetrics { String basename String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -459,9 +459,9 @@ task CollectWgsMetrics { Int? minimumBaseQuality Int? coverageCap - String memory = "5G" + String memory = "5GiB" String javaXmx = "4G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 6) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -516,7 +516,7 @@ task CreateSequenceDictionary { String outputDir String javaXmx = "2G" - String memory = "3G" + String memory = "3GiB" String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -568,7 +568,7 @@ task GatherBamFiles { Int javaXmxMb = 1024 Int memoryMb = javaXmxMb + 512 # One minute per input gigabyte. 
- Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) + Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 1) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -593,7 +593,7 @@ task GatherBamFiles { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -630,8 +630,8 @@ task GatherVcfs { Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "4G" - String memory = "5G" - Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2) + String memory = "5GiB" + Int timeMinutes = 1 + ceil(size(inputVcfs, "GiB") * 2) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -700,7 +700,7 @@ task MarkDuplicates { Int javaXmxMb = 6656 # 6.5G String memoryMb = javaXmxMb + 512 - Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) + Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 8) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -736,7 +736,7 @@ task MarkDuplicates { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -782,8 +782,8 @@ task MergeVCFs { Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "4G" - String memory = "5G" - Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 + String memory = "5GiB" + Int timeMinutes = 1 + ceil(size(inputVCFs, "GiB")) * 2 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -838,7 +838,7 @@ task SamToFastq { Boolean paired = true String javaXmx = "16G" # High memory default to avoid crashes. 
- String memory = "17G" + String memory = "17GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" @@ -900,7 +900,7 @@ task ScatterIntervalList { Int scatter_count String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -943,7 +943,7 @@ task SortSam { # 4.000000001 which gets rounded to 5. # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -971,7 +971,7 @@ task SortSam { runtime { cpu: 1 - memory: "~{1 + XmxGb}G" + memory: "~{1 + XmxGb}GiB" time_minutes: timeMinutes docker: dockerImage } @@ -1004,8 +1004,8 @@ task SortVcf { File? 
dict String javaXmx = "8G" - String memory = "9G" - Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) + String memory = "9GiB" + Int timeMinutes = 1 + ceil(size(vcfFiles, "GiB") * 5) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -1054,8 +1054,8 @@ task RenameSample { String newSampleName String javaXmx = "8G" - String memory = "9G" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) + String memory = "9GiB" + Int timeMinutes = 1 + ceil(size(inputVcf, "GiB") * 2) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -1109,7 +1109,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { Boolean useJdkInflater = false Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" Int timeMinutes = 360 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } diff --git a/prepareShiny.wdl b/prepareShiny.wdl index d669e2d1..28910743 100644 --- a/prepareShiny.wdl +++ b/prepareShiny.wdl @@ -25,7 +25,7 @@ task CreateDesignMatrix { File countTable String shinyDir = "." - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" } @@ -67,7 +67,7 @@ task CreateAnnotation { File referenceGtfFile String shinyDir = "." 
- String memory = "5G" + String memory = "5GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" } diff --git a/rtg.wdl b/rtg.wdl index 0e86ce3f..3e9dab9b 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -27,8 +27,8 @@ task Format { String outputPath = "seq_data.sdf" String rtgMem = "8G" - String memory = "9G" - Int timeMinutes = 1 + ceil(size(inputFiles) * 2) + String memory = "9GiB" + Int timeMinutes = 1 + ceil(size(inputFiles, "GiB") * 2) String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" } @@ -85,8 +85,8 @@ task VcfEval { String rtgMem = "8G" Int threads = 1 # Tool default is number of cores in the system 😱. - String memory = "9G" - Int timeMinutes = 1 + ceil(size([baseline, calls], "G") * 5) + String memory = "9GiB" + Int timeMinutes = 1 + ceil(size([baseline, calls], "GiB") * 5) String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" } diff --git a/sambamba.wdl b/sambamba.wdl index 6696668a..be347f94 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -27,7 +27,7 @@ task Flagstat { String outputPath = "./flagstat.txt" Int threads = 2 - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 320 String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } @@ -84,7 +84,7 @@ task Markdup { # Added 8192 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 8192 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
- Int timeMinutes = 1 + ceil(size(inputBams, "G") * 25) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 25) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } @@ -113,7 +113,7 @@ task Markdup { runtime { cpu: threads - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -149,7 +149,7 @@ task Sort { Int memoryPerThreadGb = 4 Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } @@ -177,7 +177,7 @@ task Sort { runtime { cpu: threads - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" docker: dockerImage time_minutes: timeMinutes } diff --git a/samtools.wdl b/samtools.wdl index 81b6c17d..e1b08173 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -26,8 +26,8 @@ task BgzipAndIndex { String outputDir String type = "vcf" - String memory = "2G" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String memory = "2GiB" + Int timeMinutes = 1 + ceil(size(inputFile, "GiB")) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -71,7 +71,7 @@ task Faidx { File inputFile String outputDir - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -119,7 +119,7 @@ task Fastq { Int? 
compressionLevel Int threads = 1 - String memory = "1G" + String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam) * 2) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -183,8 +183,8 @@ task FilterShortReadsBam { File bamFile String outputPathBam - String memory = "1G" - Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) + String memory = "1GiB" + Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 8) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -229,7 +229,7 @@ task Flagstat { File inputBam String outputPath - String memory = "256M" # Only 40.5 MiB used for 150G bam file. + String memory = "256MiB" # Only 40.5 MiB used for 150G bam file. Int timeMinutes = 1 + ceil(size(inputBam, "G")) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -269,8 +269,8 @@ task Index { String? outputBamPath - String memory = "2G" - Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) + String memory = "2GiB" + Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -321,7 +321,7 @@ task Markdup { File inputBam String outputBamPath - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -359,8 +359,8 @@ task Merge { Boolean force = true Int threads = 1 - String memory = "4G" - Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -415,7 +415,7 @@ task Sort { Int memoryPerThreadGb = 4 Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -444,7 +444,7 @@ task 
Sort { runtime { cpu: threads - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" time_minutes: timeMinutes docker: dockerImage } @@ -473,7 +473,7 @@ task Tabix { String outputFilePath = "indexed.vcf.gz" String type = "vcf" - Int timeMinutes = 1 + ceil(size(inputFile, "G") * 2) + Int timeMinutes = 1 + ceil(size(inputFile, "GiB") * 2) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -526,8 +526,8 @@ task View { Int? MAPQthreshold Int threads = 1 - String memory = "1G" - Int timeMinutes = 1 + ceil(size(inFile, "G") * 5) + String memory = "1GiB" + Int timeMinutes = 1 + ceil(size(inFile, "GiB") * 5) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } diff --git a/scripts b/scripts index 84690a30..98cc3e10 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 +Subproject commit 98cc3e10125c853a70f41ceccf8f9d5428d4c1a3 diff --git a/smoove.wdl b/smoove.wdl index d1011f6c..7a1ac38b 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -29,7 +29,7 @@ task Call { String sample String outputDir = "./smoove" - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" } diff --git a/snpeff.wdl b/snpeff.wdl index 4a3640c7..0f14e5b5 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -36,7 +36,7 @@ task SnpEff { Boolean noShiftHgvs = false Int? upDownStreamLen - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/snpeff:5.0--0" diff --git a/somaticseq.wdl b/somaticseq.wdl index 63f8362e..7656d086 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -47,7 +47,7 @@ task ParallelPaired { File? strelkaSNV File? strelkaIndel - String memory = "2G" + String memory = "2GiB" Int threads = 1 Int timeMinutes = 60 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -162,7 +162,7 @@ task ParallelPairedTrain { File? strelkaSNV File? 
strelkaIndel - String memory = "2G" + String memory = "2GiB" Int threads = 1 Int timeMinutes = 240 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -270,7 +270,7 @@ task ParallelSingle { File? scalpelVCF File? strelkaVCF - String memory = "2G" + String memory = "2GiB" Int threads = 1 Int timeMinutes = 60 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -362,7 +362,7 @@ task ParallelSingleTrain { File? scalpelVCF File? strelkaVCF - String memory = "2G" + String memory = "2GiB" Int threads = 1 Int timeMinutes = 240 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -441,7 +441,7 @@ task ModifyStrelka { File strelkaVCF String outputVCFName = basename(strelkaVCF, ".gz") - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 20 String dockerImage = "lethalfang/somaticseq:3.1.0" } diff --git a/spades.wdl b/spades.wdl index 3975dd32..d717ab28 100644 --- a/spades.wdl +++ b/spades.wdl @@ -100,6 +100,6 @@ task Spades { runtime { cpu: threads - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" } } diff --git a/star.wdl b/star.wdl index 6a123c86..88d3c838 100644 --- a/star.wdl +++ b/star.wdl @@ -29,8 +29,8 @@ task GenomeGenerate { Int? sjdbOverhang Int threads = 4 - String memory = "32G" - Int timeMinutes = ceil(size(referenceFasta, "G") * 240 / threads) + String memory = "32GiB" + Int timeMinutes = ceil(size(referenceFasta, "GiB") * 240 / threads) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } @@ -130,12 +130,12 @@ task Star { Int runThreadN = 4 String? memory # 1 minute initialization + time reading in index (1 minute per G) + time aligning data. - Int timeMinutes = 1 + ceil(size(indexFiles, "G")) + ceil(size(flatten([inputR1, inputR2]), "G") * 300 / runThreadN) + Int timeMinutes = 1 + ceil(size(indexFiles, "GiB")) + ceil(size(flatten([inputR1, inputR2]), "GiB") * 300 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } # Use a margin of 30% index size. Real memory usage is ~30 GiB for a 27 GiB index. 
- Int memoryGb = 1 + ceil(size(indexFiles, "G") * 1.3) + Int memoryGb = 1 + ceil(size(indexFiles, "GiB") * 1.3) # For some reason doing above calculation inside a string does not work. # So we solve it with an optional memory string and using select_first # in the runtime section. @@ -172,7 +172,7 @@ task Star { runtime { cpu: runThreadN - memory: select_first([memory, "~{memoryGb}G"]) + memory: select_first([memory, "~{memoryGb}GiB"]) time_minutes: timeMinutes docker: dockerImage } diff --git a/strelka.wdl b/strelka.wdl index be08e386..39afe172 100644 --- a/strelka.wdl +++ b/strelka.wdl @@ -63,7 +63,7 @@ task Germline { runtime { cpu: cores - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" time_minutes: timeMinutes docker: dockerImage } @@ -139,7 +139,7 @@ task Somatic { runtime { cpu: cores - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/stringtie.wdl b/stringtie.wdl index 9c2f3cfc..fbe7e442 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -34,7 +34,7 @@ task Stringtie { Float? minimumCoverage Int threads = 1 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bam, "G") * 60 / threads) String dockerImage = "quay.io/biocontainers/stringtie:1.3.6--h92e31bf_0" } @@ -102,7 +102,7 @@ task Merge { Float? minimumIsoformFraction String? 
label - String memory = "10G" + String memory = "10GiB" Int timeMinutes = 1 + ceil(size(gtfFiles, "G") * 20) String dockerImage = "quay.io/biocontainers/stringtie:2.1.4--h7e0af3c_0" } diff --git a/survivor.wdl b/survivor.wdl index de232405..b233fb52 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -31,7 +31,7 @@ task Merge { Int minSize = 30 String outputPath = "./survivor/merged.vcf" - String memory = "24G" + String memory = "24GiB" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" } diff --git a/talon.wdl b/talon.wdl index 61f5eb4a..2f93e36b 100644 --- a/talon.wdl +++ b/talon.wdl @@ -30,7 +30,7 @@ task CreateAbundanceFileFromDatabase { File? whitelistFile File? datasetsFile - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -86,7 +86,7 @@ task CreateGtfFromDatabase { File? whitelistFile File? datasetFile - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -144,7 +144,7 @@ task FilterTalonTranscripts { File? datasetsFile Int? minDatasets - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -200,7 +200,7 @@ task GetReadAnnotations { File? 
datasetFile - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -248,7 +248,7 @@ task GetSpliceJunctions { String runMode = "intron" String outputPrefix - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -302,7 +302,7 @@ task InitializeTalonDatabase { Int cutOff3p = 300 String outputPrefix - String memory = "10G" + String memory = "10GiB" Int timeMinutes = 60 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -360,7 +360,7 @@ task LabelReads { String outputPrefix Int threads = 4 - String memory = "25G" + String memory = "25GiB" Int timeMinutes = 2880 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -413,7 +413,7 @@ task ReformatGtf { input { File gtfFile - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -454,7 +454,7 @@ task SummarizeDatasets { File? datasetGroupsCsv - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 50 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -506,7 +506,7 @@ task Talon { String outputPrefix Int threads = 4 - String memory = "25G" + String memory = "25GiB" Int timeMinutes = 2880 String dockerImage = "biocontainers/talon:v5.0_cv1" } diff --git a/transcriptclean.wdl b/transcriptclean.wdl index efdd95f4..8607a7a3 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -27,7 +27,7 @@ task GetSJsFromGtf { String outputPrefix Int minIntronSize = 21 - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } @@ -72,7 +72,7 @@ task GetTranscriptCleanStats { File inputSam String outputPrefix - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } @@ -128,7 +128,7 @@ task TranscriptClean { File? 
variantFile Int cores = 1 - String memory = "25G" + String memory = "25GiB" Int timeMinutes = 2880 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } diff --git a/umi-tools.wdl b/umi-tools.wdl index b79817c2..d8d17c48 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -30,7 +30,7 @@ task Extract { String? read2Output = "umi_extracted_R2.fastq.gz" Boolean threePrime = false - String memory = "20G" + String memory = "20GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:3067b520386698317fd507c413baf7f901666fd4-0" } @@ -87,8 +87,8 @@ task Dedup { String? umiSeparator String? statsPrefix - String memory = "25G" - Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) + String memory = "25GiB" + Int timeMinutes = 30 + ceil(size(inputBam, "GiB") * 30) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:3067b520386698317fd507c413baf7f901666fd4-0" } diff --git a/umi.wdl b/umi.wdl index 0dc5c55e..e7f01fc2 100644 --- a/umi.wdl +++ b/umi.wdl @@ -30,8 +30,8 @@ task BamReadNameToUmiTag { String outputPath = "output.bam" String umiTag = "RX" - String memory = "2G" - Int timeMinutes = 1 + ceil(size([inputBam], "G") * 10) + String memory = "2GiB" + Int timeMinutes = 1 + ceil(size([inputBam], "GiB") * 10) String dockerImage = "quay.io/biocontainers/pysam:0.17.0--py39h051187c_0" } diff --git a/unicycler.wdl b/unicycler.wdl index 938d0c7e..d83db3ca 100644 --- a/unicycler.wdl +++ b/unicycler.wdl @@ -66,7 +66,7 @@ task Unicycler { String? 
lowScore Int threads = 1 - String memory = "4G" + String memory = "4GiB" } command { diff --git a/vardict.wdl b/vardict.wdl index 1c20e51c..187b4567 100644 --- a/vardict.wdl +++ b/vardict.wdl @@ -48,7 +48,7 @@ task VarDict { String javaXmx = "16G" Int threads = 1 - String memory = "18G" + String memory = "18GiB" Int timeMinutes = 300 String dockerImage = "quay.io/biocontainers/vardict-java:1.5.8--1" } diff --git a/vt.wdl b/vt.wdl index 85077dae..4da2d8cd 100644 --- a/vt.wdl +++ b/vt.wdl @@ -29,7 +29,7 @@ task Normalize { Boolean ignoreMaskedRef = false String outputPath = "./vt/normalized_decomposed.vcf" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" } diff --git a/whatshap.wdl b/whatshap.wdl index 7307ce7c..da86ad82 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -38,7 +38,7 @@ task Phase { String? threshold String? ped - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" @@ -109,7 +109,7 @@ task Stats { String? blockList String? chromosome - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" @@ -169,7 +169,7 @@ task Haplotag { String? regions String? sample - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" diff --git a/wisestork.wdl b/wisestork.wdl index 8fb4b76b..bef54e27 100644 --- a/wisestork.wdl +++ b/wisestork.wdl @@ -31,7 +31,7 @@ task Count { Int? binSize File? 
binFile - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -69,7 +69,7 @@ task GcCorrect { Int? iter Float? fracLowess - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -129,7 +129,7 @@ task Newref { } runtime { - memory: "~{memory}G" + memory: "~{memory}GiB" docker: dockerImage } } @@ -147,7 +147,7 @@ task Zscore { Int? binSize File? binFile - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } From 5523913a18f121dcc524cac346dd82cf1162e804 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 22 Aug 2022 14:37:42 +0200 Subject: [PATCH 440/668] Update changelog with memory change --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index be0e5a7c..5f4fed5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,11 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` + previously. The WDL spec clearly distuingishes between SI and binary + notations. Since Java always takes `K`, `M` and `G` to mean `KiB`, `MiB` and + `GiB` this means java tasks such as GATK, FastQC and Picard will always + receive enough memory now. + Purple's `somaticRainfallPlot` output is now optional and included in the `plots` output as well. + Bedtools coverage's timeMinutes now defaults to `320`. 
From 75bb0cbcf2d2ccc57e8c5857f140cffe2a310c67 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 25 Aug 2022 13:57:10 +0200 Subject: [PATCH 441/668] update survivor version --- CHANGELOG.md | 1 + survivor.wdl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f4fed5b..b0b7c3e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Updated SURVIVOR version to 1.0.7 + Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` previously. The WDL spec clearly distuingishes between SI and binary notations. Since Java always takes `K`, `M` and `G` to mean `KiB`, `MiB` and diff --git a/survivor.wdl b/survivor.wdl index b233fb52..ae246f60 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -33,7 +33,7 @@ task Merge { String memory = "24GiB" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" + String dockerImage = "quay.io/biocontainers/survivor:1.0.7--hd03093a_2" } command { From bf7aba3c332a8dcabc87d22e1740049ed4bf7db4 Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 7 Oct 2022 17:59:35 +0200 Subject: [PATCH 442/668] add fastp --- fastp.wdl | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 fastp.wdl diff --git a/fastp.wdl b/fastp.wdl new file mode 100644 index 00000000..8cf99d99 --- /dev/null +++ b/fastp.wdl @@ -0,0 +1,101 @@ +verison 1.0 + +# MIT License +# +# Copyright (c) 2022 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom 
the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Fastp { + input { + File r1 + File r2 + String outputPathR1 + String outputPathR2 + String htmlPath + String jsonPath + + Int compressionLevel = 1 + Boolean correction = false + Int lengthRequired = 15 + Int? split + + Int threads = 4 + String memory = "5GiB" + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / cores) + String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" + } + + String outputDirR1 = sub(outputPathR1, basename(outputPathR1), "") + + command { + set -e + mkdir -p $(dirname ~{outputPathR1} ~{outputPathR2} ~{htmlPath} ~{jsonPath}) + # predict output paths + seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths + seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths + fastp \ + -i ~{r1} \ + ~{"-I " + r2} \ + -o ~{outputPathR1} \ + ~{"-O " + outputPathR2} \ + -h ~{htmlPath} \ + -j ~{jsonPath} \ + -z ~{compressionLevel} \ + ~{if correction then "--correction" else ""} \ + --length_required ~{lengthRequired} \ + --threads ~{threads} \ + ~{"--split " + split} \ + ~{if defined(split) then "-d 0" else ""} + } + + Array[String] r1Paths = read_lines("r1_paths") + Array[String] r2Paths = read_lines("r2_paths") 
+ + output { + File htmlReport = htmlPath + File jsonReport = jsonPath + Array[File] clippedR1 = if defined(split) then r1Paths else [outputPathR1] + Array[File] clippedR2 = if defined(split) then r2Paths else [outputPathR2] + } + + runtime { + cpu: cores + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + r1: {description: "The R1 fastq file.", category: "required"} + r2: {description: "The R2 fastq file.", category: "required"} + outputPathR1: {description: "The output path for the R1 file.", category: "required"} + outputPathR2: {description: "The output path for the R2 file.", category: "required"} + htmlPath: {description: "The path to write the html report to.", category: "required"} + jsonPath: {description: "The path to write the json report to.", category: "required"} + compressionLevel: {description: "The compression level to use for the output.", category: "advanced"} + correction: {description: "Whether or not to apply overlap based correction.", category: "advanced"} + lengthRequired: {description: "The minimum read length.", category: "advanced"} + split: {description: "The number of chunks to split the files into.", category: "common"} + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file From 64427306fbbf58eb3ca9b3850a223d06894c9391 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 11 Oct 2022 12:13:08 +0200 Subject: [PATCH 443/668] fix some issues in fastp, add picard CollectInzertSizeMetrics --- fastp.wdl | 28 ++++++++++++++++------------ picard.wdl | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 12 deletions(-) diff --git a/fastp.wdl b/fastp.wdl index 8cf99d99..3063d012 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -1,4 +1,4 @@ -verison 1.0 +version 1.0 # MIT License # @@ -24,8 +24,8 @@ verison 1.0 task Fastp { input { - File r1 - File r2 + File read1 + File read2 String outputPathR1 String outputPathR2 String htmlPath @@ -35,24 +35,26 @@ task Fastp { Boolean correction = false Int lengthRequired = 15 Int? split + Boolean performAdapterTrimming = true Int threads = 4 String memory = "5GiB" - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / cores) + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" } String outputDirR1 = sub(outputPathR1, basename(outputPathR1), "") + String outputDirR2 = sub(outputPathR2, basename(outputPathR2), "") - command { + command <<< set -e mkdir -p $(dirname ~{outputPathR1} ~{outputPathR2} ~{htmlPath} ~{jsonPath}) # predict output paths seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths fastp \ - -i ~{r1} \ - ~{"-I " + r2} \ + -i ~{read1} \ + ~{"-I " + read2} \ -o ~{outputPathR1} \ ~{"-O " + outputPathR2} \ -h ~{htmlPath} \ @@ -62,8 +64,9 @@ task Fastp { --length_required ~{lengthRequired} \ --threads ~{threads} \ ~{"--split " + split} \ - ~{if 
defined(split) then "-d 0" else ""} - } + ~{if defined(split) then "-d 0" else ""} \ + ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} + >>> Array[String] r1Paths = read_lines("r1_paths") Array[String] r2Paths = read_lines("r2_paths") @@ -76,15 +79,15 @@ task Fastp { } runtime { - cpu: cores + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage } parameter_meta { - r1: {description: "The R1 fastq file.", category: "required"} - r2: {description: "The R2 fastq file.", category: "required"} + read1: {description: "The R1 fastq file.", category: "required"} + read2: {description: "The R2 fastq file.", category: "required"} outputPathR1: {description: "The output path for the R1 file.", category: "required"} outputPathR2: {description: "The output path for the R2 file.", category: "required"} htmlPath: {description: "The path to write the html report to.", category: "required"} @@ -93,6 +96,7 @@ task Fastp { correction: {description: "Whether or not to apply overlap based correction.", category: "advanced"} lengthRequired: {description: "The minimum read length.", category: "advanced"} split: {description: "The number of chunks to split the files into.", category: "common"} + performAdapterTrimming: {description: "Whether adapter trimming should be performed or not.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} diff --git a/picard.wdl b/picard.wdl index f762ecdd..6628cf0e 100644 --- a/picard.wdl +++ b/picard.wdl @@ -136,6 +136,58 @@ task CollectHsMetrics { } } +task CollectInsertSizeMetrics { + input { + File inputBam + File inputBamIndex + + Float? 
minimumPercentage + String basename = "./insertSize_metrics" + + String memory = "5GiB" + String javaXmx = "4G" + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 6) + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + } + + command { + set -e + mkdir -p "$(dirname ~{basename})" + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + CollectInsertSizeMetrics \ + I=~{inputBam} \ + O=~{basename}.txt \ + H=~{basename}.pdf \ + ~{"M=" + minimumPercentage} + } + + output { + File metricsTxt = "~{basename}.txt" + File metricsPdf = "~{basename}.pdf" + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} + inputBamIndex: {description: "The index of the input BAM file.", category: "required"} + minimumPercentage: {description: "Equivalent to picard CollectInsertSizeMetrics' `M` option.", category: "advanced"} + basename: {description: "The basename for the output files.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task CollectMultipleMetrics { input { File inputBam From 346c0044a15279e1e3c5cd7140e24d9321255be8 Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 14 Oct 2022 15:21:07 +0200 Subject: [PATCH 444/668] fix fastp task --- fastp.wdl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fastp.wdl b/fastp.wdl index 3063d012..c7a4d19f 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -48,7 +48,11 @@ task Fastp { command <<< set -e - mkdir -p $(dirname ~{outputPathR1} ~{outputPathR2} ~{htmlPath} ~{jsonPath}) + mkdir -p $(dirname ~{outputPathR1}) + mkdir -p $(dirname ~{outputPathR2}) + mkdir -p $(dirname ~{htmlPath}) + mkdir -p $(dirname ~{jsonPath}) + # predict output paths seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths @@ -68,14 +72,11 @@ task Fastp { ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} >>> - Array[String] r1Paths = read_lines("r1_paths") - Array[String] r2Paths = read_lines("r2_paths") - output { File htmlReport = htmlPath File jsonReport = jsonPath - Array[File] clippedR1 = if defined(split) then r1Paths else [outputPathR1] - Array[File] clippedR2 = if defined(split) then r2Paths else [outputPathR2] + Array[File] clippedR1 = if defined(split) then read_lines("r1_paths") else [outputPathR1] + Array[File] clippedR2 = if defined(split) then read_lines("r2_paths") else [outputPathR2] } runtime { From 5b55e1b657b4d6d9ee189317d7cc5054493ef863 Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 14 Oct 2022 15:26:38 +0200 Subject: [PATCH 445/668] typo --- fastp.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index c7a4d19f..572de7dc 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ 
-66,7 +66,7 @@ task Fastp { -z ~{compressionLevel} \ ~{if correction then "--correction" else ""} \ --length_required ~{lengthRequired} \ - --threads ~{threads} \ + --thread ~{threads} \ ~{"--split " + split} \ ~{if defined(split) then "-d 0" else ""} \ ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} From 9dda4c842ac98d083bd9c9fdeec1e97437040e65 Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 14 Oct 2022 16:02:40 +0200 Subject: [PATCH 446/668] increase memory for fastp --- fastp.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index 572de7dc..becbaf4b 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -38,7 +38,7 @@ task Fastp { Boolean performAdapterTrimming = true Int threads = 4 - String memory = "5GiB" + String memory = "10GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" } From f8aa7e37593df2282161bc37c49a1d0b5039185b Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 14 Oct 2022 18:06:18 +0200 Subject: [PATCH 447/668] increase memory for fastp --- fastp.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index becbaf4b..25f09e39 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -38,7 +38,7 @@ task Fastp { Boolean performAdapterTrimming = true Int threads = 4 - String memory = "10GiB" + String memory = "20GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" } From e9215442ac12ff2f9ea4833b69daf809d8957cc6 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Fri, 21 Oct 2022 15:14:04 +0200 Subject: [PATCH 448/668] fastp: use number of splits as number of threads if set --- fastp.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fastp.wdl b/fastp.wdl index 25f09e39..7f269d81 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -38,8 +38,8 @@ task Fastp { Boolean 
performAdapterTrimming = true Int threads = 4 - String memory = "20GiB" - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / threads) + String memory = "50GiB" + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 6.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" } @@ -66,7 +66,7 @@ task Fastp { -z ~{compressionLevel} \ ~{if correction then "--correction" else ""} \ --length_required ~{lengthRequired} \ - --thread ~{threads} \ + --thread ~{select_first([split, threads])} \ ~{"--split " + split} \ ~{if defined(split) then "-d 0" else ""} \ ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} @@ -80,7 +80,7 @@ task Fastp { } runtime { - cpu: threads + cpu: select_first([split, threads]) memory: memory time_minutes: timeMinutes docker: dockerImage @@ -96,9 +96,9 @@ task Fastp { compressionLevel: {description: "The compression level to use for the output.", category: "advanced"} correction: {description: "Whether or not to apply overlap based correction.", category: "advanced"} lengthRequired: {description: "The minimum read length.", category: "advanced"} - split: {description: "The number of chunks to split the files into.", category: "common"} + split: {description: "The number of chunks to split the files into. Number of threads will be set equal to the amount of splits.", category: "common"} performAdapterTrimming: {description: "Whether adapter trimming should be performed or not.", category: "advanced"} - threads: {description: "The number of threads to use.", category: "advanced"} + threads: {description: "The number of threads to use. Only used if the split input is not set.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From c7754754273f3ae4ce4bb34a9211cafec7880306 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 21 Oct 2022 16:48:34 +0200 Subject: [PATCH 449/668] Add a task to produce fasta indices --- biowdl.wdl | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/biowdl.wdl b/biowdl.wdl index f891618e..7392983a 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -73,3 +73,49 @@ task InputConverter { json: {description: "JSON file version of the input sample sheet."} } } + +task IndexFastaFile { + input { + File inputFile + String outputDir = "." + String javaXmx = "2G" + String memory = "3GiB" + } + String outputFile = outputDir + "/" + basename(inputFile) + # This executes both picard and samtools, so indexes are co-located in the same folder. + command <<< + set -e + mkdir -p ~{outputDir} + ln -s ~{inputFile} ~{outputFile} + picard -Xmx~{javaXmx} \ + -XX:ParallelGCThreads=1 \ + CreateSequenceDictionary \ + REFERENCE=~{inputFile} \ + OUTPUT="~{outputFile}.dict" + samtools faidx ~{outputFile} --fai-idx ~{outputFile}.fai + >>> + + output { + File outputFasta = outputFile + File outputFastaDict = outputFile + ".dict" + File outputFastaFai = outputFile + ".fai" + } + + runtime { + memory: memory + # Contains picard 2.27.4, samtools 1.15.1 + docker: "quay.io/biocontainers/mulled-v2-b0664646864bfdb46c5343b1b2b93fc05adb4b77:39a005770a3e30fb6aa3bf424b57ddf52bae7ece-0" + } + + parameter_meta { + # inputs + inputFile: {description: "The input fasta file.", category: "required"} + outputDir: {description: "Output directory path.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + # outputs + outputFasta: {description: "Fasta file that is co-located with the indexes"} + outputFastaFai: {description: "Fasta index file for the outputFasta file."} + outputFastaDict: {description: "Sequence dictionary for the outputFasta file."} + } +} \ No newline at end of file From 8d5a451e1d3938f62d14add4167fcf83dd9a0e70 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 24 Oct 2022 09:45:54 +0200 Subject: [PATCH 450/668] typo --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 26ab4e4a..5776dfed 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -604,7 +604,7 @@ task Linx { File transExonDataCsv File transSpliceDataCsv - String memory = "9iB" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" From f05d968d69d6c3a41b03a761a4a4838e5889df6c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 24 Oct 2022 14:45:15 +0200 Subject: [PATCH 451/668] Add a Bwa index task --- bwa.wdl | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/bwa.wdl b/bwa.wdl index d4f4495a..f79a219a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -114,3 +114,29 @@ struct BwaIndex { File fastaFile Array[File] indexFiles } + +task Index { + input { + File fasta + } + File indexedFile = "reference.fasta" + + command { + set -e + cp ~{fasta} ~{indexedFile} + bwa index ~{indexedFile} + } + + output { + BwaIndex index = { + "fastaFile": indexedFile, + "indexFiles": [ + indexedFile + ".amb", + indexedFile + ".ann", + indexedFile + ".bwt", + indexedFile + ".pac", + indexedFile + ".sa" + ] + } + } +} \ No newline at end of file From 23b324ea33f63cb4901fd66528f4ecead4cab0d5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 24 Oct 2022 15:02:55 
+0200 Subject: [PATCH 452/668] Copy reference to prevent problems --- biowdl.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/biowdl.wdl b/biowdl.wdl index 7392983a..fe49a6cf 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -86,7 +86,7 @@ task IndexFastaFile { command <<< set -e mkdir -p ~{outputDir} - ln -s ~{inputFile} ~{outputFile} + cp ~{inputFile} ~{outputFile} picard -Xmx~{javaXmx} \ -XX:ParallelGCThreads=1 \ CreateSequenceDictionary \ From 4431b259d68024b057fe5cfd5dc4de2424450d4b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 24 Oct 2022 15:46:09 +0200 Subject: [PATCH 453/668] Make sure index task works --- bwa.wdl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index f79a219a..a129ebb4 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -119,7 +119,7 @@ task Index { input { File fasta } - File indexedFile = "reference.fasta" + String indexedFile = "reference.fasta" command { set -e @@ -128,9 +128,9 @@ task Index { } output { - BwaIndex index = { - "fastaFile": indexedFile, - "indexFiles": [ + BwaIndex index = object { + fastaFile: indexedFile, + indexFiles: [ indexedFile + ".amb", indexedFile + ".ann", indexedFile + ".bwt", @@ -139,4 +139,10 @@ task Index { ] } } + + runtime { + docker: "quay.io/biocontainers/bwa:0.7.17--hed695b0_7" + cpu: 1 + memory: "~{size(fasta, 'G') + 1}GiB" + } } \ No newline at end of file From af929db9c2392cdc24a3ef2e7c644ca4d055cc3d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 25 Oct 2022 14:17:40 +0200 Subject: [PATCH 454/668] Use the basename of the input file for index names --- biowdl.wdl | 11 +++++------ bwa.wdl | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/biowdl.wdl b/biowdl.wdl index fe49a6cf..58e94df8 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -77,27 +77,27 @@ task InputConverter { task IndexFastaFile { input { File inputFile - String outputDir = "." 
String javaXmx = "2G" String memory = "3GiB" } - String outputFile = outputDir + "/" + basename(inputFile) + String outputFile = basename(inputFile) + # Capture .fa¸ .fna and .fasta + String outputDict = sub(outputFile, "\.fn?as?t?a?$", "") + ".dict" # This executes both picard and samtools, so indexes are co-located in the same folder. command <<< set -e - mkdir -p ~{outputDir} cp ~{inputFile} ~{outputFile} picard -Xmx~{javaXmx} \ -XX:ParallelGCThreads=1 \ CreateSequenceDictionary \ REFERENCE=~{inputFile} \ - OUTPUT="~{outputFile}.dict" + OUTPUT="~{outputDict}" samtools faidx ~{outputFile} --fai-idx ~{outputFile}.fai >>> output { File outputFasta = outputFile - File outputFastaDict = outputFile + ".dict" + File outputFastaDict = outputDict File outputFastaFai = outputFile + ".fai" } @@ -110,7 +110,6 @@ task IndexFastaFile { parameter_meta { # inputs inputFile: {description: "The input fasta file.", category: "required"} - outputDir: {description: "Output directory path.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} # outputs diff --git a/bwa.wdl b/bwa.wdl index a129ebb4..8f694b45 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -119,7 +119,7 @@ task Index { input { File fasta } - String indexedFile = "reference.fasta" + String indexedFile = basename(fasta) command { set -e From 2dc14b39d06dcc1c8161a9bf5840ebe5d88ccb25 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 25 Oct 2022 14:33:20 +0200 Subject: [PATCH 455/668] Make index use the basename of the file --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index e1b08173..bee38d11 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -470,7 +470,7 @@ task Sort { task Tabix { input { File inputFile - String outputFilePath = "indexed.vcf.gz" + String outputFilePath = basename(inputFile) String type = "vcf" Int timeMinutes = 1 + ceil(size(inputFile, "GiB") * 2) From 46bf6537c1787f47b7758d350b6605dae6da00cb Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 26 Oct 2022 14:38:17 +0200 Subject: [PATCH 456/668] Add indexing tasks to the changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f4fed5b..d94c2b56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add a combined Picard CreateSequenceDictionary and samtools faidx task. ++ Add a BWA index task. + Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` previously. The WDL spec clearly distuingishes between SI and binary notations. 
Since Java always takes `K`, `M` and `G` to mean `KiB`, `MiB` and From c6fe0300c5d2e5275739148c051f931e717cd6f1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Oct 2022 11:38:30 +0200 Subject: [PATCH 457/668] Use samtools dict instead of Picard CreateSequenceDictionary --- CHANGELOG.md | 2 +- biowdl.wdl | 45 --------------------------------------------- samtools.wdl | 43 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 46 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d94c2b56..b9df32a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- -+ Add a combined Picard CreateSequenceDictionary and samtools faidx task. ++ Add a combined samtools dict and samtools faidx task. + Add a BWA index task. + Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` previously. The WDL spec clearly distuingishes between SI and binary diff --git a/biowdl.wdl b/biowdl.wdl index 58e94df8..463dab75 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -72,49 +72,4 @@ task InputConverter { # outputs json: {description: "JSON file version of the input sample sheet."} } -} - -task IndexFastaFile { - input { - File inputFile - String javaXmx = "2G" - String memory = "3GiB" - } - String outputFile = basename(inputFile) - # Capture .fa¸ .fna and .fasta - String outputDict = sub(outputFile, "\.fn?as?t?a?$", "") + ".dict" - # This executes both picard and samtools, so indexes are co-located in the same folder. 
- command <<< - set -e - cp ~{inputFile} ~{outputFile} - picard -Xmx~{javaXmx} \ - -XX:ParallelGCThreads=1 \ - CreateSequenceDictionary \ - REFERENCE=~{inputFile} \ - OUTPUT="~{outputDict}" - samtools faidx ~{outputFile} --fai-idx ~{outputFile}.fai - >>> - - output { - File outputFasta = outputFile - File outputFastaDict = outputDict - File outputFastaFai = outputFile + ".fai" - } - - runtime { - memory: memory - # Contains picard 2.27.4, samtools 1.15.1 - docker: "quay.io/biocontainers/mulled-v2-b0664646864bfdb46c5343b1b2b93fc05adb4b77:39a005770a3e30fb6aa3bf424b57ddf52bae7ece-0" - } - - parameter_meta { - # inputs - inputFile: {description: "The input fasta file.", category: "required"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} - # outputs - outputFasta: {description: "Fasta file that is co-located with the indexes"} - outputFastaFai: {description: "Fasta index file for the outputFasta file."} - outputFastaDict: {description: "Sequence dictionary for the outputFasta file."} - } } \ No newline at end of file diff --git a/samtools.wdl b/samtools.wdl index bee38d11..d5e3ce0e 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -66,6 +66,49 @@ task BgzipAndIndex { } } +task DictAndFaidx { + input { + File inputFile + String javaXmx = "2G" + String memory = "3GiB" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + } + + String outputFile = basename(inputFile) + # Capture .fa¸ .fna and .fasta + String outputDict = sub(outputFile, "\.fn?as?t?a?$", "") + ".dict" + # This executes both dict and faidx, so indexes are co-located in the same folder. 
+ command <<< + set -e + cp ~{inputFile} ~{outputFile} + samtools dict -o ~{outputDict} ~{outputFile} + samtools faidx ~{outputFile} --fai-idx ~{outputFile}.fai + >>> + + output { + File outputFasta = outputFile + File outputFastaDict = outputDict + File outputFastaFai = outputFile + ".fai" + } + + runtime { + memory: memory + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input fasta file.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + # outputs + outputFasta: {description: "Fasta file that is co-located with the indexes"} + outputFastaFai: {description: "Fasta index file for the outputFasta file."} + outputFastaDict: {description: "Sequence dictionary for the outputFasta file."} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + task Faidx { input { File inputFile From 61161df05a65d5a3f3427d381254988208266c98 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Oct 2022 11:45:26 +0200 Subject: [PATCH 458/668] Add time_minutes dockerimage and update parameter_meta --- biowdl.wdl | 2 +- bwa.wdl | 16 ++++++++++++++-- samtools.wdl | 4 +++- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/biowdl.wdl b/biowdl.wdl index 463dab75..f891618e 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -72,4 +72,4 @@ task InputConverter { # outputs json: {description: "JSON file version of the input sample sheet."} } -} \ No newline at end of file +} diff --git a/bwa.wdl b/bwa.wdl index 8f694b45..e1e61bbe 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -118,6 +118,8 @@ struct BwaIndex { task Index { input { File fasta + String dockerImage = "quay.io/biocontainers/bwa:0.7.17--hed695b0_7" + Int? timeMinutes = 5 + ceil(size(fasta, "G") * 5) } String indexedFile = basename(fasta) @@ -141,8 +143,18 @@ task Index { } runtime { - docker: "quay.io/biocontainers/bwa:0.7.17--hed695b0_7" + docker: dockerImage cpu: 1 memory: "~{size(fasta, 'G') + 1}GiB" + time_minutes: timeMinutes + } + parameter_meta { + # inputs + fasta: {description: "Reference fasta file.", category: "required"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + index: {description: "The produced BWA index."} } -} \ No newline at end of file +} diff --git a/samtools.wdl b/samtools.wdl index d5e3ce0e..76a07ef5 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -71,6 +71,7 @@ task DictAndFaidx { File inputFile String javaXmx = "2G" String memory = "3GiB" + Int timeMinutes = 5 + ceil(size(inputFile, "GiB") * 5) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -101,11 +102,12 @@ task DictAndFaidx { inputFile: {description: "The input fasta file.", category: "required"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs outputFasta: {description: "Fasta file that is co-located with the indexes"} outputFastaFai: {description: "Fasta index file for the outputFasta file."} outputFastaDict: {description: "Sequence dictionary for the outputFasta file."} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From 3c53b47f4ba4e2c75fc104dabe972a50332552e6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Oct 2022 12:25:53 +0200 Subject: [PATCH 459/668] Add @DavyCats' suggestions --- bwa.wdl | 1 + samtools.wdl | 2 ++ 2 files changed, 3 insertions(+) diff --git a/bwa.wdl b/bwa.wdl index e1e61bbe..66b8e8cc 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -148,6 +148,7 @@ task Index { memory: "~{size(fasta, 'G') + 1}GiB" time_minutes: timeMinutes } + parameter_meta { # inputs fasta: {description: "Reference fasta file.", category: "required"} diff --git a/samtools.wdl b/samtools.wdl index 76a07ef5..df712e51 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -95,6 +95,8 @@ task DictAndFaidx { runtime { memory: memory docker: dockerImage + time_minutes: timeMinutes + cpu: 1 } parameter_meta { From 0632414b9ae0663431e8a25b35463c9aa83badbe Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 4 Nov 2022 11:03:34 +0100 Subject: [PATCH 460/668] typo --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b522c02c..daf79c8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,6 @@ version 5.1.0-dev + Updated SURVIVOR version to 1.0.7 + Add a combined samtools dict and samtools faidx task. + Add a BWA index task. - + Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` previously. The WDL spec clearly distuingishes between SI and binary notations. 
Since Java always takes `K`, `M` and `G` to mean `KiB`, `MiB` and From 4a42403fb4bf27ba21f63b99c7cb75f9d13adfeb Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 8 Nov 2022 16:33:30 +0100 Subject: [PATCH 461/668] Fallback to copying when hardlinking does not work --- samtools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index df712e51..587a53fb 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -332,7 +332,7 @@ task Index { if [ ! -f ~{outputPath} ] then mkdir -p "$(dirname ~{outputPath})" - ln ~{bamFile} ~{outputPath} + ln ~{bamFile} ~{outputPath} || cp ~{bamFile} ~{outputPath} fi samtools index ~{outputPath} ~{bamIndexPath} ' @@ -531,7 +531,7 @@ task Tabix { mkdir -p "$(dirname ~{outputFilePath})" if [ ! -f ~{outputFilePath} ] then - ln ~{inputFile} ~{outputFilePath} + ln ~{inputFile} ~{outputFilePath} || cp ~{inputFile} ~{outputFilePath} fi tabix ~{outputFilePath} -p ~{type} } From daf19317d6f5aafc4e156910393f8bf02c012199 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 9 Nov 2022 15:27:37 +0100 Subject: [PATCH 462/668] remove second breakends in gridss AnnotateSvTypes script --- gridss.wdl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index cfe53751..9a09bdde 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -119,9 +119,14 @@ task AnnotateSvTypes { gr <- breakpointRanges(vcf) svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype - # GRIDSS doesn't supply a GT, so we estimate GT based on AF (assuming CN of 2, might be inaccurate) - geno(vcf)$GT <- ifelse(geno(vcf)$AF > 0.75, "1/1", ifelse(geno(vcf)$AF < 0.25, "0/0", "0/1")) - writeVcf(vcf, out_path, index=~{index}) + # GRIDSS doesn't supply a GT, simply set it to 0/1 + geno(vcf)$GT <- "0/1" + # Select only one breakend per event (also removes single breakends): + # sourceId ends with o or h for paired breakends, the first in the pair + # end with o the second with h. 
Single breakend end with b, these will + # also be removed since we can't determine the SVTYPE. + gr2 <- gr[grepl(".*o$", gr$sourceId)] + writeVcf(vcf[gr2$sourceId], out_path, index=~{index}) EOF >>> From 9cf522d5cf766ef7943226e8d4807643ee93721d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Nov 2022 16:00:25 +0100 Subject: [PATCH 463/668] fix typing issue in AnnotateSvTypes R code --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 9a09bdde..8e1474c1 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -120,7 +120,7 @@ task AnnotateSvTypes { svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype # GRIDSS doesn't supply a GT, simply set it to 0/1 - geno(vcf)$GT <- "0/1" + geno(vcf)$GT <- as.matrix(sapply(row.names(vcf), function(x) {"0/1"})) # Select only one breakend per event (also removes single breakends): # sourceId ends with o or h for paired breakends, the first in the pair # end with o the second with h. Single breakend end with b, these will From 2e1c9972b01922cd915b7041b230e6287dda778b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 14 Nov 2022 13:40:06 +0100 Subject: [PATCH 464/668] fix issue where fastp errors if split is set to 1 --- fastp.wdl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/fastp.wdl b/fastp.wdl index 7f269d81..db4a2d40 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -41,11 +41,15 @@ task Fastp { String memory = "50GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 6.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" + + Int? noneInt } String outputDirR1 = sub(outputPathR1, basename(outputPathR1), "") String outputDirR2 = sub(outputPathR2, basename(outputPathR2), "") + Int? 
effectiveSplit = if select_first([split, 1]) > 1 then split else noneInt + command <<< set -e mkdir -p $(dirname ~{outputPathR1}) @@ -54,8 +58,8 @@ task Fastp { mkdir -p $(dirname ~{jsonPath}) # predict output paths - seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths - seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths + seq 1 ~{if defined(effectiveSplit) then effectiveSplit else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths + seq 1 ~{if defined(effectiveSplit) then effectiveSplit else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths fastp \ -i ~{read1} \ ~{"-I " + read2} \ @@ -66,21 +70,21 @@ task Fastp { -z ~{compressionLevel} \ ~{if correction then "--correction" else ""} \ --length_required ~{lengthRequired} \ - --thread ~{select_first([split, threads])} \ - ~{"--split " + split} \ - ~{if defined(split) then "-d 0" else ""} \ + --thread ~{select_first([effectiveSplit, threads])} \ + ~{"--split " + effectiveSplit} \ + ~{if defined(effectiveSplit) then "-d 0" else ""} \ ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} >>> output { File htmlReport = htmlPath File jsonReport = jsonPath - Array[File] clippedR1 = if defined(split) then read_lines("r1_paths") else [outputPathR1] - Array[File] clippedR2 = if defined(split) then read_lines("r2_paths") else [outputPathR2] + Array[File] clippedR1 = if defined(effectiveSplit) then read_lines("r1_paths") else [outputPathR1] + Array[File] clippedR2 = if defined(effectiveSplit) then read_lines("r2_paths") else [outputPathR2] } runtime { - cpu: select_first([split, threads]) + cpu: select_first([effectiveSplit, threads]) memory: memory time_minutes: timeMinutes docker: dockerImage From 636b1f0ea31168d9001ea7b45efe6d3333d944a3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Nov 2022 14:12:45 
+0100 Subject: [PATCH 465/668] update changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index daf79c8f..2c4cff52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ The GRIDSS AnnotateSvTypes task now also removes the second breakend of + the breakpoints and single breakends. This will prepare the output better + to be passed into survivor. + Updated SURVIVOR version to 1.0.7 + Add a combined samtools dict and samtools faidx task. + Add a BWA index task. From b382cf745b6d7ed389bbca4efdfa70e37070d835 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Nov 2022 14:23:00 +0100 Subject: [PATCH 466/668] adjusted runtime attributes for clever tasks --- CHANGELOG.md | 6 ++++++ clever.wdl | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c4cff52..d2e95f60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,12 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Mateclever's runtime attribute defaults were changed to: + + memory: `"250GiB"` + + timeMinutes: `2880` ++ Clever's Prediction task's runtime attribute defaults were changed to: + + memory: `"80GiB"` + + timeMinutes: `2200` + The GRIDSS AnnotateSvTypes task now also removes the second breakend of the breakpoints and single breakends. This will prepare the output better to be passed into survivor. 
diff --git a/clever.wdl b/clever.wdl index 791a0ba1..3b819ed2 100644 --- a/clever.wdl +++ b/clever.wdl @@ -34,8 +34,8 @@ task Mateclever { Int maxOffset = 150 Int threads = 10 - String memory = "15GiB" - Int timeMinutes = 600 + String memory = "250GiB" + Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } @@ -94,8 +94,8 @@ task Prediction { String outputPath = "./clever" Int threads = 10 - String memory = "55GiB" - Int timeMinutes = 480 + String memory = "80GiB" + Int timeMinutes = 2200 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } From 522f2046d07479d1964de103f8d75a190a4a5292 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Nov 2022 11:48:50 +0100 Subject: [PATCH 467/668] increase time for Amber --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 5776dfed..3b09beb9 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -37,7 +37,7 @@ task Amber { Int threads = 2 String memory = "70GiB" String javaXmx = "64G" - Int timeMinutes = 240 + Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } From 78e02137e639dc35e24c6c9ac08a1efedfda7ebd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Nov 2022 17:12:07 +0100 Subject: [PATCH 468/668] increase memory for amber --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 3b09beb9..e051dc99 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,8 +35,8 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = "70GiB" - String javaXmx = "64G" + String memory = "85GiB" + String javaXmx = "80G" Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } From 1a80829e5bc6b9f607d3cb748f7af6c47e90f8bf Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 13 Jan 2023 16:37:32 +0100 Subject: [PATCH 469/668] Add targets file input to 
samtools view --- samtools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 587a53fb..8503777c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -571,6 +571,7 @@ task View { Int? excludeFilter Int? excludeSpecificFilter Int? MAPQthreshold + File? targetFile Int threads = 1 String memory = "1GiB" @@ -593,6 +594,7 @@ task View { ~{"-G " + excludeSpecificFilter} \ ~{"-q " + MAPQthreshold} \ ~{"--threads " + (threads - 1)} \ + ~{"--target-file " + targetFile} \ ~{inFile} samtools index ~{outputFileName} ~{outputIndexPath} } From 1ad000b1370898459d2ef3d6e2b3939699874c4f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 13 Jan 2023 17:07:32 +0100 Subject: [PATCH 470/668] update samtools containers --- samtools.wdl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 8503777c..303f9821 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -72,7 +72,7 @@ task DictAndFaidx { String javaXmx = "2G" String memory = "3GiB" Int timeMinutes = 5 + ceil(size(inputFile, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } String outputFile = basename(inputFile) @@ -119,7 +119,7 @@ task Faidx { String outputDir String memory = "2GiB" - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } command { @@ -168,7 +168,7 @@ task Fastq { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam) * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } command { @@ -232,7 +232,7 @@ task FilterShortReadsBam { String memory = "1GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String 
dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") @@ -278,7 +278,7 @@ task Flagstat { String memory = "256MiB" # Only 40.5 MiB used for 150G bam file. Int timeMinutes = 1 + ceil(size(inputBam, "G")) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } command { @@ -318,7 +318,7 @@ task Index { String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 4) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } # Select_first is needed, otherwise womtool validate fails. @@ -369,7 +369,7 @@ task Markdup { String outputBamPath Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } command { @@ -408,7 +408,7 @@ task Merge { Int threads = 1 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } String indexPath = sub(outputBamPath, "\.bam$",".bai") @@ -463,7 +463,7 @@ task Sort { Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } # Select first needed as outputPath is optional input (bug in cromwell). 
@@ -576,7 +576,7 @@ task View { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inFile, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } String outputIndexPath = basename(outputFileName) + ".bai" From d686e0870442c002b7902e9a8f33467dc404fa14 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 16 Jan 2023 14:15:45 +0100 Subject: [PATCH 471/668] Add parameter_meta for targetFile --- samtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index 303f9821..771a9969 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -621,6 +621,7 @@ task View { excludeFilter: {description: "Equivalent to samtools view's `-F` option.", category: "advanced"} excludeSpecificFilter: {description: "Equivalent to samtools view's `-G` option.", category: "advanced"} MAPQthreshold: {description: "Equivalent to samtools view's `-q` option.", category: "advanced"} + targetFile: {description: "A BED file with regions to include", caegory: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From b52e3250eb5823b0ddbe4363eb3a77ab798d6fd0 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 16 Jan 2023 14:17:38 +0100 Subject: [PATCH 472/668] Update changelog with samtools change --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d2e95f60..c6b5e609 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Update samtools image to version 1.16. ++ Add targetsFile input for samtools View. 
+ Mateclever's runtime attribute defaults were changed to: + memory: `"250GiB"` + timeMinutes: `2880` From e1abb7dc92090bb836b6468be9ae33dc1696a44d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 16 Jan 2023 14:21:22 +0100 Subject: [PATCH 473/668] Use latest version of scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 98cc3e10..84690a30 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 98cc3e10125c853a70f41ceccf8f9d5428d4c1a3 +Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 From 9fce64caa41bf1cd0ec5e43337a31f3c8a8466cf Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 23 Jan 2023 12:07:38 +0100 Subject: [PATCH 474/668] add memory runtime attribute to tabix task --- samtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index 771a9969..fbb445e7 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -542,6 +542,7 @@ task Tabix { } runtime { + memory: "2GiB" time_minutes: timeMinutes docker: dockerImage } From 5f5d51a3515b78c0d290e23a022255207c95bb7c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 24 Jan 2023 16:37:48 +0100 Subject: [PATCH 475/668] add various tasks for somatic SV calling --- delly.wdl | 65 ++++++++++++++++++--- gridss.wdl | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 214 insertions(+), 12 deletions(-) diff --git a/delly.wdl b/delly.wdl index 7333c5ff..fab32784 100644 --- a/delly.wdl +++ b/delly.wdl @@ -22,15 +22,17 @@ version 1.0 task CallSV { input { - File bamFile - File bamIndex + Array[File]+ bamFile + Array[File]+ bamIndex File referenceFasta File referenceFastaFai String outputPath = "./delly/delly.bcf" + File? 
genotypeBcf + String memory = "15GiB" Int timeMinutes = 300 - String dockerImage = "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" + String dockerImage = "quay.io/biocontainers/delly:1.1.6--ha41ced6_0" } command { @@ -39,7 +41,8 @@ task CallSV { delly call \ -o ~{outputPath} \ -g ~{referenceFasta} \ - ~{bamFile} + ~{"-v " + genotypeBcf} \ + ~{sep=" " bamFile} } output { @@ -54,11 +57,12 @@ task CallSV { parameter_meta { # inputs - bamFile: {description: "The bam file to process.", category: "required"} - bamIndex: {description: "The index bam file.", category: "required"} + bamFile: {description: "The bam files to process.", category: "required"} + bamIndex: {description: "The indexes for the bam files.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } - outputPath: {description: "The location the output VCF file should be written.", category: "common"} + outputPath: {description: "The location the output BCF file should be written.", category: "common"} + genotypeBcf: {description: "A BCF with SVs to get genotyped in the samples."} memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -67,3 +71,50 @@ task CallSV { dellyBcf: {description: "File containing structural variants."} } } + + +task SomaticFilter { + input { + File dellyBcf + Array[String]+ normalSamples + Array[String]+ tumorSamples + String outputPath = "./delly/delly_filter.bcf" + + String memory = "15GiB" + Int timeMinutes = 300 + String dockerImage = "quay.io/biocontainers/delly:1.1.6--ha41ced6_0" + } + + command <<< + set -e + mkdir -p "$(dirname ~{outputPath})" + for SAMPLE in ~{sep=" " normalSamples}; do echo -e "${SAMPLE}\tcontrol" >> samples.tsv; done + for SAMPLE in ~{sep=" " tumorSamples}; do echo -e "${SAMPLE}\ttumor" >> samples.tsv; done + + delly filter \ + -f somatic \ + -o ~{outputPath} \ + -s samples.tsv \ + ~{dellyBcf} + >>> + + output { + File filterBcf = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + dellyBcf: {description: "The BCF file produced by delly.", category: "required"} + normalSamples: {description: "The names for the normal samples as used in the delly BCF file.", category: "required"} + tumorSamples: {description: "The names for the tumor samples as used in the delly BCF file.", category: "required"} + outputPath: {description: "The location the output BCF file should be written.", category: "common"} + memory: {description: "The memory required to run the programs.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file diff --git a/gridss.wdl b/gridss.wdl index 8e1474c1..647f2d67 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -152,11 +152,108 @@ task AnnotateSvTypes { } } +task FilterPon { + input { + File ponBed + File ponBedpe + Int minimumScore = 3 + String outputDir = "." + + String memory = "1GiB" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" + Int timeMinutes = 20 + } + + command { + set -e + mkdir -p ~{outputDir} + + cat ~{ponBed} | awk '{if ($5 >= ~{minimumScore}) print $0}' > ~{outputDir}/gridss_pon_single_breakend.bed + cat ~{ponBedpe} | awk '{if ($8 >= ~{minimumScore}) print $0}' > ~{outputDir}/gridss_pon_breakpoint.bedpe + } + + output { + File bedpe = "~{outputDir}/gridss_pon_breakpoint.bedpe" + File bed = "~{outputDir}/gridss_pon_single_breakend.bed" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + ponBed: {description: "The PON BED file.", category: "required"} + ponBedpe: {description: "The PON BEDPE file.", category: "required"} + minimumScore: {description: "The minimum number normal samples an SV must have been found in to be kept.", category: "advanced"} + outputDir: {description: "The directory the output will be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task GeneratePonBedpe { + input { + Array[File]+ vcfFiles + Array[File]+ vcfIndexes + File referenceFasta + String outputDir = "." + + Int threads = 8 + String javaXmx = "8G" + String memory = "9GiB" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" + Int timeMinutes = 120 + } + + command { + set -e + mkdir -p ~{outputDir} + java -Xmx~{javaXmx} \ + -cp /usr/local/share/gridss-2.12.2-0/gridss.jar \ + gridss.GeneratePonBedpe \ + INPUT=~{sep=" INPUT=" vcfFiles} \ + O=~{outputDir}/gridss_pon_breakpoint.bedpe \ + SBO=~{outputDir}/gridss_pon_single_breakend.bed \ + REFERENCE_SEQUENCE=~{referenceFasta} \ + THREADS=~{threads} + } + + output { + File bedpe = "~{outputDir}/gridss_pon_breakpoint.bedpe" + File bed = "~{outputDir}/gridss_pon_single_breakend.bed" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + vcfFiles: {description: "The vcf files with the normals as the first sample.", category: "required"} + referenceFasta: {description: "The fasta of the reference genome.", category: "required"} + outputDir: {description: "The directory the output will be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GRIDSS { input { - File tumorBam - File tumorBai - String tumorLabel + Array[File]+ tumorBam + Array[File]+ tumorBai + Array[String]+ tumorLabel BwaIndex reference String outputPrefix = "gridss" @@ -184,10 +281,10 @@ task GRIDSS { ~{"-c " + gridssProperties} \ ~{"-t " + threads} \ ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ - --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ + --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{sep="," tumorLabel} \ ~{"--blacklist " + blacklistBed} \ ~{normalBam} \ - ~{tumorBam} + ~{sep=" " tumorBam} samtools index ~{outputPrefix}_assembly.bam ~{outputPrefix}_assembly.bai # For some reason the VCF index is sometimes missing @@ -283,6 +380,60 @@ task GridssAnnotateVcfRepeatmasker { } } +task SomaticFilter { + input { + File vcfFile + File vcfIndex + File ponBed + File ponBedpe + String outputPath = "./high_confidence_somatic.vcf.gz" + String fullOutputPath = "./high_and_low_confidence_somatic.vcf.gz" + + String memory = "16GiB" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" + Int timeMinutes = 60 + } + + command { + set -e + mkdir -p $(dirname ~{outputPath}) + mkdir -p $(dirname ~{fullOutputPath}) + + gridss_somatic_filter \ + --pondir ~{dirname(ponBed)} \ + --input ~{vcfFile} \ + --output ~{outputPath} \ + --fulloutput ~{fullOutputPath} + } + + output { + File fullVcf = fullOutputPath + File fullVcfIndex = "~{fullOutputPath}.tbi" + File highConfidenceVcf = outputPath + File highConfidenceVcfIndex = "~{outputPath}.tbi" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + vcfFile: {description: "The GRIDSS VCF file.", category: "required"} + vcfIndex: {description: "The index for the GRIDSS VCF file.", category: "required"} + ponBed: {description: "The PON BED file.", 
category: "required"} + ponBedpe: {description: "The PON BEDPE file.", category: "required"} + outputPath: {description: "The path the high confidence output should be written to.", category: "common"} + fullOutputPath: {description: "The path the full output should be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Virusbreakend { input { File bam From 90bcc945807e9ef2c13fbd542d69f3b912995a0c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 25 Jan 2023 14:06:10 +0100 Subject: [PATCH 476/668] fix lint issues --- gridss.wdl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 647f2d67..82ac7fbd 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -164,13 +164,13 @@ task FilterPon { Int timeMinutes = 20 } - command { + command <<< set -e mkdir -p ~{outputDir} cat ~{ponBed} | awk '{if ($5 >= ~{minimumScore}) print $0}' > ~{outputDir}/gridss_pon_single_breakend.bed cat ~{ponBedpe} | awk '{if ($8 >= ~{minimumScore}) print $0}' > ~{outputDir}/gridss_pon_breakpoint.bedpe - } + >>> output { File bedpe = "~{outputDir}/gridss_pon_breakpoint.bedpe" @@ -189,8 +189,6 @@ task FilterPon { minimumScore: {description: "The minimum number normal samples an SV must have been found in to be kept.", category: "advanced"} outputDir: {description: "The directory the output will be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -394,13 +392,15 @@ task SomaticFilter { Int timeMinutes = 60 } + String ponDir = sub(ponBed, basename(ponBed), "") + command { set -e mkdir -p $(dirname ~{outputPath}) mkdir -p $(dirname ~{fullOutputPath}) gridss_somatic_filter \ - --pondir ~{dirname(ponBed)} \ + --pondir ~{ponDir} \ --input ~{vcfFile} \ --output ~{outputPath} \ --fulloutput ~{fullOutputPath} @@ -414,7 +414,6 @@ task SomaticFilter { } runtime { - cpu: threads memory: memory time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage From 34b3732319f7d74c72f93ff1bcb05ccc675585f8 Mon Sep 17 00:00:00 2001 From: imoustakas Date: Wed, 25 Jan 2023 16:47:10 +0100 Subject: [PATCH 477/668] Add a number of macs2 flags so we can adhere to Encode --- macs2.wdl | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index e6a011ad..53be0abd 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -30,8 +30,15 @@ task PeakCalling { String sampleName String format = "AUTO" Boolean nomodel = false + String gensz = "hs" + Int extsize + Int shiftsize = -1*round(extsize/2) + Float pval_thres = 0.01 + Boolean bdg = true + String keepdup = "auto" + String callsummits = true Int timeMinutes = 600 # Default to 10 hours - String memory = "8GiB" + String memory = "8G" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } @@ -43,7 +50,14 @@ task PeakCalling { --outdir ~{outDir} \ --name ~{sampleName} \ -f ~{format} \ - ~{true='--nomodel' false='' nomodel} + -g ~{gensz} \ + -p ~{pval_thres} \ + --shift ~{shiftsize} \ + --extsize ~{extsize} \ + ~{true='--nomodel' false='' nomodel} \ + ~{true='-B' 
false='' bdg} \ + --keep-dup ~{keepdup} \ + ~{true='--call-summits' false='' callsummits} } output { @@ -64,6 +78,13 @@ task PeakCalling { sampleName: {description: "Name of the sample to be analysed", category: "required"} outDir: {description: "All output files will be written in this directory.", category: "advanced"} nomodel: {description: "Whether or not to build the shifting model.", category: "advanced"} + gensz: {description: "macs2 argument for setting the mappable genome size or effective genome size which is defined as the genome size which can be sequenced."} + pval_thres: {description: "macs2 argument for setting the p-value cutoff. If -p is specified, MACS2 will use p-value instead of q-value."} + shiftsize: {description: "macs2 argument to set an arbitrary shift in bp. Can be negative to indicate direction"} + extsize: {description: "macs2 argument to extend reads in 5'->3' direction to fix-sized fragments."} + bdg: {description: "macs2 argument that ebanbles the storage of the fragment pileup, control lambda in bedGraph files."} + keepdup: {description: "macs2 argument that controls the behavior towards duplicate tags at the exact same location."} + callsummits: {description: "macs2 argument to reanalyze the shape of signal profile to deconvolve subpeaks within each peak called from the general procedure."} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From ae937f28ab0147b572916c97448f6c788fa58e19 Mon Sep 17 00:00:00 2001 From: imoustakas Date: Wed, 25 Jan 2023 16:55:23 +0100 Subject: [PATCH 478/668] Fix data type error --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 53be0abd..854db814 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -36,7 +36,7 @@ task PeakCalling { Float pval_thres = 0.01 Boolean bdg = true String keepdup = "auto" - String callsummits = true + Boolean callsummits = true Int timeMinutes = 600 # Default to 10 hours String memory = "8G" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" From 2dca5f3611fd3aef0ee501cbe05467b590c93280 Mon Sep 17 00:00:00 2001 From: imoustakas Date: Wed, 25 Jan 2023 17:29:04 +0100 Subject: [PATCH 479/668] Address comments from Ruben --- macs2.wdl | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index 854db814..7b11c99f 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -29,16 +29,16 @@ task PeakCalling { String outDir = "macs2" String sampleName String format = "AUTO" - Boolean nomodel = false - String gensz = "hs" - Int extsize - Int shiftsize = -1*round(extsize/2) - Float pval_thres = 0.01 - Boolean bdg = true - String keepdup = "auto" - Boolean callsummits = true + Boolean? nomodel + String? gensz + Int? extsize + Int? shiftsize = -1*round(extsize/2) + Float? pval_thres + Boolean? bdg + String? keepdup + Boolean? 
callsummits Int timeMinutes = 600 # Default to 10 hours - String memory = "8G" + String memory = "8GiB" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } @@ -49,14 +49,14 @@ task PeakCalling { ~{true="--control" false="" length(controlBams) > 0} ~{sep = ' ' controlBams} \ --outdir ~{outDir} \ --name ~{sampleName} \ - -f ~{format} \ - -g ~{gensz} \ - -p ~{pval_thres} \ - --shift ~{shiftsize} \ - --extsize ~{extsize} \ + ~{"-f" + format} \ + ~{"-g" + gensz} \ + ~{"-p" + pval_thres} \ + ~{"--shift" + shiftsize} \ + ~{"--extsize" + extsize} \ ~{true='--nomodel' false='' nomodel} \ ~{true='-B' false='' bdg} \ - --keep-dup ~{keepdup} \ + ~{"--keep-dup" + keepdup} \ ~{true='--call-summits' false='' callsummits} } From e89b1d7d13fef289ba17ee0f6acc8e8b5415a217 Mon Sep 17 00:00:00 2001 From: imoustakas Date: Thu, 26 Jan 2023 10:31:15 +0100 Subject: [PATCH 480/668] Delete calculation for shiftsize --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 7b11c99f..8d89f3af 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -32,7 +32,7 @@ task PeakCalling { Boolean? nomodel String? gensz Int? extsize - Int? shiftsize = -1*round(extsize/2) + Int? shiftsize Float? pval_thres Boolean? bdg String? keepdup From e996878ae65113bc66add0caaf7b5d9efc75ad73 Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 10:59:13 +0100 Subject: [PATCH 481/668] Update macs2.wdl Co-authored-by: Davy Cats --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 8d89f3af..70fea707 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -29,7 +29,7 @@ task PeakCalling { String outDir = "macs2" String sampleName String format = "AUTO" - Boolean? nomodel + Boolean nomodel = false String? gensz Int? extsize Int? 
shiftsize From 055246a9082ec004ab335c7525685c888fd6e27f Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 10:59:24 +0100 Subject: [PATCH 482/668] Update macs2.wdl Co-authored-by: Davy Cats --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 70fea707..2c3bf57c 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -34,7 +34,7 @@ task PeakCalling { Int? extsize Int? shiftsize Float? pval_thres - Boolean? bdg + Boolean bdg = false String? keepdup Boolean? callsummits Int timeMinutes = 600 # Default to 10 hours From 72bbcce9084408ee7ba68a04dd8f121a8a793390 Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 10:59:35 +0100 Subject: [PATCH 483/668] Update macs2.wdl Co-authored-by: Davy Cats --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 2c3bf57c..c4c08ed5 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -36,7 +36,7 @@ task PeakCalling { Float? pval_thres Boolean bdg = false String? keepdup - Boolean? 
callsummits + Boolean callsummits = false Int timeMinutes = 600 # Default to 10 hours String memory = "8GiB" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" From 4b9754f548b8558e7de2652e257edd807d0d4ffa Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 10:59:43 +0100 Subject: [PATCH 484/668] Update macs2.wdl Co-authored-by: Davy Cats --- macs2.wdl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index c4c08ed5..9d5344ae 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -78,13 +78,13 @@ task PeakCalling { sampleName: {description: "Name of the sample to be analysed", category: "required"} outDir: {description: "All output files will be written in this directory.", category: "advanced"} nomodel: {description: "Whether or not to build the shifting model.", category: "advanced"} - gensz: {description: "macs2 argument for setting the mappable genome size or effective genome size which is defined as the genome size which can be sequenced."} - pval_thres: {description: "macs2 argument for setting the p-value cutoff. If -p is specified, MACS2 will use p-value instead of q-value."} - shiftsize: {description: "macs2 argument to set an arbitrary shift in bp. 
Can be negative to indicate direction"} - extsize: {description: "macs2 argument to extend reads in 5'->3' direction to fix-sized fragments."} - bdg: {description: "macs2 argument that ebanbles the storage of the fragment pileup, control lambda in bedGraph files."} - keepdup: {description: "macs2 argument that controls the behavior towards duplicate tags at the exact same location."} - callsummits: {description: "macs2 argument to reanalyze the shape of signal profile to deconvolve subpeaks within each peak called from the general procedure."} + gensz: {description: "macs2 argument for setting the mappable genome size or effective genome size which is defined as the genome size which can be sequenced.", category: "advanced"} + pval_thres: {description: "macs2 argument for setting the p-value cutoff. If -p is specified, MACS2 will use p-value instead of q-value.", category: "advanced"} + shiftsize: {description: "macs2 argument to set an arbitrary shift in bp. Can be negative to indicate direction.", category: "advanced"} + extsize: {description: "macs2 argument to extend reads in 5'->3' direction to fix-sized fragments.", category: "advanced"} + bdg: {description: "macs2 argument that enables the storage of the fragment pileup, control lambda in bedGraph files.", category: "advanced"} + keepdup: {description: "macs2 argument that controls the behavior towards duplicate tags at the exact same location.", category: "advanced"} + callsummits: {description: "macs2 argument to reanalyze the shape of signal profile to deconvolve subpeaks within each peak called from the general procedure.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From ece0782a37451b82677eedd1ed771d823b56e891 Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 11:26:19 +0100 Subject: [PATCH 485/668] Update CHANGELOG.md --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6b5e609..4962c687 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,15 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. Inputs added: + + nomodel + + gensz + + extsize + + shiftsize + + pval_thres + + bdg + + keepdup + + callsummits + Update samtools image to version 1.16. + Add targetsFile input for samtools View. + Mateclever's runtime attribute defaults were changed to: From 2b4fb7ea3fc9270af1caaea897f35d2b319c35fd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 26 Jan 2023 14:32:37 +0100 Subject: [PATCH 486/668] add missing paramter_meta --- gridss.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/gridss.wdl b/gridss.wdl index 82ac7fbd..8b27df77 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -238,6 +238,7 @@ task GeneratePonBedpe { vcfFiles: {description: "The vcf files with the normals as the first sample.", category: "required"} referenceFasta: {description: "The fasta of the reference genome.", category: "required"} outputDir: {description: "The directory the output will be written to.", category: "common"} + threads: {description: "The number of the threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} From 6d0329539033821b68ef31234ae7d6f920505aed Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Fri, 27 Jan 2023 09:41:54 +0100 Subject: [PATCH 487/668] Add space between flag and the value following --- macs2.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index 9d5344ae..5ccc5a5f 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -49,14 +49,14 @@ task PeakCalling { ~{true="--control" false="" length(controlBams) > 0} ~{sep = ' ' controlBams} \ --outdir ~{outDir} \ --name ~{sampleName} \ - ~{"-f" + format} \ - ~{"-g" + gensz} \ - ~{"-p" + pval_thres} \ - ~{"--shift" + shiftsize} \ - ~{"--extsize" + extsize} \ + ~{"-f " + format} \ + ~{"-g " + gensz} \ + ~{"-p " + pval_thres} \ + ~{"--shift " + shiftsize} \ + ~{"--extsize " + extsize} \ ~{true='--nomodel' false='' nomodel} \ ~{true='-B' false='' bdg} \ - ~{"--keep-dup" + keepdup} \ + ~{"--keep-dup " + keepdup} \ ~{true='--call-summits' false='' callsummits} } From b79e59b1f3279bfcb26446ee5c95f1c6bfb4b16e Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Fri, 27 Jan 2023 09:44:00 +0100 Subject: [PATCH 488/668] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4962c687..bd66a6ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Bug fix: Add space between flag and the value provided for macs2 + Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. 
Inputs added: + nomodel + gensz From ee0b137664a20f94997e9daad8b25cc2729dc88a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 27 Jan 2023 13:25:12 +0100 Subject: [PATCH 489/668] increase time for manta, add index to delly outputs --- delly.wdl | 2 ++ manta.wdl | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/delly.wdl b/delly.wdl index fab32784..43af3ca0 100644 --- a/delly.wdl +++ b/delly.wdl @@ -47,6 +47,7 @@ task CallSV { output { File dellyBcf = outputPath + File dellyBcfIndex = outputPath + ".csi" } runtime { @@ -100,6 +101,7 @@ task SomaticFilter { output { File filterBcf = outputPath + File filterBcfIndex = outputPath + ".csi" } runtime { diff --git a/manta.wdl b/manta.wdl index 6804f304..fde8c208 100644 --- a/manta.wdl +++ b/manta.wdl @@ -34,7 +34,7 @@ task Germline { Int cores = 1 Int memoryGb = 4 - Int timeMinutes = 60 + Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/manta:1.4.0--py27_1" } @@ -102,7 +102,7 @@ task Somatic { Int cores = 1 Int memoryGb = 4 - Int timeMinutes = 60 + Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/manta:1.4.0--py27_1" } From 1bf7725df8ff78628b3444d8ab6b6daa044836fd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 27 Jan 2023 13:28:02 +0100 Subject: [PATCH 490/668] add bcf index input for delly somatic filter --- delly.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/delly.wdl b/delly.wdl index 43af3ca0..ab72f060 100644 --- a/delly.wdl +++ b/delly.wdl @@ -77,6 +77,7 @@ task CallSV { task SomaticFilter { input { File dellyBcf + File dellyBcfIndex Array[String]+ normalSamples Array[String]+ tumorSamples String outputPath = "./delly/delly_filter.bcf" From 9af2205811e0708be46be8e88bc1c7e1387fdfda Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 27 Jan 2023 13:33:30 +0100 Subject: [PATCH 491/668] add index to delly call inputs as well --- delly.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/delly.wdl b/delly.wdl index ab72f060..12e68187 100644 --- 
a/delly.wdl +++ b/delly.wdl @@ -29,6 +29,7 @@ task CallSV { String outputPath = "./delly/delly.bcf" File? genotypeBcf + File? genotypeBcfIndex String memory = "15GiB" Int timeMinutes = 300 From 71193e8da89c9275c7f6d878e349f1bdc19543ba Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 27 Jan 2023 13:35:07 +0100 Subject: [PATCH 492/668] update parameter_meta --- delly.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/delly.wdl b/delly.wdl index 12e68187..2dc847b9 100644 --- a/delly.wdl +++ b/delly.wdl @@ -64,7 +64,8 @@ task CallSV { referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } outputPath: {description: "The location the output BCF file should be written.", category: "common"} - genotypeBcf: {description: "A BCF with SVs to get genotyped in the samples."} + genotypeBcf: {description: "A BCF with SVs to get genotyped in the samples.", category: "advanced"} + genotypeBcfIndex: {description: "The index for the genotype BCF file.", category: "advanced"} memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -114,6 +115,7 @@ task SomaticFilter { parameter_meta { dellyBcf: {description: "The BCF file produced by delly.", category: "required"} + dellyBcfIndex: {description: "The index for the delly BCF file.", category: "required"} normalSamples: {description: "The names for the normal samples as used in the delly BCF file.", category: "required"} tumorSamples: {description: "The names for the tumor samples as used in the delly BCF file.", category: "required"} outputPath: {description: "The location the output BCF file should be written.", category: "common"} From dd9ea3db69c56bef6c1d5ed63c08e10e691c6d5b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 31 Jan 2023 11:13:40 +0100 Subject: [PATCH 493/668] give delly more time, specify normal ordinal in gridss GeneratePonBedpe command --- delly.wdl | 2 +- gridss.wdl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/delly.wdl b/delly.wdl index 2dc847b9..b952da7e 100644 --- a/delly.wdl +++ b/delly.wdl @@ -32,7 +32,7 @@ task CallSV { File? 
genotypeBcfIndex String memory = "15GiB" - Int timeMinutes = 300 + Int timeMinutes = 600 String dockerImage = "quay.io/biocontainers/delly:1.1.6--ha41ced6_0" } diff --git a/gridss.wdl b/gridss.wdl index 8b27df77..5c203a16 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -216,6 +216,7 @@ task GeneratePonBedpe { -cp /usr/local/share/gridss-2.12.2-0/gridss.jar \ gridss.GeneratePonBedpe \ INPUT=~{sep=" INPUT=" vcfFiles} \ + NO=0 \ O=~{outputDir}/gridss_pon_breakpoint.bedpe \ SBO=~{outputDir}/gridss_pon_single_breakend.bed \ REFERENCE_SEQUENCE=~{referenceFasta} \ From 48340415ab9c852ceefaf35e2b4e2ae8b47d3f66 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 31 Jan 2023 14:34:06 +0100 Subject: [PATCH 494/668] add missing fasta index input to gridss GeneratePonBedpe --- gridss.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 5c203a16..03fdc6ab 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -200,6 +200,7 @@ task GeneratePonBedpe { Array[File]+ vcfFiles Array[File]+ vcfIndexes File referenceFasta + File referenceFastaFai String outputDir = "." 
Int threads = 8 @@ -238,6 +239,7 @@ task GeneratePonBedpe { parameter_meta { vcfFiles: {description: "The vcf files with the normals as the first sample.", category: "required"} referenceFasta: {description: "The fasta of the reference genome.", category: "required"} + referenceFastaFai: {description: "The index for the reference genome fasta.", category: "required"} outputDir: {description: "The directory the output will be written to.", category: "common"} threads: {description: "The number of the threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 42796e37927b50b2dc25249a5ff92348ebf54ce0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 1 Feb 2023 16:03:47 +0100 Subject: [PATCH 495/668] fix output paths gridss somatic filter --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 03fdc6ab..b67f4c91 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -388,8 +388,8 @@ task SomaticFilter { File vcfIndex File ponBed File ponBedpe - String outputPath = "./high_confidence_somatic.vcf.gz" - String fullOutputPath = "./high_and_low_confidence_somatic.vcf.gz" + String outputPath = "./high_confidence_somatic.vcf.bgz" + String fullOutputPath = "./high_and_low_confidence_somatic.vcf.bgz" String memory = "16GiB" String dockerImage = "quay.io/biowdl/gridss:2.12.2" From b17076a642b17212499b6478e948661b0e9433c3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 1 Feb 2023 16:23:37 +0100 Subject: [PATCH 496/668] fix gridss somatic filter output paths? 
--- gridss.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index b67f4c91..5aca3825 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -388,8 +388,8 @@ task SomaticFilter { File vcfIndex File ponBed File ponBedpe - String outputPath = "./high_confidence_somatic.vcf.bgz" - String fullOutputPath = "./high_and_low_confidence_somatic.vcf.bgz" + String outputPath = "./high_confidence_somatic.vcf" + String fullOutputPath = "./high_and_low_confidence_somatic.vcf" String memory = "16GiB" String dockerImage = "quay.io/biowdl/gridss:2.12.2" @@ -411,10 +411,10 @@ task SomaticFilter { } output { - File fullVcf = fullOutputPath - File fullVcfIndex = "~{fullOutputPath}.tbi" - File highConfidenceVcf = outputPath - File highConfidenceVcfIndex = "~{outputPath}.tbi" + File fullVcf = "~{fullOutputPath}.bgz" + File fullVcfIndex = "~{fullOutputPath}.bgz.tbi" + File highConfidenceVcf = "~{outputPath}.bgz" + File highConfidenceVcfIndex = "~{outputPath}.bgz.tbi" } runtime { From d320b3c79bfc321fff1178ff571af520b7969043 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 6 Feb 2023 14:11:59 +0100 Subject: [PATCH 497/668] add samples option to bcftools view --- bcftools.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 726d2e37..7df8911d 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -349,6 +349,7 @@ task View { String? exclude String? 
include + Array[String] samples = [] String memory = "256MiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) @@ -364,6 +365,7 @@ task View { ~{"--exclude " + exclude} \ ~{"--include " + include} \ ~{true="--exclude-uncalled" false="" excludeUncalled} \ + ~{if length(samples) > 0 then "-s" else ""} ~{sep="," samples} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ ~{inputFile} @@ -389,6 +391,7 @@ task View { include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} excludeUncalled: {description: "Exclude sites without a called genotype (see man page for details).", category: "advanced"} + samples: {description: "A list of sample names to include.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 3961ab4e858d31163987bb267cbad30ea085b205 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Feb 2023 14:51:45 +0100 Subject: [PATCH 498/668] Allow a custom separator char --- umi.wdl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/umi.wdl b/umi.wdl index e7f01fc2..e4270ed6 100644 --- a/umi.wdl +++ b/umi.wdl @@ -29,6 +29,7 @@ task BamReadNameToUmiTag { File inputBam String outputPath = "output.bam" String umiTag = "RX" + String separatorChar = "_" String memory = "2GiB" Int timeMinutes = 1 + ceil(size([inputBam], "GiB") * 10) @@ -45,26 +46,26 @@ task BamReadNameToUmiTag { from typing import Tuple - def split_umi_from_name(name) -> Tuple[str, str]: + def split_umi_from_name(name, separator_char = "_") -> Tuple[str, str]: id_and_rest = name.split(maxsplit=1) id = id_and_rest[0] # If there was no whitespace id_and_rest will have length 1 other_parts = id_and_rest[1] if len(id_and_rest) == 2 else "" - underscore_index = id.rfind("_") + underscore_index = id.rfind(separator_char) umi = id[underscore_index + 1:] new_id = id[:underscore_index] if other_parts: return " ".join([new_id, other_parts]), umi return new_id, umi - def annotate_umis(in_file, out_file, bam_tag="RX"): + def annotate_umis(in_file, out_file, bam_tag="RX", separator_char = "_"): in_bam = pysam.AlignmentFile(in_file, "rb") os.makedirs(os.path.dirname(out_file), exist_ok=True) out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) # Encode bam_tag as bytes. Otherwise pysam converts it to bytes anyway. encoded_bam_tag = bam_tag.encode('ascii') for segment in in_bam: # type: pysam.AlignedSegment - new_name, umi = split_umi_from_name(segment.query_name) + new_name, umi = split_umi_from_name(segment.query_name, separator_char) segment.query_name = new_name # Encode umi as ascii. Otherwise pysam encodes it to bytes anyway. 
# Value type has to be a string though, otherwise pysam crashes. @@ -72,7 +73,7 @@ task BamReadNameToUmiTag { out_bam.write(segment) if __name__ == "__main__": - annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}") + annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}", "~{separatorChar}") pysam.index("~{outputPath}", "~{bamIndexPath}", b=True) CODE >>> @@ -93,6 +94,7 @@ task BamReadNameToUmiTag { inputBam: {description: "The input SAM file.", category: "required"} outputPath: {description: "Output directory path + output file.", category: "common"} umiTag: {description: "The tag used for UMIs in the output BAM file.", category: "common"} + separatorChar: {description: "Character used to separate the UMIs from the read name", category: "common"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 84a8781c4c94be08ba0f404902378d05db18fef9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Feb 2023 14:56:03 +0100 Subject: [PATCH 499/668] Update changelog with separatorChar --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bd66a6ba..4bab712a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add a separatorChar input to the tagUmi task. + Bug fix: Add space between flag and the value provided for macs2 + Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. 
Inputs added: + nomodel From b3c9204b77851836042190486f8031dbe79a9e2a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Feb 2023 14:57:25 +0100 Subject: [PATCH 500/668] Add missing interpunction --- umi.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/umi.wdl b/umi.wdl index e4270ed6..0628783a 100644 --- a/umi.wdl +++ b/umi.wdl @@ -94,7 +94,7 @@ task BamReadNameToUmiTag { inputBam: {description: "The input SAM file.", category: "required"} outputPath: {description: "Output directory path + output file.", category: "common"} umiTag: {description: "The tag used for UMIs in the output BAM file.", category: "common"} - separatorChar: {description: "Character used to separate the UMIs from the read name", category: "common"} + separatorChar: {description: "Character used to separate the UMIs from the read name.", category: "common"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From c3f246f24d05bda4ebfa781cff41dfe61bbf85b3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Feb 2023 13:05:31 +0100 Subject: [PATCH 501/668] update changelog --- CHANGELOG.md | 2 ++ scripts | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bab712a..3021817d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add a task for fastp. ++ Add a task for picard CollectInsertSizeMetrics. + Add a separatorChar input to the tagUmi task. + Bug fix: Add space between flag and the value provided for macs2 + Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. 
Inputs added: diff --git a/scripts b/scripts index 84690a30..98cc3e10 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 +Subproject commit 98cc3e10125c853a70f41ceccf8f9d5428d4c1a3 From ad97efa05229f147435ee0800b0a742a2c360435 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Feb 2023 13:06:00 +0100 Subject: [PATCH 502/668] update scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 98cc3e10..4142daab 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 98cc3e10125c853a70f41ceccf8f9d5428d4c1a3 +Subproject commit 4142daab81a7d9f28686b6a3299536757d381c81 From 669428627e26aaaafdba3ab680a37236eaa736da Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Feb 2023 13:20:13 +0100 Subject: [PATCH 503/668] update changelog --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bab712a..6e1daf97 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,14 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Increased the timeMinutes runtime attribute for manta (somatic and germline) to `2880`. ++ Add a task for GRIDSS somatic filtering. ++ Add a task to generate a panel of normals BED and BEDPE file for GRIDSS. ++ Add a task to filter a GRIDSS PON. ++ Add a task for delly somatic filtering. ++ Delly CallSV's `bamFile` and `bamIndex` inputs are not arrays of files, allowing + for multiple samples to be included. ++ Add `samples` input to bcftools view to select samples included in the output vcf. + Add a separatorChar input to the tagUmi task. + Bug fix: Add space between flag and the value provided for macs2 + Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. 
Inputs added: From 7b9e07652461788748ed4907dd8264cbbb27ce80 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Feb 2023 13:20:49 +0100 Subject: [PATCH 504/668] update scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 84690a30..4142daab 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 +Subproject commit 4142daab81a7d9f28686b6a3299536757d381c81 From eba9ad4c057cf7468bd7982930af484765d1a257 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 23 Feb 2023 15:55:01 +0100 Subject: [PATCH 505/668] add some options to disable filters in fastp --- fastp.wdl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index db4a2d40..68c0e5cd 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -36,6 +36,8 @@ task Fastp { Int lengthRequired = 15 Int? split Boolean performAdapterTrimming = true + Boolean performQualityFiltering = true + Boolean performLengthFiltering = true Int threads = 4 String memory = "50GiB" @@ -73,7 +75,9 @@ task Fastp { --thread ~{select_first([effectiveSplit, threads])} \ ~{"--split " + effectiveSplit} \ ~{if defined(effectiveSplit) then "-d 0" else ""} \ - ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} + ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} \ + ~{if performQualityFiltering then "" else "--disable_quality_filtering"} \ + ~{if performLengthFiltering then "" else "--disable_length_filtering"} >>> output { @@ -102,6 +106,8 @@ task Fastp { lengthRequired: {description: "The minimum read length.", category: "advanced"} split: {description: "The number of chunks to split the files into. 
Number of threads will be set equal to the amount of splits.", category: "common"} performAdapterTrimming: {description: "Whether adapter trimming should be performed or not.", category: "advanced"} + performQualityFiltering: {description: "Whether reads should be filtered based on quality scores.", category: "advanced"} + performLengthFiltering: {description: "Whether reads shoulde be filtered based on lengths.", catgegory: "advanced"} threads: {description: "The number of threads to use. Only used if the split input is not set.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From ab54bb588cd66f009df79bbf00b2238f0436fad6 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 3 Mar 2023 14:11:06 +0100 Subject: [PATCH 506/668] add option to enable/disable ploy-g trimming to fastp task --- fastp.wdl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index 68c0e5cd..9849738b 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -38,6 +38,7 @@ task Fastp { Boolean performAdapterTrimming = true Boolean performQualityFiltering = true Boolean performLengthFiltering = true + Boolean? performPolyGTrimming Int threads = 4 String memory = "50GiB" @@ -50,6 +51,11 @@ task Fastp { String outputDirR1 = sub(outputPathR1, basename(outputPathR1), "") String outputDirR2 = sub(outputPathR2, basename(outputPathR2), "") + String polyGTrimmingFlag = if defined(performPolyGTrimming) + then + if select_first([performPolyGTrimming]) then "--trim_poly_g" else "--disable_trim_poly_g" + else "" + Int? 
effectiveSplit = if select_first([split, 1]) > 1 then split else noneInt command <<< @@ -77,7 +83,8 @@ task Fastp { ~{if defined(effectiveSplit) then "-d 0" else ""} \ ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} \ ~{if performQualityFiltering then "" else "--disable_quality_filtering"} \ - ~{if performLengthFiltering then "" else "--disable_length_filtering"} + ~{if performLengthFiltering then "" else "--disable_length_filtering"} \ + ~{polyGTrimmingFlag} >>> output { @@ -108,6 +115,7 @@ task Fastp { performAdapterTrimming: {description: "Whether adapter trimming should be performed or not.", category: "advanced"} performQualityFiltering: {description: "Whether reads should be filtered based on quality scores.", category: "advanced"} performLengthFiltering: {description: "Whether reads shoulde be filtered based on lengths.", catgegory: "advanced"} + performPolyGTrimming: {description: "Whether or not poly-G-tail trimming should be performed. If undefined fastp's default behaviour will be used, ie. enabled for NextSeq/NovaSeq data as detected from read headers.", category: "advanced"} threads: {description: "The number of threads to use. Only used if the split input is not set.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 5d35105b452167ab9e09a9b0d9c041d2af84f253 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 3 Mar 2023 16:30:34 +0100 Subject: [PATCH 507/668] add purple options needed for shallow mode --- hmftools.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index e051dc99..78156f67 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1070,6 +1070,8 @@ task Purple { File driverGenePanel File somaticHotspots File germlineHotspots + Float? highlyDiploidPercentage + Float? 
somaticMinPuritySpread #The following should be in the same directory. File geneDataCsv File proteinFeaturesCsv @@ -1103,6 +1105,8 @@ task Purple { -run_drivers \ -somatic_hotspots ~{somaticHotspots} \ -driver_gene_panel ~{driverGenePanel} \ + ~{"-highly_diploid_percentage " + highlyDiploidPercentage} \ + ~{"-somatic_min_purity_spread " + somaticMinPuritySpread} \ -threads ~{threads} } From 36a4575e20c54b062995b96c24f68733affce707 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 10 Mar 2023 11:14:24 +0100 Subject: [PATCH 508/668] update parameter_meta and changelog --- CHANGELOG.md | 2 ++ hmftools.wdl | 2 ++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ce03ffc..753daf30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add the `highlyDiploidPercentage` and `somaticMinPuritySpread` inputs to the + hmtools PURPLE task. + Add a task for fastp. + Add a task for picard CollectInsertSizeMetrics. + Increased the timeMinutes runtime attribute for manta (somatic and germline) to `2880`. 
diff --git a/hmftools.wdl b/hmftools.wdl index 78156f67..c27630a1 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1186,6 +1186,8 @@ task Purple { driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} somaticHotspots: {description: "A vcf file with hotspot somatic variant sites.", category: "required"} germlineHotspots: {description: "A vcf file with hotspot germline variant sites.", category: "required"} + highlyDiploidPercentage: {description: "Equivalent to PURPLE's `-highly_diploid_percentage` option.", category: "advanced"} + somaticMinPuritySpread: {description: "Equivalent to PURPLE's `-somatic_min_purity_spread` option.", category: "advanced"} geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} From 1a57c2ed292504f138d8bb15ae145b7145ba6c1c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Apr 2023 15:08:41 +0200 Subject: [PATCH 509/668] Set stable version --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 753daf30..7e62171b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> -version 5.1.0-dev +version 5.1.0 --------------------------- + Add the `highlyDiploidPercentage` and `somaticMinPuritySpread` inputs to the hmtools PURPLE task. From 9394a3e29a0227e3dc1dc30700ad1d7e65b7e448 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Apr 2023 15:10:35 +0200 Subject: [PATCH 510/668] setting next version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 09b254e9..91ff5727 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -6.0.0 +5.2.0 From 64aa91e7db5e96625122b4484fb7d857a9ef2c13 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Apr 2023 16:32:09 +0200 Subject: [PATCH 511/668] Update cutadapt and FastQC --- CHANGELOG.md | 6 ++++++ cutadapt.wdl | 2 +- fastqc.wdl | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e62171b..a13b2f6c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> + +version 5.2.0-dev +--------------------------- ++ Update cutadapt version to 4.4 ++ Update FastQC version to 0.12.1 + version 5.1.0 --------------------------- + Add the `highlyDiploidPercentage` and `somaticMinPuritySpread` inputs to the diff --git a/cutadapt.wdl b/cutadapt.wdl index 9a67692c..191e6f0a 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -85,7 +85,7 @@ task Cutadapt { Int cores = 4 String memory = "5GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) - String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" + String dockerImage = "quay.io/biocontainers/cutadapt:4.4--py310h1425a21_0" } String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) diff --git a/fastqc.wdl b/fastqc.wdl index d821e531..59592d4e 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -47,7 +47,7 @@ task Fastqc { Int threads = 1 String memory = "2GiB" Int timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 - String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" + String dockerImage = "quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0" Array[File]? noneArray File? 
noneFile From 5cf560b5a9e69ba683c431193c330fdb7a41c028 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Apr 2023 16:55:44 +0200 Subject: [PATCH 512/668] Update classpath --- fastqc.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastqc.wdl b/fastqc.wdl index 59592d4e..da31882c 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -67,7 +67,7 @@ task Fastqc { command <<< set -e mkdir -p "~{outdirPath}" - FASTQC_DIR="/usr/local/opt/fastqc-0.11.9" + FASTQC_DIR="/usr/local/opt/fastqc-0.12.1" export CLASSPATH="$FASTQC_DIR:$FASTQC_DIR/sam-1.103.jar:$FASTQC_DIR/jbzip2-0.9.jar:$FASTQC_DIR/cisd-jhdf5.jar" java -Djava.awt.headless=true -XX:ParallelGCThreads=1 \ -Xms200M -Xmx~{javaXmx} \ From 0ed76c14ffe5ab4779ed42f924fbcab1acdda266 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 1 May 2023 15:46:55 +0200 Subject: [PATCH 513/668] Stable version in changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a13b2f6c..1551d13d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> -version 5.2.0-dev +version 5.2.0 --------------------------- + Update cutadapt version to 4.4 + Update FastQC version to 0.12.1 From 73f769bb966f67b9bf3fd72b9f5c4d6f923ccafa Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 1 May 2023 15:52:47 +0200 Subject: [PATCH 514/668] setting next version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 91ff5727..03f488b0 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -5.2.0 +5.3.0 From 0062b727197ae2601b234d7a69ae0f64bd7b59d1 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Thu, 8 Jun 2023 13:01:16 +0200 Subject: [PATCH 515/668] Add revcomp flag to cutadapt --- CHANGELOG.md | 5 +++++ cutadapt.wdl | 3 +++ 2 files changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1551d13d..5eb2ef17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> +version 5.3.0-dev +--------------------------- ++ Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. + + version 5.2.0 --------------------------- + Update cutadapt version to 4.4 diff --git a/cutadapt.wdl b/cutadapt.wdl index 191e6f0a..a164e360 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -81,6 +81,7 @@ task Cutadapt { Boolean? bwa Boolean? zeroCap Boolean? 
noZeroCap + Boolean revcomp = false Int cores = 4 String memory = "5GiB" @@ -149,6 +150,7 @@ task Cutadapt { ~{true="--bwa" false="" bwa} \ ~{true="--zero-cap" false="" zeroCap} \ ~{true="--no-zero-cap" false="" noZeroCap} \ + ~{if revcomp then "--revcomp" else ""} ~{read1} \ ~{read2} \ ~{"> " + reportPath} @@ -231,6 +233,7 @@ task Cutadapt { bwa: {description: "Equivalent to cutadapt's --bwa flag.", category: "advanced"} zeroCap: {description: "Equivalent to cutadapt's --zero-cap flag.", category: "advanced"} noZeroCap: {description: "Equivalent to cutadapt's --no-zero-cap flag.", category: "advanced"} + revcomp: {description: "Equivalent to cutadapt's --revcomp flag.", category: "advanced"} cores: {description: "The number of cores to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 502d73003072327d9756b4b2ce0c2f768ff1192a Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Thu, 8 Jun 2023 13:02:14 +0200 Subject: [PATCH 516/668] add missing backslash --- cutadapt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index a164e360..c695c08e 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -150,7 +150,7 @@ task Cutadapt { ~{true="--bwa" false="" bwa} \ ~{true="--zero-cap" false="" zeroCap} \ ~{true="--no-zero-cap" false="" noZeroCap} \ - ~{if revcomp then "--revcomp" else ""} + ~{if revcomp then "--revcomp" else ""} \ ~{read1} \ ~{read2} \ ~{"> " + reportPath} From cebb1b535be90193ed27c57f3ea2c659f20bfe39 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Mon, 12 Jun 2023 15:20:13 +0200 Subject: [PATCH 517/668] add a task for fastqFilter --- fastqFilter.wdl | 66 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 fastqFilter.wdl diff --git a/fastqFilter.wdl b/fastqFilter.wdl new file mode 100644 index 
00000000..d436b1ab --- /dev/null +++ b/fastqFilter.wdl @@ -0,0 +1,66 @@ +version 1.0 + +# MIT License +# +# Copyright (c) 2023 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task FastqFilter { + input { + Array[File]+ fastq + Array[String]+ outputPaths + Int? minLength + Int? 
maxLength + + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(seqFile, "G")) + String dockerImage = "quay.io/biocontainers/fastq-filter:0.3.0--py39hf95cd2a_1" + } + + command { + set -e + mkdir -p $(dirname ~{sep=" " outputPaths}) + fastq-filter \ + -o ~{sep=" -o " outputPaths} \ + ~{"-l " + minLength} \ + ~{"-L " + maxLength} \ + ~{sep=" " fastq} + } + + output { + Array[File] filtered = outputPaths + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + fastq: {description: "A list of fastq files to filter.", category: "required"} + outputPaths: {description: "A list containing the output paths for each input fastq file.", category: "required"} + minLength: {description: "Equivalent to fastq-filter's `--min-length` option.", category: "common"} + maxLength: {description: "Equivalent to fastq-filter's `--max-length` option.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file From 93e491d37de5780bea73010323dcef939814cdbc Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Mon, 12 Jun 2023 15:21:47 +0200 Subject: [PATCH 518/668] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5eb2ef17..34bf0600 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.3.0-dev --------------------------- ++ Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. 
From 3fc46b91cc63c31b1477692638492fdda9bbc084 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Mon, 12 Jun 2023 15:32:34 +0200 Subject: [PATCH 519/668] fix copy-paste error --- fastqFilter.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastqFilter.wdl b/fastqFilter.wdl index d436b1ab..2b2fcc45 100644 --- a/fastqFilter.wdl +++ b/fastqFilter.wdl @@ -30,7 +30,7 @@ task FastqFilter { Int? maxLength String memory = "4GiB" - Int timeMinutes = 1 + ceil(size(seqFile, "G")) + Int timeMinutes = 1 + ceil(size(fastq, "G")) String dockerImage = "quay.io/biocontainers/fastq-filter:0.3.0--py39hf95cd2a_1" } From 2bc4c06dd89444b6ccb42244a566873ba7fad5a2 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Tue, 13 Jun 2023 09:37:54 +0200 Subject: [PATCH 520/668] use 1GiB for fastqFilter --- fastqFilter.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastqFilter.wdl b/fastqFilter.wdl index 2b2fcc45..3701b8aa 100644 --- a/fastqFilter.wdl +++ b/fastqFilter.wdl @@ -29,7 +29,7 @@ task FastqFilter { Int? minLength Int? maxLength - String memory = "4GiB" + String memory = "1GiB" Int timeMinutes = 1 + ceil(size(fastq, "G")) String dockerImage = "quay.io/biocontainers/fastq-filter:0.3.0--py39hf95cd2a_1" } From 3fb2c1de2e19f68f7a3ab3e205864bff21bb3ba1 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg <15814544+Redmar-van-den-Berg@users.noreply.github.com> Date: Thu, 7 Sep 2023 08:48:09 +0200 Subject: [PATCH 521/668] Use softlink instead of hardlinks If the database files are on a different filesystem then the analysis folder, hardlinks are not allowed, leading to crashes. 
--- centrifuge.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index 757af239..41a907ae 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -122,7 +122,7 @@ task Classify { indexBasename="$(basename ~{sub(indexFiles[0], "\.[0-9]\.cf", "")})" for file in ~{sep=" " indexFiles} do - ln ${file} $PWD/"$(basename ${file})" + ln -s ${file} $PWD/"$(basename ${file})" done centrifuge \ ~{inputFormatOptions[inputFormat]} \ @@ -199,7 +199,7 @@ task Inspect { indexBasename="$(basename ~{sub(indexFiles[0], "\.[0-9]\.cf", "")})" for file in ~{sep=" " indexFiles} do - ln ${file} $PWD/"$(basename ${file})" + ln -s ${file} $PWD/"$(basename ${file})" done centrifuge-inspect \ ~{outputOptions[printOption]} \ @@ -256,7 +256,7 @@ task KReport { indexBasename="$(basename ~{sub(indexFiles[0], "\.[0-9]\.cf", "")})" for file in ~{sep=" " indexFiles} do - ln ${file} $PWD/"$(basename ${file})" + ln -s ${file} $PWD/"$(basename ${file})" done centrifuge-kreport \ -x $PWD/${indexBasename} \ From 44cdc1862bf20b1cf77f0fedfb0ba25b3e5efa43 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 7 Sep 2023 08:52:12 +0200 Subject: [PATCH 522/668] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 34bf0600..6acbbc85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.3.0-dev --------------------------- ++ Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. 
From 7404b0e6f7470c4d04d80f7037f1068ad091d9ba Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 26 Aug 2024 17:07:03 +0200 Subject: [PATCH 523/668] Add a selectGenotype switch --- gatk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index 0b93efe6..a2aff322 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1514,6 +1514,7 @@ task SelectVariants { Array[File] intervals = [] String? selectTypeToInclude + String? selectGenotype String javaXmx = "4G" String memory = "5GiB" @@ -1529,6 +1530,7 @@ task SelectVariants { -R ~{referenceFasta} \ -V ~{inputVcf} \ ~{"--select-type-to-include " + selectTypeToInclude} \ + ~{"-select-genotype " + selectGenotype} \ ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} \ -O ~{outputPath} } From d86d9cb89a8f8b74ad2b714a23e1686fd4f26e3d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 27 Aug 2024 10:19:18 +0200 Subject: [PATCH 524/668] Quote select genotype value --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index a2aff322..f272a2f9 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1530,7 +1530,7 @@ task SelectVariants { -R ~{referenceFasta} \ -V ~{inputVcf} \ ~{"--select-type-to-include " + selectTypeToInclude} \ - ~{"-select-genotype " + selectGenotype} \ + ~{"-select-genotype \"" + selectGenotype}~{true="\"" false="" defined(selectGenotype)} \ ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} \ -O ~{outputPath} } From 558c9b7d7370b0f46346c16beaa4d4cb3f48b09e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 30 Aug 2024 15:23:55 +0200 Subject: [PATCH 525/668] Add exclude filtered expression --- gatk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index f272a2f9..230674a5 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1513,6 +1513,7 @@ task SelectVariants { String outputPath = "output.vcf.gz" Array[File] intervals = [] + Boolean excludeFiltered = false String? selectTypeToInclude String? 
selectGenotype @@ -1531,6 +1532,7 @@ task SelectVariants { -V ~{inputVcf} \ ~{"--select-type-to-include " + selectTypeToInclude} \ ~{"-select-genotype \"" + selectGenotype}~{true="\"" false="" defined(selectGenotype)} \ + ~{true="--exclude-filtered" false="" excludeFiltered} \ ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} \ -O ~{outputPath} } From 75f36133cb52ce6f02701ff11612f6884a8d1726 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 18 Oct 2024 14:52:33 +0200 Subject: [PATCH 526/668] Use reference files in rtg-tools tasks to make tasks cacheable --- rtg.wdl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/rtg.wdl b/rtg.wdl index 3e9dab9b..62e1e77f 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -24,8 +24,7 @@ task Format { input { Array[File]+ inputFiles String format = "fasta" - String outputPath = "seq_data.sdf" - + String outputPath = "reference_data" String rtgMem = "8G" String memory = "9GiB" Int timeMinutes = 1 + ceil(size(inputFiles, "GiB") * 2) @@ -41,7 +40,7 @@ task Format { } output { - File sdf = outputPath + Array[File] referenceFiles = glob("~{outputPath}/*") } runtime { @@ -61,7 +60,7 @@ task Format { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - sdf: {description: "RTGSequence Data File (SDF) format version of the input file(s)."} + referenceFiles: {description: "An array with all the generated reference files"} } } @@ -74,7 +73,7 @@ task VcfEval { Boolean squashPloidy = false String outputMode = "split" String outputDir = "output/" - File template + Array[File] referenceFiles Boolean allRecords = false Boolean decompose = false Boolean refOverlap = false @@ -99,7 +98,7 @@ task VcfEval { ~{"--evaluation-regions " + evaluationRegions} \ ~{"--bed-regions " + bedRegions} \ --output ~{outputDir} \ - --template ~{template} \ + --template $(dirname ~{referenceFiles[0]}) \ ~{true="--all-records" false="" allRecords} \ ~{true="--decompose" false="" decompose} \ ~{true="--ref-overlap" false="" refOverlap} \ @@ -152,7 +151,7 @@ task VcfEval { squashPloidy: {description: "treat heterozygous genotypes as homozygous ALT in both baseline and calls, to allow matches that ignore zygosity differences.", category: "common"} outputMode: {description: "output reporting mode. 
Allowed values are [split, annotate, combine, ga4gh, roc-only] (Default is split).", category: "advanced"} outputDir: {description: "Directory for output.", category: "advanced"} - template: {description: "SDF of the reference genome the variants are called against.", category: "required"} + referenceFiles: {description: "An array of reference Files generated by the Format task.", category: "required"} allRecords: {description: "use all records regardless of FILTER status (Default is to only process records where FILTER is \".\" or \"PASS\").", category: "common"} decompose: {description: "decompose complex variants into smaller constituents to allow partial credit.", category: "common"} refOverlap: {description: "allow alleles to overlap where bases of either allele are same-as-ref (Default is to only allow VCF anchor base overlap).", category: "common"} From 53d5083e5ca9de973eba1916dc273e0ff3dd9e04 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 11:04:31 +0100 Subject: [PATCH 527/668] Update minimap2 task to output sorted BAM --- minimap2.wdl | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/minimap2.wdl b/minimap2.wdl index 96cc7734..47464585 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -81,15 +81,19 @@ task Indexing { task Mapping { input { String presetOption - Int kmerSize = 15 - Boolean skipSelfAndDualMappings = false - Boolean outputSam = false String outputPrefix - Boolean addMDTagToSam = false - Boolean secondaryAlignment = false File referenceFile File queryFile + + Int compressionLevel = 1 + Int additionalSortThreads = 1 + Int sortMemoryGb = 1 + Boolean skipSelfAndDualMappings = false + Boolean addMDTagToSam = false + Boolean secondaryAlignment = true + + Int? kmerSize Int? maxIntronLength Int? maxFragmentLength Int? retainMaxSecondaryAlignments @@ -97,8 +101,8 @@ task Mapping { Int? mismatchPenalty String? 
howToFindGTAG - Int cores = 4 - String memory = "30GiB" + Int cores = 8 + String memory = "24GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } @@ -108,13 +112,11 @@ task Mapping { mkdir -p "$(dirname ~{outputPrefix})" minimap2 \ -x ~{presetOption} \ - -k ~{kmerSize} \ ~{true="-X" false="" skipSelfAndDualMappings} \ - ~{true="-a" false="" outputSam} \ - -o ~{outputPrefix} \ ~{true="--MD" false="" addMDTagToSam} \ --secondary=~{true="yes" false="no" secondaryAlignment} \ -t ~{cores} \ + ~{"-k " + kmerSize} \ ~{"-G " + maxIntronLength} \ ~{"-F " + maxFragmentLength} \ ~{"-N " + retainMaxSecondaryAlignments} \ @@ -122,11 +124,18 @@ task Mapping { ~{"-B " + mismatchPenalty} \ ~{"-u " + howToFindGTAG} \ ~{referenceFile} \ - ~{queryFile} + ~{queryFile} \ + | samtools sort \ + -@ ~{additionalSortThreads} \ + -l ~{compressionLevel} \ + -m ~{sortMemoryGb}G \ + -o ~{outputPrefix}.bam + samtools index -o ~{outputPrefix}.bam } output { - File alignmentFile = outputPrefix + File bam = ~{outputPrefix}.bam + File bamIndex = ~{outputPrefix}.bam.bai } runtime { From 77506d8d208b524cfb2427314d4568aac75e4b87 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 11:05:39 +0100 Subject: [PATCH 528/668] Add a flag for namesorting --- minimap2.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/minimap2.wdl b/minimap2.wdl index 47464585..64313ef4 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -88,6 +88,7 @@ task Mapping { Int compressionLevel = 1 Int additionalSortThreads = 1 Int sortMemoryGb = 1 + Boolean nameSorted = false Boolean skipSelfAndDualMappings = false Boolean addMDTagToSam = false @@ -126,6 +127,7 @@ task Mapping { ~{referenceFile} \ ~{queryFile} \ | samtools sort \ + ~{true="-N" false="" nameSorted} \ -@ ~{additionalSortThreads} \ -l ~{compressionLevel} \ -m ~{sortMemoryGb}G \ From e78cfa0c198a65d60f6b1adb3e33878c02e5c90f Mon Sep 17 00:00:00 2001 From: Ruben 
Vorderman Date: Fri, 8 Nov 2024 11:18:46 +0100 Subject: [PATCH 529/668] Add clair3 task --- clair3.wdl | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 clair3.wdl diff --git a/clair3.wdl b/clair3.wdl new file mode 100644 index 00000000..eb18d208 --- /dev/null +++ b/clair3.wdl @@ -0,0 +1,61 @@ +version 1.0 + +# Copyright (c) 2024 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Clair3 { + input { + File bam + File bamIndex + File referenceFasta + File referenceFastaFai + String outputPrefix + File? model + String? 
builtinModel + String platform + Int threads = 8 + Boolean includeAllCtgs = false + String memory = "20GiB" + Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / cores) + String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" + } + + # A default set for testing + String modelArg = "~{true=model false=builtinModel, defined(model)}" + + command <<< + run_clair3.sh \ + --model=~{modelArg} \ + --ref_fn=~{reference_fasta} \ + --bam_fn=~{bam} \ + --output=out \ + --threads=~{threads} \ + --platform=~{platform} \ + ~{true="--include_all_ctgs" false =""} + mv out/merge_output.vcf.gz ~{prefix}.vcf.gz + mv out/merge_output.vcf.gz.tbi ~{prefix}.vcf.gz.tbi + >>> + output { + File vcf = "~{outputPrefix}.vcf.gz" + File vcfIndex = "~{outputPrefix}.vcf.gz.tbi" + } + + +} \ No newline at end of file From 0d84d673368819a78296f97f0f5b6c3225439ded Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 12:20:35 +0100 Subject: [PATCH 530/668] Add sequali and update multiqc to a version that supports it --- multiqc.wdl | 2 +- sequali.wdl | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 sequali.wdl diff --git a/multiqc.wdl b/multiqc.wdl index 21fc8a7d..f04a1021 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -58,7 +58,7 @@ task MultiQC { String? 
memory Int timeMinutes = 10 + ceil(size(reports, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/multiqc:1.9--py_1" + String dockerImage = "quay.io/biocontainers/multiqc:1.25.1--pyhdfd78af_0 " } Int memoryGb = 2 + ceil(size(reports, "GiB")) diff --git a/sequali.wdl b/sequali.wdl new file mode 100644 index 00000000..98700fb7 --- /dev/null +++ b/sequali.wdl @@ -0,0 +1,46 @@ +version 1.0 + +# Copyright (c) 2024 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Sequali { + input { + File reads + File? mate_reads + Int threads = 2 + String outDir = "." 
+ dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" + } + + command <<< + set -e + mkdir -p $(dirname outputDir) + sequali \ + --outdir ~{outDir} \ + --threads ~{threads} \ + ~{reads} \ + ~{mate_reads} + >>> + + output { + File html = basename(reads) + ".html" + File json = basename(reads) + ".json" + } +} \ No newline at end of file From 272842244d79797615aa430bb6836a8cb78ba8fd Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 12:45:59 +0100 Subject: [PATCH 531/668] Fix womtool validation errors --- clair3.wdl | 16 ++++++++-------- minimap2.wdl | 8 ++++---- sequali.wdl | 9 ++++++++- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index eb18d208..6c0c1d38 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -33,25 +33,25 @@ task Clair3 { Int threads = 8 Boolean includeAllCtgs = false String memory = "20GiB" - Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / cores) - String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" + Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) + String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } - # A default set for testing - String modelArg = "~{true=model false=builtinModel, defined(model)}" + String modelArg = "~{true=model false=builtinModel defined(model)}" command <<< run_clair3.sh \ --model=~{modelArg} \ - --ref_fn=~{reference_fasta} \ + --ref_fn=~{referenceFasta} \ --bam_fn=~{bam} \ --output=out \ --threads=~{threads} \ --platform=~{platform} \ - ~{true="--include_all_ctgs" false =""} - mv out/merge_output.vcf.gz ~{prefix}.vcf.gz - mv out/merge_output.vcf.gz.tbi ~{prefix}.vcf.gz.tbi + ~{true="--include_all_ctgs" false ="" includeAllCtgs} + mv out/merge_output.vcf.gz ~{outputPrefix}.vcf.gz + mv out/merge_output.vcf.gz.tbi ~{outputPrefix}.vcf.gz.tbi >>> + output { File vcf = "~{outputPrefix}.vcf.gz" File vcfIndex = "~{outputPrefix}.vcf.gz.tbi" diff --git a/minimap2.wdl b/minimap2.wdl index 64313ef4..fff5b4ec 
100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -136,8 +136,8 @@ task Mapping { } output { - File bam = ~{outputPrefix}.bam - File bamIndex = ~{outputPrefix}.bam.bai + File bam = "~{outputPrefix}.bam " + File bamIndex = "~{outputPrefix}.bam.bai" } runtime { @@ -152,7 +152,6 @@ task Mapping { presetOption: {description: "This option applies multiple options at the same time.", category: "common"} kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"} skipSelfAndDualMappings: {description: "Skip self and dual mappings (for the all-vs-all mode).", category: "advanced"} - outputSam: {description: "Output in the sam format.", category: "common"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} addMDTagToSam: {description: "Adds a MD tag to the sam output file.", category: "common"} secondaryAlignment: {description: "Whether to output secondary alignments.", category: "advanced"} @@ -170,6 +169,7 @@ task Mapping { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - alignmentFile: {description: "Mapping and alignment between collections of dna sequences file."} + bam: {description: "Mapping and alignment between collections of dna sequences file in BAM format."} + bamIndex: {description: "Accompanying index file for the BAM file."} } } diff --git a/sequali.wdl b/sequali.wdl index 98700fb7..c2eff2c9 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -26,7 +26,7 @@ task Sequali { File? mate_reads Int threads = 2 String outDir = "." 
- dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" + String dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" } command <<< @@ -43,4 +43,11 @@ task Sequali { File html = basename(reads) + ".html" File json = basename(reads) + ".json" } + + runtime { + cpu: threads + memory: "2GiB" + docker: dockerImage + time_minutes: 59 + } } \ No newline at end of file From 01ff19c51bf4b8ff28cf16b067bbb128d2d435b4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 13:12:56 +0100 Subject: [PATCH 532/668] Fix runtime issues --- clair3.wdl | 12 ++++++++++-- minimap2.wdl | 12 +++++++----- multiqc.wdl | 2 +- sequali.wdl | 4 ++-- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index 6c0c1d38..2d111a5d 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -34,12 +34,14 @@ task Clair3 { Boolean includeAllCtgs = false String memory = "20GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) - String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" + String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" } - String modelArg = "~{true=model false=builtinModel defined(model)}" + String modelArg = "~{if defined(model) then model else builtinModel}" command <<< + set -e + mkdir -p $(dirname ~{outputPrefix}) run_clair3.sh \ --model=~{modelArg} \ --ref_fn=~{referenceFasta} \ @@ -57,5 +59,11 @@ task Clair3 { File vcfIndex = "~{outputPrefix}.vcf.gz.tbi" } + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } } \ No newline at end of file diff --git a/minimap2.wdl b/minimap2.wdl index fff5b4ec..5709c998 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -105,13 +105,15 @@ task Mapping { Int cores = 8 String memory = "24GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) - String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" + # Minimap 2.28 samtools 1.20 + String dockerImage = 
"quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0" } command { set -e mkdir -p "$(dirname ~{outputPrefix})" minimap2 \ + -a \ -x ~{presetOption} \ ~{true="-X" false="" skipSelfAndDualMappings} \ ~{true="--MD" false="" addMDTagToSam} \ @@ -125,19 +127,19 @@ task Mapping { ~{"-B " + mismatchPenalty} \ ~{"-u " + howToFindGTAG} \ ~{referenceFile} \ - ~{queryFile} \ + ~{queryFile} \ | samtools sort \ ~{true="-N" false="" nameSorted} \ -@ ~{additionalSortThreads} \ -l ~{compressionLevel} \ -m ~{sortMemoryGb}G \ -o ~{outputPrefix}.bam - samtools index -o ~{outputPrefix}.bam + samtools index ~{outputPrefix}.bam } output { - File bam = "~{outputPrefix}.bam " - File bamIndex = "~{outputPrefix}.bam.bai" + File bam = "~{outputPrefix}.bam" + File bamIndex = "~{outputPrefix}.bam.bai" } runtime { diff --git a/multiqc.wdl b/multiqc.wdl index f04a1021..a2e32cdb 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -58,7 +58,7 @@ task MultiQC { String? 
memory Int timeMinutes = 10 + ceil(size(reports, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/multiqc:1.25.1--pyhdfd78af_0 " + String dockerImage = "quay.io/biocontainers/multiqc:1.25.1--pyhdfd78af_0" } Int memoryGb = 2 + ceil(size(reports, "GiB")) diff --git a/sequali.wdl b/sequali.wdl index c2eff2c9..ed6e5d40 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -40,8 +40,8 @@ task Sequali { >>> output { - File html = basename(reads) + ".html" - File json = basename(reads) + ".json" + File html = outDir + "/" + basename(reads) + ".html" + File json = outDir + "/" + basename(reads) + ".json" } runtime { From a488618740428dcc7e940a6b27750ff62b87428e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 13:28:56 +0100 Subject: [PATCH 533/668] Include all contigs by default for clair3 --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index 2d111a5d..d824ec13 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -31,7 +31,7 @@ task Clair3 { String? builtinModel String platform Int threads = 8 - Boolean includeAllCtgs = false + Boolean includeAllCtgs = true # Not the clair3 default, but generally what you want. String memory = "20GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" From 1bc3416c90953ba05d3e00370c74355ad0fa7c9b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 15:17:14 +0100 Subject: [PATCH 534/668] Work from a model tar file --- clair3.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index d824ec13..7b2d98fe 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -27,7 +27,7 @@ task Clair3 { File referenceFasta File referenceFastaFai String outputPrefix - File? model + File? modelTar String? 
builtinModel String platform Int threads = 8 @@ -37,10 +37,11 @@ task Clair3 { String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" } - String modelArg = "~{if defined(model) then model else builtinModel}" + String modelArg = "~{if defined(modelTar) then basename(select_first([modelTar]), '.tar.gz') else builtinModel}" command <<< set -e + ~{if defined(modelTar) then "tar -xvf " + modelTar else "" } mkdir -p $(dirname ~{outputPrefix}) run_clair3.sh \ --model=~{modelArg} \ From 8fa481125d3038034a2ae28fedf88809b10e0c98 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 11 Nov 2024 14:30:25 +0100 Subject: [PATCH 535/668] Set includeAlCtgs to false --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index 7b2d98fe..bc25394b 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -31,7 +31,7 @@ task Clair3 { String? builtinModel String platform Int threads = 8 - Boolean includeAllCtgs = true # Not the clair3 default, but generally what you want. 
+ Boolean includeAllCtgs = false String memory = "20GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" From 98d9e2c92b0655eb022bd9793b3449ba3eb52b9f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 12 Nov 2024 08:38:00 +0100 Subject: [PATCH 536/668] Increase memory --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index bc25394b..4184f49e 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -32,7 +32,7 @@ task Clair3 { String platform Int threads = 8 Boolean includeAllCtgs = false - String memory = "20GiB" + String memory = "~{threads + 16}GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" } From f13a7e2dbe793b2742080b91d90e42b29f6c0e6c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 13 Nov 2024 16:47:03 +0100 Subject: [PATCH 537/668] Update parameter_meta --- gatk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index 230674a5..655a0b66 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1558,6 +1558,8 @@ task SelectVariants { outputPath: {description: "The location the output VCF file should be written.", category: "advanced"} intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"} selectTypeToInclude: {description: "Select only a certain type of variants from the input file.", category: "common"} + excludeFiltered: {description: "Remove all variants that do not have a PASS filter", category: "advanced"} + selectGenotype: {description: "The genotype to be selected", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 3c8d2e73d12d9cd3101752dff2976f86d61b4c23 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 13 Nov 2024 16:48:14 +0100 Subject: [PATCH 538/668] Update changelog --- CHANGELOG.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6acbbc85..6db06e23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,13 +8,14 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 5.3.0-dev +version 6.0.0-dev --------------------------- ++ rtg Format and VcfEval tasks now handle reference as an array of files to enable caching. ++ Added --select-genotype and --exclude-filtered flags to GATK SelectVariants + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. - version 5.2.0 --------------------------- + Update cutadapt version to 4.4 From a6eec0e6af6554ba1c85a24e3a63b0bcd01cfe76 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 13 Dec 2024 15:51:29 +0100 Subject: [PATCH 539/668] Add a readgroup flag to minimap2 --- minimap2.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/minimap2.wdl b/minimap2.wdl index 5709c998..e785ffd7 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -101,6 +101,7 @@ task Mapping { Int? matchingScore Int? mismatchPenalty String? howToFindGTAG + String? 
readgroup Int cores = 8 String memory = "24GiB" @@ -126,6 +127,7 @@ task Mapping { ~{"-A " + matchingScore} \ ~{"-B " + mismatchPenalty} \ ~{"-u " + howToFindGTAG} \ + ~{"-R '" + readgroup}~{false="" true="'" defined(readgroup)} \ ~{referenceFile} \ ~{queryFile} \ | samtools sort \ From b717f3fa8d82d3bb040d3df134533839f5adec9d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 17 Dec 2024 17:33:59 +0100 Subject: [PATCH 540/668] Add -o pipefail --- minimap2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/minimap2.wdl b/minimap2.wdl index e785ffd7..95b84bc4 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -111,7 +111,7 @@ task Mapping { } command { - set -e + set -e -o pipefail mkdir -p "$(dirname ~{outputPrefix})" minimap2 \ -a \ From 42ca869223960072ca0f9fc1e87aae7f469a4d34 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 8 Jan 2025 17:35:29 +0100 Subject: [PATCH 541/668] Allow copying of comments from fastq --- minimap2.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/minimap2.wdl b/minimap2.wdl index 95b84bc4..daf47a9a 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -93,6 +93,7 @@ task Mapping { Boolean skipSelfAndDualMappings = false Boolean addMDTagToSam = false Boolean secondaryAlignment = true + Boolean copyCommentsFromFastq = true Int? kmerSize Int? 
maxIntronLength @@ -119,6 +120,7 @@ task Mapping { ~{true="-X" false="" skipSelfAndDualMappings} \ ~{true="--MD" false="" addMDTagToSam} \ --secondary=~{true="yes" false="no" secondaryAlignment} \ + ~{true="-y" false="" copyCommentsFromFastq} \ -t ~{cores} \ ~{"-k " + kmerSize} \ ~{"-G " + maxIntronLength} \ From 7240b178ef378d39b5cb0983cf3a681b0bf52488 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Jan 2025 15:53:23 +0100 Subject: [PATCH 542/668] Allow minimap2 to process uBAM --- minimap2.wdl | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/minimap2.wdl b/minimap2.wdl index daf47a9a..18127cb1 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -89,11 +89,19 @@ task Mapping { Int additionalSortThreads = 1 Int sortMemoryGb = 1 Boolean nameSorted = false + # MM, ML, MN -> Methylation flags + # Also keep the following flags for Sequali to be able to run on the mapped bam file and get ONT information. + # ch -> channel + # st -> start time + # du -> duration + # dx -> Whether read was duplex + # pi -> Parent ID for split read + + String tagsToKeep = "MM,ML,MN,ch,st,du,dx,pi" Boolean skipSelfAndDualMappings = false Boolean addMDTagToSam = false Boolean secondaryAlignment = true - Boolean copyCommentsFromFastq = true Int? kmerSize Int? maxIntronLength @@ -111,16 +119,21 @@ task Mapping { String dockerImage = "quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0" } - command { + # Always run data through samtools fastq. This supports both FASTQ and uBAM + # files. It does remove any existing FASTQ comments, but this should not be + # problematic for most files. 
+ + command <<< set -e -o pipefail mkdir -p "$(dirname ~{outputPrefix})" + samtools fastq -T "~{tagsToKeep}" ~{queryFile} | \ minimap2 \ -a \ -x ~{presetOption} \ ~{true="-X" false="" skipSelfAndDualMappings} \ ~{true="--MD" false="" addMDTagToSam} \ --secondary=~{true="yes" false="no" secondaryAlignment} \ - ~{true="-y" false="" copyCommentsFromFastq} \ + -y \ -t ~{cores} \ ~{"-k " + kmerSize} \ ~{"-G " + maxIntronLength} \ @@ -131,7 +144,7 @@ task Mapping { ~{"-u " + howToFindGTAG} \ ~{"-R '" + readgroup}~{false="" true="'" defined(readgroup)} \ ~{referenceFile} \ - ~{queryFile} \ + - \ | samtools sort \ ~{true="-N" false="" nameSorted} \ -@ ~{additionalSortThreads} \ @@ -139,7 +152,7 @@ task Mapping { -m ~{sortMemoryGb}G \ -o ~{outputPrefix}.bam samtools index ~{outputPrefix}.bam - } + >>> output { File bam = "~{outputPrefix}.bam" @@ -168,6 +181,7 @@ task Mapping { retainMaxSecondaryAlignments: {description: "Retain at most N secondary alignments.", category: "advanced"} matchingScore: {description: "Matching score.", category: "advanced"} mismatchPenalty: {description: "Mismatch penalty.", category: "advanced"} + tagsToKeep: {description: "Tags to keep from the input unaligned BAM file.", category: "Advanced"} howToFindGTAG: {description: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} From c7c1b5bb932de4ea6d1ca3069007d4e1ad5c168d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Jan 2025 16:04:08 +0100 Subject: [PATCH 543/668] Allow sample name to set --- clair3.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clair3.wdl b/clair3.wdl index 4184f49e..db2c2fb5 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -27,6 +27,7 @@ task Clair3 { File referenceFasta File referenceFastaFai String outputPrefix + String? sampleName File? 
modelTar String? builtinModel String platform @@ -50,6 +51,7 @@ task Clair3 { --output=out \ --threads=~{threads} \ --platform=~{platform} \ + ~{"--sample_name=" + sampleName} \ ~{true="--include_all_ctgs" false ="" includeAllCtgs} mv out/merge_output.vcf.gz ~{outputPrefix}.vcf.gz mv out/merge_output.vcf.gz.tbi ~{outputPrefix}.vcf.gz.tbi From e3ceb602b5baf955f850f30301a68bc1a1a1c970 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Jan 2025 16:47:20 +0100 Subject: [PATCH 544/668] Proper numshards to deepvariant and update it to latest version --- deepvariant.wdl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index 25d05bd9..2d212000 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -27,19 +27,19 @@ task RunDeepVariant { File inputBam File inputBamIndex String modelType - String outputVcf + String outputVcf = "sample.vcf.gz" String? postprocessVariantsExtraArgs File? customizedModel - Int? numShards + Int numShards = 4 String? outputGVcf String? outputGVcfIndex File? regions String? sampleName Boolean? 
VCFStatsReport = true - String memory = "3GiB" + String memory = "48GiB" Int timeMinutes = 5000 - String dockerImage = "google/deepvariant:1.0.0" + String dockerImage = "google/deepvariant:1.6.1" } command { @@ -62,6 +62,7 @@ task RunDeepVariant { memory: memory time_minutes: timeMinutes docker: dockerImage + cpu: numShards } output { From a5dca2e7596f50436beb6c69b597722dc4aaa764 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 11:28:46 +0100 Subject: [PATCH 545/668] Add modkit pileup --- modkit.wdl | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 modkit.wdl diff --git a/modkit.wdl b/modkit.wdl new file mode 100644 index 00000000..4ac6bfa6 --- /dev/null +++ b/modkit.wdl @@ -0,0 +1,64 @@ +version 1.0 + +# Copyright (c) 2025 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task Pileup { + input { + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" + File bam + File bamIndex + String outputBed = "output.bed" + File referenceFasta + File referenceFastaFai + + Int? intervalSize + File? includeBed + + Boolean cpg = false + Boolean combineMods = false + String logFilePath = "modkit.log" + + Int threads = 4 + + } + + command <<< + set -e + mkdir -p $(dirname ~{outputBed}) + mkdir -p $(dirname ~{logFilePath}) + modkit pileup \ + --threads ~{threads} \ + ~{"--interval-size " + intervalSize} \ + ~{"--include-bed " + includeBed} + --ref ~{referenceFasta} \ + ~{true="--cpg" false="" cpg} \ + ~{true="--combine-mods" false="" combineMods} \ + --log-filepath ~{logFilePath} \ + ~{bam} \ + ~{outputBed} + >>> + + runtime { + docker: dockerImage + cpu: threads + + } +} \ No newline at end of file From 085fc5dd691444c9bcdb6c0483413ce5c1cf8d5f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 14:15:28 +0100 Subject: [PATCH 546/668] Update modkit --- modkit.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modkit.wdl b/modkit.wdl index 4ac6bfa6..9f311121 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -56,6 +56,11 @@ task Pileup { ~{outputBed} >>> + output { + File out = outputBed + File logFile = logFilePath + } + runtime { docker: dockerImage cpu: threads From 3540b4a12a2b7d56249f2d20941a6526af9c8f6e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 14:19:14 +0100 Subject: [PATCH 547/668] Add memory to modkit --- modkit.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modkit.wdl b/modkit.wdl index 9f311121..96f92c41 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -37,6 +37,7 @@ task Pileup { String logFilePath = "modkit.log" Int threads = 4 + String memory = "16GiB" } @@ -64,6 +65,7 @@ task Pileup { runtime { docker: dockerImage cpu: threads + memory: memory } } \ No newline at end of file From bc179875e1cf04fcd4efc63338b73d1230e3ef96 Mon Sep 17 00:00:00 2001 From: Ruben 
Vorderman Date: Wed, 29 Jan 2025 14:23:21 +0100 Subject: [PATCH 548/668] Add missing backslash --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 96f92c41..4f8bceb4 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -48,7 +48,7 @@ task Pileup { modkit pileup \ --threads ~{threads} \ ~{"--interval-size " + intervalSize} \ - ~{"--include-bed " + includeBed} + ~{"--include-bed " + includeBed} \ --ref ~{referenceFasta} \ ~{true="--cpg" false="" cpg} \ ~{true="--combine-mods" false="" combineMods} \ From c69c5cb2031913669dba5bf2cfe1acc4b00fed95 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 15:44:00 +0100 Subject: [PATCH 549/668] Set rather high defaults for time and memory for modkit --- modkit.wdl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 4f8bceb4..d827d896 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -36,8 +36,9 @@ task Pileup { Boolean combineMods = false String logFilePath = "modkit.log" - Int threads = 4 - String memory = "16GiB" + Int threads = 8 + String memory = "48GiB" + Int timeMinutes = 4320 # 3 Days } @@ -66,6 +67,6 @@ task Pileup { docker: dockerImage cpu: threads memory: memory - + time_minutes: timeMinutes } } \ No newline at end of file From beec409c6e2ce345d6976f159d7da73b79110fe4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 16:35:59 +0100 Subject: [PATCH 550/668] Upgrade sequali memory --- sequali.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sequali.wdl b/sequali.wdl index ed6e5d40..664fc082 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -46,7 +46,7 @@ task Sequali { runtime { cpu: threads - memory: "2GiB" + memory: "4GiB" docker: dockerImage time_minutes: 59 } From a87956ed26298c48b29f23782dc268f8d8bf29ff Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 3 Feb 2025 15:10:51 +0100 Subject: [PATCH 551/668] Add modkit flags --- modkit.wdl | 6 ++++++ 1 file 
changed, 6 insertions(+) diff --git a/modkit.wdl b/modkit.wdl index d827d896..35d3c7fc 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -34,6 +34,9 @@ task Pileup { Boolean cpg = false Boolean combineMods = false + Boolean combineStrands = false + Boolean bedgraph = false + String? ignore String logFilePath = "modkit.log" Int threads = 8 @@ -50,9 +53,12 @@ task Pileup { --threads ~{threads} \ ~{"--interval-size " + intervalSize} \ ~{"--include-bed " + includeBed} \ + ~{"--ignore " + ignore} \ --ref ~{referenceFasta} \ ~{true="--cpg" false="" cpg} \ ~{true="--combine-mods" false="" combineMods} \ + ~{true="--combine-strands" false="" combineStrands} \ + ~{true="--bedgraph" false="" bedgraph} \ --log-filepath ~{logFilePath} \ ~{bam} \ ~{outputBed} From 730a8a7672b491ccac1dbfdab497a9420ac40f71 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 3 Feb 2025 16:12:37 +0100 Subject: [PATCH 552/668] Capture multiple output files --- modkit.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 35d3c7fc..1cac1bd1 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -65,7 +65,8 @@ task Pileup { >>> output { - File out = outputBed + File? 
out = outputBed # Normal mode + Array[File] outFiles = glob(outputBed + "/*") # Bedgraph mode File logFile = logFilePath } From ed50e2dfb30a8f354f4e0dd2a4f7ae5aeec952fe Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 4 Feb 2025 17:01:46 +0100 Subject: [PATCH 553/668] Update documentation for new tasks --- clair3.wdl | 22 ++++++++++++++++++++++ modkit.wdl | 30 +++++++++++++++++++++++++++++- sequali.wdl | 25 ++++++++++++++++++++++--- 3 files changed, 73 insertions(+), 4 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index db2c2fb5..709d59b5 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -69,4 +69,26 @@ task Clair3 { docker: dockerImage } + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + outputPrefix: {description: "The output prefix where the data should be placed.", category: "common"} + modelTar: {description: "The TAR file with the model", category: "common"} + builtinModel: {description: "The builtin model name (in case a tar file is not used)", category: "common"} + sampleName: {description: "The name of the sample in the VCF", category: "common"} + platform: {description: "platform setting for clair3.", category: "required"} + includeAllCtgs: {description: "whether or not to call all contigs in the reference", category: "advanced"} + threads: {description: "The number of threads to use for variant calling.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + vcf: {description: "Output VCF file."} + vcfIndex: {description: "Output VCF index."} + + } } \ No newline at end of file diff --git a/modkit.wdl b/modkit.wdl index 1cac1bd1..382bfc09 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -22,7 +22,6 @@ version 1.0 task Pileup { input { - String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" File bam File bamIndex String outputBed = "output.bed" @@ -42,6 +41,7 @@ task Pileup { Int threads = 8 String memory = "48GiB" Int timeMinutes = 4320 # 3 Days + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } @@ -76,4 +76,32 @@ task Pileup { memory: memory time_minutes: timeMinutes } + + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + outputBed: {description: "The output name where the data should be placed.", category: "common"} + + intervalSize: {description: "Sets the interval size", category: "advanced"} + includeBed: {description: "Bed file with regions to include", category: "advanced"} + cpg: {description: "Whether to call only at cpg sites", category: "advanced"} + combineMods: {description: "Whether to combine modifications in the output", category: "advanced"} + combineStrands: {description: "Whether to combine strands in the output", category: "advanced"} + bedgraph: {description: "Whether to create a folder instead with a bedgraph file", category: "advanced"} + ignore: {description: "Modification type to ignore. 
For example 'h'.", category: "advanced"} + logFilePath: {description: "Path where the log file should be written.", category: "advanced"} + + threads: {description: "The number of threads to use for variant calling.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + out: {description: "The output bed files. Not available when bedgraph = true."} + outFiles: {description: "Output files when bedgraph = true."} + logFile: {description: "The generated log file."} + } } \ No newline at end of file diff --git a/sequali.wdl b/sequali.wdl index 664fc082..cbd3d869 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -24,9 +24,12 @@ task Sequali { input { File reads File? mate_reads - Int threads = 2 String outDir = "." + + Int threads = 2 + String memory = "4GiB" String dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" + Int timeMinutes = 59 } command <<< @@ -46,8 +49,24 @@ task Sequali { runtime { cpu: threads - memory: "4GiB" + memory: memory docker: dockerImage - time_minutes: 59 + time_minutes: timeMinutes + } + parameter_meta { + # inputs + reads: {description: "A FASTQ or BAM file.", category: "required"} + mate_reads: {description: "FASTQ mate file"} + threads: {description: "The number of cores to use.", category: "advanced"} + + outDir: {description: "The path to write the output to.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + html: {description: "HTML report file."} + json: {description: "JSON report file for use with MultiQC."} } } \ No newline at end of file From 113d4c58930aa2fcde99eed5b018bb8061e612cb Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 09:13:34 +0100 Subject: [PATCH 554/668] Update changelog --- CHANGELOG.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6acbbc85..97a1d016 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,14 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 5.3.0-dev +version 6.0.0-dev --------------------------- ++ Add Sequali task. ++ Add Clair3 task. ++ Add Modkit task. ++ Modify minimap2 task to accept ubam input, including transfer of methylation + tags. Also sort the BAM output file by coordinate. ++ Update DeepVariant container and update resource requirements. + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. 
From a01b54a0b79a135b3ddf319f71e51d1ef06f0f56 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:00:01 +0100 Subject: [PATCH 555/668] Indent clair3 command --- clair3.wdl | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index 709d59b5..4d9092f2 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -41,20 +41,20 @@ task Clair3 { String modelArg = "~{if defined(modelTar) then basename(select_first([modelTar]), '.tar.gz') else builtinModel}" command <<< - set -e - ~{if defined(modelTar) then "tar -xvf " + modelTar else "" } - mkdir -p $(dirname ~{outputPrefix}) - run_clair3.sh \ - --model=~{modelArg} \ - --ref_fn=~{referenceFasta} \ - --bam_fn=~{bam} \ - --output=out \ - --threads=~{threads} \ - --platform=~{platform} \ - ~{"--sample_name=" + sampleName} \ - ~{true="--include_all_ctgs" false ="" includeAllCtgs} - mv out/merge_output.vcf.gz ~{outputPrefix}.vcf.gz - mv out/merge_output.vcf.gz.tbi ~{outputPrefix}.vcf.gz.tbi + set -e + ~{if defined(modelTar) then "tar -xvf " + modelTar else "" } + mkdir -p $(dirname ~{outputPrefix}) + run_clair3.sh \ + --model=~{modelArg} \ + --ref_fn=~{referenceFasta} \ + --bam_fn=~{bam} \ + --output=out \ + --threads=~{threads} \ + --platform=~{platform} \ + ~{"--sample_name=" + sampleName} \ + ~{true="--include_all_ctgs" false ="" includeAllCtgs} + mv out/merge_output.vcf.gz ~{outputPrefix}.vcf.gz + mv out/merge_output.vcf.gz.tbi ~{outputPrefix}.vcf.gz.tbi >>> output { From b409ca9ed22505252a4ddf8f451eb9b55be530f1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:12:07 +0100 Subject: [PATCH 556/668] More realistic resource requirements for modkit --- modkit.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 382bfc09..92905f06 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -39,8 +39,8 @@ task Pileup { String logFilePath = "modkit.log" Int threads = 8 - String memory = "48GiB" - Int 
timeMinutes = 4320 # 3 Days + String memory = "4GiB" + Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } From bdab5a4c0d0e8474bea79435cc128e50fe5109d2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:12:27 +0100 Subject: [PATCH 557/668] More specific bed file naming --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 92905f06..23269bf3 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -24,7 +24,7 @@ task Pileup { input { File bam File bamIndex - String outputBed = "output.bed" + String outputBed = "output.methyl.bed" File referenceFasta File referenceFastaFai From c79ebd4affcc6524e671da9d6d63f98c9d3674c8 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:13:54 +0100 Subject: [PATCH 558/668] Correct file extension for modkit --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 23269bf3..930b6de9 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -24,7 +24,7 @@ task Pileup { input { File bam File bamIndex - String outputBed = "output.methyl.bed" + String outputBed = "output.bedMethyl" File referenceFasta File referenceFastaFai From 1580aae26fbec6b819d0a905959dbad7acf6fd63 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:14:19 +0100 Subject: [PATCH 559/668] Correct whitespacing Co-authored-by: Davy Cats --- sequali.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/sequali.wdl b/sequali.wdl index cbd3d869..b43cf281 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -53,6 +53,7 @@ task Sequali { docker: dockerImage time_minutes: timeMinutes } + parameter_meta { # inputs reads: {description: "A FASTQ or BAM file.", category: "required"} From 63dceb22e11e16a45f8ac04f1c466100e8a263f6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Feb 2025 16:24:21 +0100 Subject: [PATCH 560/668] Start on a VEP task --- vep.wdl | 74 
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 vep.wdl diff --git a/vep.wdl b/vep.wdl new file mode 100644 index 00000000..83eeac4e --- /dev/null +++ b/vep.wdl @@ -0,0 +1,74 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Vep { + input { + File inputFile + String outputPath = "vep.annotated.vcf.gz" + File cacheTar + File? pluginsTar + String? 
species + Array[String] plugins = [] + Boolean refseq = false + Boolean merged = false + + Boolean everything = false + Boolean symbol = false + + } + + command <<< + set -e + mkdir vep_cache + tar -x --directory vep_cache -f ~{cacheTar} + ~{"tar -x --directory vep_cache -f " + pluginsTar} + + # Output all stats files by default for MultiQC integration + vep \ + --input_file ~{inputFile} \ + ~{"--species " + species} \ + --stats_html --stats_text \ + --dir vep_cache \ # Output all stats files by default for MultiQC integration + + --offline \ + ~{true="--plugin" false="" length(plugins) > 0} {sep=" --plugin " plugins} \ + --vcf \ + --compress-output bgzip \ + ~{true="--refseq" false="" refseq} \ + ~{true="--merged" false="" merged} \ + \ + ~{true="--everything" false="" everything} \ + ~{true="--symbol" false="" symbol} \ + + + # Cleanup the tar extract to save filesystem space + rm -rf vep_cache + + + >>> + + output { + File outputFile = outputPath + File statsHtml = outputPath + "_summary.html" + } + +} \ No newline at end of file From 405395d512611775ed38021d79b3f4f570d0f23e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 14:31:23 +0100 Subject: [PATCH 561/668] Add runtime requirements --- vep.wdl | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/vep.wdl b/vep.wdl index 83eeac4e..496a6b8f 100644 --- a/vep.wdl +++ b/vep.wdl @@ -34,7 +34,10 @@ task Vep { Boolean everything = false Boolean symbol = false - } + String memory = "8GiB" + Int timeMinutes = 5 + ceil(size(inputFile, "MiB") * 3) + String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0" + } command <<< set -e @@ -71,4 +74,15 @@ task Vep { File statsHtml = outputPath + "_summary.html" } -} \ No newline at end of file + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: 
{description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} From b6107be5cdfaf396e53f25f2d93b6220d1f14eb7 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:06:54 +0100 Subject: [PATCH 562/668] Take into account cache tar size for runtime --- vep.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index 496a6b8f..4cec3fa3 100644 --- a/vep.wdl +++ b/vep.wdl @@ -35,7 +35,8 @@ task Vep { Boolean symbol = false String memory = "8GiB" - Int timeMinutes = 5 + ceil(size(inputFile, "MiB") * 3) + # Account time for unpacking the cache. + Int timeMinutes = 1 + ceil(size(cacheTar, GiB)) + ceil(size(inputFile, "MiB") * 3) String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0" } From 5401a6050c9c288f20569b1ffb943f1a05b19d19 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:20:41 +0100 Subject: [PATCH 563/668] Cleanup command --- vep.wdl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/vep.wdl b/vep.wdl index 4cec3fa3..f9e7a4a0 100644 --- a/vep.wdl +++ b/vep.wdl @@ -36,7 +36,7 @@ task Vep { String memory = "8GiB" # Account time for unpacking the cache. 
- Int timeMinutes = 1 + ceil(size(cacheTar, GiB)) + ceil(size(inputFile, "MiB") * 3) + Int timeMinutes = 1 + ceil(size(cacheTar, "GiB")) + ceil(size(inputFile, "MiB") * 3) String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0" } @@ -51,8 +51,7 @@ task Vep { --input_file ~{inputFile} \ ~{"--species " + species} \ --stats_html --stats_text \ - --dir vep_cache \ # Output all stats files by default for MultiQC integration - + --dir vep_cache \ --offline \ ~{true="--plugin" false="" length(plugins) > 0} {sep=" --plugin " plugins} \ --vcf \ From 701b819d7bebab81385dbd3c159f31ab37e5961b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:41:20 +0100 Subject: [PATCH 564/668] Add missing ~ --- vep.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index f9e7a4a0..636a8ce0 100644 --- a/vep.wdl +++ b/vep.wdl @@ -53,7 +53,7 @@ task Vep { --stats_html --stats_text \ --dir vep_cache \ --offline \ - ~{true="--plugin" false="" length(plugins) > 0} {sep=" --plugin " plugins} \ + ~{true="--plugin" false="" length(plugins) > 0} ~{sep=" --plugin " plugins} \ --vcf \ --compress-output bgzip \ ~{true="--refseq" false="" refseq} \ From e4654bc7be895cdf5fc80c02fdbfb84b8941d2aa Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:51:53 +0100 Subject: [PATCH 565/668] properly format commandline option --- vep.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index 636a8ce0..626257a3 100644 --- a/vep.wdl +++ b/vep.wdl @@ -55,7 +55,7 @@ task Vep { --offline \ ~{true="--plugin" false="" length(plugins) > 0} ~{sep=" --plugin " plugins} \ --vcf \ - --compress-output bgzip \ + --compress_output bgzip \ ~{true="--refseq" false="" refseq} \ ~{true="--merged" false="" merged} \ \ From bda5ff43ad460a51adcfa9daeb3432ec2156c80d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 16:21:23 +0100 Subject: [PATCH 566/668] Fix trailing whitespace --- 
vep.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vep.wdl b/vep.wdl index 626257a3..f2ca4a6e 100644 --- a/vep.wdl +++ b/vep.wdl @@ -49,7 +49,7 @@ task Vep { # Output all stats files by default for MultiQC integration vep \ --input_file ~{inputFile} \ - ~{"--species " + species} \ + ~{"--species " + species} \ --stats_html --stats_text \ --dir vep_cache \ --offline \ @@ -58,7 +58,6 @@ task Vep { --compress_output bgzip \ ~{true="--refseq" false="" refseq} \ ~{true="--merged" false="" merged} \ - \ ~{true="--everything" false="" everything} \ ~{true="--symbol" false="" symbol} \ From 967934c2fd0a4a4f29e4ad87475cd9c68a22298a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 16:39:43 +0100 Subject: [PATCH 567/668] Add missing output file param --- vep.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vep.wdl b/vep.wdl index f2ca4a6e..064cf41a 100644 --- a/vep.wdl +++ b/vep.wdl @@ -49,6 +49,7 @@ task Vep { # Output all stats files by default for MultiQC integration vep \ --input_file ~{inputFile} \ + --output_file ~{outputPath} \ ~{"--species " + species} \ --stats_html --stats_text \ --dir vep_cache \ @@ -71,6 +72,7 @@ task Vep { output { File outputFile = outputPath File statsHtml = outputPath + "_summary.html" + File statsTxt = outputPath + "_summary.txt" } runtime { From 115f3cfc0da031309a42a5a02d0825a06e1d3e85 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 17:03:00 +0100 Subject: [PATCH 568/668] Make sure output directory is made --- vep.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/vep.wdl b/vep.wdl index 064cf41a..7fb6a660 100644 --- a/vep.wdl +++ b/vep.wdl @@ -43,6 +43,7 @@ task Vep { command <<< set -e mkdir vep_cache + mkdir -p "$(dirname ~{outputPath})" tar -x --directory vep_cache -f ~{cacheTar} ~{"tar -x --directory vep_cache -f " + pluginsTar} From f29492641550c6d2247a40d216d53c5030d7983d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 14:22:49 +0100 
Subject: [PATCH 569/668] Complete VEP task --- vep.wdl | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/vep.wdl b/vep.wdl index 7fb6a660..8a5a443b 100644 --- a/vep.wdl +++ b/vep.wdl @@ -41,12 +41,14 @@ task Vep { } command <<< - set -e + set -eu mkdir vep_cache mkdir -p "$(dirname ~{outputPath})" tar -x --directory vep_cache -f ~{cacheTar} ~{"tar -x --directory vep_cache -f " + pluginsTar} + # Make sure vep can error, so the removal always succeeds. + set +e # Output all stats files by default for MultiQC integration vep \ --input_file ~{inputFile} \ @@ -61,13 +63,14 @@ task Vep { ~{true="--refseq" false="" refseq} \ ~{true="--merged" false="" merged} \ ~{true="--everything" false="" everything} \ - ~{true="--symbol" false="" symbol} \ - + ~{true="--symbol" false="" symbol} + VEP_EXIT_CODE=$? + set -e # Cleanup the tar extract to save filesystem space rm -rf vep_cache - + exit $VEP_EXIT_CODE >>> output { @@ -83,8 +86,23 @@ task Vep { } parameter_meta { + # input + inputFile: {description: "The VCF to annotate.", category: "required"} + outputPath: {description: "Where to put the output file", category: "advanced"} + cacheTar: {description: "A TAR archive containing the cache. The TAR archives from the VEP website work.", category: "required"} + pluginsTar: {description: "A TAR file with custom plugins.", category: "advanced"} + refseq: {description: "Use the refseq cache", category: "common"} + merged: {description: "Use the merged cache", category: "common"} + everything: {description: "Use all annotation sources bundeld with vep.", category: "common"} + symbol: {description: "Add the gene symbol to the output where available", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + outputFile: {description: "The annotated VEP VCF file."} + statsHtml: {description: "The VEP summary stats HTML file."} + statsTxt: {description: "The VEP summary stats TXT file."} } } From eca4681a0baf841dc2fffc2ca3f22930822740a5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 14:45:48 +0100 Subject: [PATCH 570/668] Add VEP to the changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1276efaa..378731bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Add VEP task. + Add Sequali task. + Add Clair3 task. + Add Modkit task. From 203d178e3ea80abef927e7f1ac67d00fec93ff75 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 15:15:50 +0100 Subject: [PATCH 571/668] Add missing parameter_meta for VEP --- vep.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vep.wdl b/vep.wdl index 8a5a443b..349242fb 100644 --- a/vep.wdl +++ b/vep.wdl @@ -91,6 +91,8 @@ task Vep { outputPath: {description: "Where to put the output file", category: "advanced"} cacheTar: {description: "A TAR archive containing the cache. 
The TAR archives from the VEP website work.", category: "required"} pluginsTar: {description: "A TAR file with custom plugins.", category: "advanced"} + species: {description: "Which species cache to use", category: "common"} + plugins: {description: "Which plugins to use", category: "common"} refseq: {description: "Use the refseq cache", category: "common"} merged: {description: "Use the merged cache", category: "common"} everything: {description: "Use all annotation sources bundeld with vep.", category: "common"} From 117e5317fbb50c5989b1afd668d469569b78127e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 15:20:15 +0100 Subject: [PATCH 572/668] Add missing Minimap2 parameter_meta --- minimap2.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/minimap2.wdl b/minimap2.wdl index 18127cb1..da301bd3 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -183,6 +183,11 @@ task Mapping { mismatchPenalty: {description: "Mismatch penalty.", category: "advanced"} tagsToKeep: {description: "Tags to keep from the input unaligned BAM file.", category: "Advanced"} howToFindGTAG: {description: "How to find GT-AG. 
f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} + compressionLevel: {description: "compressionLevel for the output file", category: "advanced"} + additionalSortThreads: {description: "Extra sorting threads used for samtools sort", category: "advanced"} + sortMemoryGb: {description: "Amount of memory set for sorting", category: "advanced"} + nameSorted: {description: "Output a name sorted file instead", category: "common"} + cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 319501e7ebbc0fa76baaac1d48d56294eda4b86c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 11:21:25 +0100 Subject: [PATCH 573/668] Add a samtools split task --- samtools.wdl | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index fbb445e7..a82bbda1 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -514,6 +514,61 @@ task Sort { } } +task Split { + input { + File inputBam + Directory outputPath + String? unaccountedPath + String? filenameFormat = "%!.%." + String? outputFormat = "bam" + Boolean writeIndex = false + + Int threads = 1 + String memory = "1GiB" + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + } + + command { + set -e + mkdir -p "~{outputPath}" + samtools split \ + --output-fmt ~{outputFormat} \ + -f "~{outputPath}/rg/~{filenameFormat}" \ + ~{"-u " + unaccountedPath} \ + ~{true="--write-index" false="" writeIndex} \ + ~{inputBam} + } + + output { + Array[File] split = glob(outputPath + "/rg/*." + outputFormat) + File? 
unaccounted = unaccountedPath + } + + runtime { + cpu: threads + memory: memory + docker: dockerImage + time_minutes: timeMinutes + } + + parameter_meta { + # inputs + inputBam: {description: "The bam file to split.", category: "required"} + outputPath: {description: "Directory to store output bams", category: "required"} + + # Optional parameters + unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "optional"} + filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. filename extension for output format", category: "format"} + outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "format"} + writeIndex: {description: "Automatically index outputs", category: "indexing"} + + # outputs + split: {description: "BAM file split by read groups"} + unaccounted: {description: "Reads with no RG tag or an unrecognised RG tag."} + } +} + task Tabix { input { File inputFile From 60dcef74f6229d81d19436a361f3e4e6aa41ddd0 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 11:22:35 +0100 Subject: [PATCH 574/668] Register in changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 378731bd..2993ddc0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ version 6.0.0-dev + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. ++ New samtools task: split. 
version 5.2.0 --------------------------- From 4030091ee212be3cc040c69a61834684b8c8be0e Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 11:27:49 +0100 Subject: [PATCH 575/668] Directory not yet available --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index a82bbda1..51230097 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -517,7 +517,7 @@ task Sort { task Split { input { File inputBam - Directory outputPath + String outputPath String? unaccountedPath String? filenameFormat = "%!.%." String? outputFormat = "bam" From 8a0de277c0b69a7607757a0c8c102a379e8e444c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 11:28:19 +0100 Subject: [PATCH 576/668] Must be defined --- samtools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 51230097..a2be09a4 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -519,8 +519,8 @@ task Split { File inputBam String outputPath String? unaccountedPath - String? filenameFormat = "%!.%." - String? outputFormat = "bam" + String filenameFormat = "%!.%." 
+ String outputFormat = "bam" Boolean writeIndex = false Int threads = 1 From b70891c3aea7314777aaf5122de3beadf10965e3 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 12:27:17 +0100 Subject: [PATCH 577/668] noticed in wdl-aid that only these are permitted --- samtools.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index a2be09a4..2fe9a9f7 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -558,10 +558,10 @@ task Split { outputPath: {description: "Directory to store output bams", category: "required"} # Optional parameters - unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "optional"} - filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. filename extension for output format", category: "format"} - outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "format"} - writeIndex: {description: "Automatically index outputs", category: "indexing"} + unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "common"} + filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. 
filename extension for output format", category: "common"} + outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "advanced"} + writeIndex: {description: "Automatically index outputs", category: "advanced"} # outputs split: {description: "BAM file split by read groups"} From 1ec88558c5b21cb1362518b2c4af95a865abcc68 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 13:01:26 +0100 Subject: [PATCH 578/668] Add compression level parameter, defaulting to 1 --- samtools.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 2fe9a9f7..c46ea88b 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -523,6 +523,8 @@ task Split { String outputFormat = "bam" Boolean writeIndex = false + Int compressionLevel = 1 + Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) @@ -534,6 +536,7 @@ task Split { mkdir -p "~{outputPath}" samtools split \ --output-fmt ~{outputFormat} \ + --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ ~{true="--write-index" false="" writeIndex} \ @@ -562,6 +565,7 @@ task Split { filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. 
filename extension for output format", category: "common"} outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "advanced"} writeIndex: {description: "Automatically index outputs", category: "advanced"} + compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"} # outputs split: {description: "BAM file split by read groups"} From 153db04100bf78f07b898d523a6da84544d8a02b Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 13:01:37 +0100 Subject: [PATCH 579/668] default to indexing --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index c46ea88b..554d0903 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -521,7 +521,7 @@ task Split { String? unaccountedPath String filenameFormat = "%!.%." String outputFormat = "bam" - Boolean writeIndex = false + Boolean writeIndex = true Int compressionLevel = 1 From 1522785ae1cec9254e5bf57f942260eab2babfd4 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 13:08:33 +0100 Subject: [PATCH 580/668] Remove control of output format --- samtools.wdl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 554d0903..7eba529c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -520,7 +520,6 @@ task Split { String outputPath String? unaccountedPath String filenameFormat = "%!.%." - String outputFormat = "bam" Boolean writeIndex = true Int compressionLevel = 1 @@ -535,7 +534,7 @@ task Split { set -e mkdir -p "~{outputPath}" samtools split \ - --output-fmt ~{outputFormat} \ + --output-fmt bam \ --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ @@ -544,7 +543,7 @@ task Split { } output { - Array[File] split = glob(outputPath + "/rg/*." + outputFormat) + Array[File] splitBam = glob(outputPath + "/rg/*.bam") File? 
unaccounted = unaccountedPath } @@ -563,7 +562,6 @@ task Split { # Optional parameters unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "common"} filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. filename extension for output format", category: "common"} - outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "advanced"} writeIndex: {description: "Automatically index outputs", category: "advanced"} compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"} From 2bba90e99bbc61dc08905a569d8bbb3df285878a Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 13:08:42 +0100 Subject: [PATCH 581/668] include indexes --- samtools.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 7eba529c..bfed7560 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -544,6 +544,7 @@ task Split { output { Array[File] splitBam = glob(outputPath + "/rg/*.bam") + Array[File] splitBamIndex = glob(outputPath + "/rg/*.bai") File? 
unaccounted = unaccountedPath } @@ -566,7 +567,8 @@ task Split { compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"} # outputs - split: {description: "BAM file split by read groups"} + splitBam: {description: "BAM file split by read groups"} + splitBamIndex: {description: "BAM indexes"} unaccounted: {description: "Reads with no RG tag or an unrecognised RG tag."} } } From bd4a8567cdedabf6aa1e779fa1af731b09e64b49 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 15:19:02 +0100 Subject: [PATCH 582/668] write index is non-optional --- samtools.wdl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index bfed7560..1660aac3 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -520,7 +520,6 @@ task Split { String outputPath String? unaccountedPath String filenameFormat = "%!.%." - Boolean writeIndex = true Int compressionLevel = 1 @@ -538,7 +537,7 @@ task Split { --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ - ~{true="--write-index" false="" writeIndex} \ + --write-index \ ~{inputBam} } @@ -563,7 +562,6 @@ task Split { # Optional parameters unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "common"} filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. 
filename extension for output format", category: "common"} - writeIndex: {description: "Automatically index outputs", category: "advanced"} compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"} # outputs From be0aabe03a8615dad5190b5e4c4c9869bb472c2e Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 15:49:15 +0100 Subject: [PATCH 583/668] make subdirectory as well --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 1660aac3..c452664c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -531,7 +531,7 @@ task Split { command { set -e - mkdir -p "~{outputPath}" + mkdir -p "~{outputPath}/rg/" samtools split \ --output-fmt bam \ --output-fmt-option level=~{compressionLevel} \ From 10e83c1c116d55d148534c7f9fc56056773aadb7 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 16:03:06 +0100 Subject: [PATCH 584/668] emits csi extension instead --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index c452664c..191a99a2 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -543,7 +543,7 @@ task Split { output { Array[File] splitBam = glob(outputPath + "/rg/*.bam") - Array[File] splitBamIndex = glob(outputPath + "/rg/*.bai") + Array[File] splitBamIndex = glob(outputPath + "/rg/*.bam.csi") File? 
unaccounted = unaccountedPath } From 6ebf7cd161f15add1c8ed9af8f000ab0952d232c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 16:14:42 +0100 Subject: [PATCH 585/668] missing threads --- samtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index 191a99a2..19ad8dab 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -537,6 +537,7 @@ task Split { --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ + --threads ~{threads} \ --write-index \ ~{inputBam} } From 6f9350106827f108f7be38b0d0440a0243174664 Mon Sep 17 00:00:00 2001 From: Helena Date: Mon, 10 Mar 2025 14:00:24 +0100 Subject: [PATCH 586/668] Update samtools.wdl --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index fbb445e7..66dc647f 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -167,7 +167,7 @@ task Fastq { Int threads = 1 String memory = "1GiB" - Int timeMinutes = 1 + ceil(size(inputBam) * 2) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } From 0ff8d9891a82ff8daf784b782d5007b4ed5cdd16 Mon Sep 17 00:00:00 2001 From: Helena Date: Wed, 19 Mar 2025 18:33:55 +0100 Subject: [PATCH 587/668] Add link to mentioned VEP website to save time in future --- vep.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index 349242fb..e99c9fdb 100644 --- a/vep.wdl +++ b/vep.wdl @@ -89,7 +89,7 @@ task Vep { # input inputFile: {description: "The VCF to annotate.", category: "required"} outputPath: {description: "Where to put the output file", category: "advanced"} - cacheTar: {description: "A TAR archive containing the cache. The TAR archives from the VEP website work.", category: "required"} + cacheTar: {description: "A TAR archive containing the cache. 
The TAR archives from the VEP website work (http://www.ensembl.org/info/docs/tools/vep/script/vep_cache.html)", category: "required"} pluginsTar: {description: "A TAR file with custom plugins.", category: "advanced"} species: {description: "Which species cache to use", category: "common"} plugins: {description: "Which plugins to use", category: "common"} From 3ea61f0d2fe6f16eba1afde9255c15bc368975dd Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 25 Mar 2025 14:56:01 +0100 Subject: [PATCH 588/668] Add a samtools quickcheck task which returns the input bam. This is designed to enable us to more quickly catch problematic BAMs, and fail earlier in the pipeline than after we've wasted some significant compute time. --- CHANGELOG.md | 1 + samtools.wdl | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 378731bd..a41b47cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ version 6.0.0-dev + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. ++ Added `samtools.Quickcheck` to allow failing on truncated files early. 
version 5.2.0 --------------------------- diff --git a/samtools.wdl b/samtools.wdl index 66dc647f..ea615bae 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -452,6 +452,46 @@ task Merge { } } +task Quickcheck { + input { + File inputBam + + Int threads = 1 + Int memoryGb = 1 + Int timeMinutes = 1 + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + } + + command { + set -e + samtools quickcheck ~{inputBam} + } + + output { + File outputBam = inputBam + } + + runtime { + cpu: threads + memory: "~{memoryGb}GiB" + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM/SAM/CRAM file.", category: "required"} + + threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} + memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "The exact same input file, but use this so it is recognised as a dependent task."} + } +} + task Sort { input { File inputBam From 38c5c9ad46e56e6c6e04853bc278e07c24221a28 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 25 Mar 2025 14:20:20 +0100 Subject: [PATCH 589/668] Collate fastq file before splitting It was reported to me that the _R1/_R2 from `samtools fastq` were not collated properly, that a single read was appearing in two wildly different places in R1/R2 which is completely silly. 
I have tried to reproduce this but thus far have been unable to: $ samtools view -b FILE.bam chrM > tmp.bam $ du -h tmp.bam 560K tmp.bam $ samtools fastq -1 paired1.fq -2 paired2.fq -0 /dev/null -s /dev/null -n tmp.bam [M::bam2fq_mainloop] discarded 480 singletons [M::bam2fq_mainloop] processed 608 reads $ diff <(grep ^@D paired1.fq) <(grep ^@D paired2.fq) $ Note the complete lack of difference between ordering. But if we look at the output of files which have come out of this tool, there are clear differences: $ zless R1.fastq.gz | grep '^@' | head -n 3 @D_____________________:1108:3364:16050 @D_____________________:2113:10647:9989 @D_____________________:2208:9374:82968 $ zless R2.fastq.gz | grep '^@' | head -n 3 @D_____________________:1108:3364:16050 @D_____________________:1214:3361:56060 @D_____________________:1309:8329:98995 these were produced by the command $ set -e $ mkdir -p "$(dirname split/R1.fastq.gz)" $ samtools fastq \ -1 split/R1.fastq.gz \ -2 split/R2.fastq.gz \ -n \ --threads 1 \ /mnt/miniwdl/out.bam This is indeed documented behaviour however: > If the input contains read-pairs which are to be interleaved or > written to separate files in the same order, then the input should be > first collated by name. Use samtools collate or samtools sort -n to > ensure this. > > https://www.htslib.org/doc/samtools-fasta.html#DESCRIPTION So it makes some sense to collate, or at some point ensure that the BAMs are sorted. I think there is a discussion to be had over whether automatic collation is sensible or a waste of runtime, but on the other hand, this is maybe a small footgun and eliminating it would make sense to reduce the potential failure modes (given our focus on reducing risk and all.)
--- CHANGELOG.md | 1 + samtools.wdl | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 378731bd..abf77c00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ version 6.0.0-dev + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. ++ Fixed bug whereby `samtools.Fastq` could produce out of sync R1/R2 when used with an unsorted bam input. `samtools collate` is now used by default to group reads by readname in order to avoid this issue. version 5.2.0 --------------------------- diff --git a/samtools.wdl b/samtools.wdl index 66dc647f..02a5ed52 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -174,6 +174,7 @@ task Fastq { command { set -e mkdir -p "$(dirname ~{outputRead1})" + samtools collate -u -O ~{inputBam} | \ samtools fastq \ ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ ~{"-2 " + outputRead2} \ @@ -184,8 +185,7 @@ task Fastq { ~{true="-N" false="-n" appendReadNumber} \ ~{true="-O" false="" outputQuality} \ ~{"-c " + compressionLevel} \ - ~{"--threads " + threads} \ - ~{inputBam} + ~{"--threads " + threads} } output { From 47efde79998bd64c25ef546e6387ff37254fa192 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 26 Mar 2025 12:23:19 +0100 Subject: [PATCH 590/668] Hardcode runtime per feedback --- samtools.wdl | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index ea615bae..8bb2df87 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -456,9 +456,6 @@ task Quickcheck { input { File inputBam - Int threads = 1 - Int memoryGb = 1 - Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } @@ -472,9 +469,7 @@ task Quickcheck { } runtime { - cpu: threads - memory: "~{memoryGb}GiB" - time_minutes: timeMinutes + time_minutes: 5 docker: dockerImage } @@ -482,9 +477,6 @@ task 
Quickcheck { # inputs inputBam: {description: "The input BAM/SAM/CRAM file.", category: "required"} - threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} - memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs From 9fd1c2cfb9431a31d48dab6eaadf9f14faf96326 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 26 Mar 2025 14:13:59 +0100 Subject: [PATCH 591/668] do not use default cpu/mem --- samtools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 8bb2df87..a009500c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -469,7 +469,9 @@ task Quickcheck { } runtime { + cpu: 1 time_minutes: 5 + memory: "1GiB" docker: dockerImage } From d0cc47c6421d990b2f2ed18b6ef5476cd5a19dd4 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Thu, 27 Mar 2025 17:46:10 +0100 Subject: [PATCH 592/668] Add wa/wb/s flags to bedtools intersect Fix bug whereby missing outdir would cause a failure. --- CHANGELOG.md | 1 + bedtools.wdl | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 378731bd..4bd6ae1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ version 6.0.0-dev + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. ++ Update `bedtools.Intersect` to support `-wa`, `-wb`, and `-s` flags. 
version 5.2.0 --------------------------- diff --git a/bedtools.wdl b/bedtools.wdl index fe18ede6..a5d8aab3 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -267,6 +267,10 @@ task Intersect { File? faidx # Giving a faidx file will set the sorted option. + Boolean writeA = false + Boolean writeB = false + Boolean stranded = false + String memory = "~{512 + ceil(size([regionsA, regionsB], "MiB"))}MiB" Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "GiB")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" @@ -276,10 +280,14 @@ task Intersect { command { set -e + mkdir -p "$(dirname ~{outputBed})" ~{"cut -f1,2 " + faidx} ~{true="> sorted.genome" false ="" sorted} bedtools intersect \ -a ~{regionsA} \ -b ~{regionsB} \ + ~{true="-wa" false="" writeA} \ + ~{true="-wb" false="" writeB} \ + ~{true="-s" false="" stranded} \ ~{true="-sorted" false="" sorted} \ ~{true="-g sorted.genome" false="" sorted} \ > ~{outputBed} @@ -301,6 +309,11 @@ task Intersect { regionsB: {description: "Region file b to intersect.", category: "required"} outputBed: {description: "The path to write the output to.", category: "advanced"} faidx: {description: "The fasta index (.fai) file that is used to create the genome file required for sorted output. Implies sorted option.", category: "common"} + + writeA: {description: "Write the original entry in A for each overlap.", category: "advanced"} + writeB: {description: "Write the original entry in B for each overlap. Useful for knowing what A overlaps.", category: "advanced"} + stranded: {description: "Force “strandedness”. That is, only report hits in B that overlap A on the same strand. 
By default, overlaps are reported without respect to strand.", category: "advanced"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From fff0fe8fe9cf1f022369dcfb05e5f4980f0f8115 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 08:52:51 +0200 Subject: [PATCH 593/668] Update pbmm2 image --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index ea7c05df..91b0b1fe 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -31,7 +31,7 @@ task Mapping { Int cores = 4 String memory = "30GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 2000 / cores) - String dockerImage = "quay.io/biocontainers/pbmm2:1.3.0--h56fc30b_1" + String dockerImage = "quay.io/biocontainers/pbmm2:1.17.0--h9ee0642_0" } command { From 084486c19bcde6398d41381c0628f5c359c7c53b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 09:05:59 +0200 Subject: [PATCH 594/668] Add pbmm2 outputPrefix parameter --- CHANGELOG.md | 2 ++ pbmm2.wdl | 12 ++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dde73d44..dd536e5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Allow pbmm2 to work with a set output prefix for the BAM file. ++ Update pbmm2 docker container to version 1.17 + Add VEP task. + Add Sequali task. + Add Clair3 task. 
diff --git a/pbmm2.wdl b/pbmm2.wdl index 91b0b1fe..915fbb02 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -25,6 +25,7 @@ task Mapping { String presetOption Boolean sort=true String sample + String outputPrefix = sample + ".align" File referenceMMI File queryFile @@ -35,6 +36,8 @@ task Mapping { } command { + set -e + mkdir -p ~{outputPrefix} pbmm2 align \ --preset ~{presetOption} \ ~{true="--sort" false="" sort} \ @@ -42,12 +45,12 @@ task Mapping { ~{referenceMMI} \ ~{queryFile} \ --sample ~{sample} \ - ~{sample}.align.bam + ~{outputPrefix}.bam } output { - File outputAlignmentFile = sample + ".align.bam" - File outputIndexFile = sample + ".align.bam.bai" + File outputAlignmentFile = outputPrefix + ".bam" + File outputIndexFile = outputPrefix + ".bam.bai" } runtime { @@ -62,6 +65,7 @@ task Mapping { presetOption: {description: "This option applies multiple options at the same time.", category: "required"} sort: {description: "Sort the output bam file.", category: "advanced"} sample: {description: "Name of the sample.", category: "required"} + outputPrefix: {description: "The prefix of the output filename before the .bam extension." category: "advanced"} referenceMMI: {description: "MMI file for the reference.", category: "required"} queryFile: {description: "BAM file with reads to align against the reference.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} @@ -69,7 +73,7 @@ task Mapping { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - # outputs + # output outputAlignmentFile: {description: "Mapped bam file."} outputIndexFile: {description: "Bam index file."} } From 912754990f49d74b69a170bf68901e6ecd1f9557 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 11:52:02 +0200 Subject: [PATCH 595/668] Use a better output prefix Co-authored-by: Davy Cats --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index 915fbb02..f8abbd64 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -25,7 +25,7 @@ task Mapping { String presetOption Boolean sort=true String sample - String outputPrefix = sample + ".align" + String outputPrefix = "./~{sample}.align" File referenceMMI File queryFile From 408757f683bf02d0bcf214cd72a4aee732d520d9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 11:52:16 +0200 Subject: [PATCH 596/668] Add missing dirname call Co-authored-by: Davy Cats --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index f8abbd64..b00e249e 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -37,7 +37,7 @@ task Mapping { command { set -e - mkdir -p ~{outputPrefix} + mkdir -p $(dirname ~{outputPrefix}) pbmm2 align \ --preset ~{presetOption} \ ~{true="--sort" false="" sort} \ From 8e008554a71cb5de37c69f80321b0d4d39dcf750 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 11:58:13 +0200 Subject: [PATCH 597/668] Add missing comma --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index b00e249e..73e74c0c 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -65,7 +65,7 @@ task Mapping { presetOption: {description: "This option applies multiple options at the same time.", category: "required"} sort: {description: "Sort the output bam file.", category: "advanced"} sample: {description: "Name of the sample.", category: "required"} - 
outputPrefix: {description: "The prefix of the output filename before the .bam extension." category: "advanced"} + outputPrefix: {description: "The prefix of the output filename before the .bam extension.", category: "advanced"} referenceMMI: {description: "MMI file for the reference.", category: "required"} queryFile: {description: "BAM file with reads to align against the reference.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} From 7d6da07cd4dbe09e42cf343e9077d0118e4d1264 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Thu, 27 Mar 2025 17:48:03 +0100 Subject: [PATCH 598/668] Deprecated bedgraph option, produce it by default --- CHANGELOG.md | 1 + modkit.wdl | 15 ++++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd536e5e..337a68db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ version 6.0.0-dev + Fixed bug whereby `samtools.Fastq` could produce out of sync R1/R2 when used with an unsorted bam input. `samtools collate` is now used by default to group reads by readname in order to avoid this issue. + New samtools task: split. + Update `bedtools.Intersect` to support `-wa`, `-wb`, and `-s` flags. ++ Deprecate `modkit.Pileup`'s bedGraph option, it is now output by default. version 5.2.0 --------------------------- diff --git a/modkit.wdl b/modkit.wdl index 930b6de9..7376a567 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -25,6 +25,7 @@ task Pileup { File bam File bamIndex String outputBed = "output.bedMethyl" + String outputBedGraph = "m_CG0_combined.bedgraph" File referenceFasta File referenceFastaFai @@ -34,7 +35,6 @@ task Pileup { Boolean cpg = false Boolean combineMods = false Boolean combineStrands = false - Boolean bedgraph = false String? 
ignore String logFilePath = "modkit.log" @@ -42,7 +42,6 @@ task Pileup { String memory = "4GiB" Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" - } command <<< @@ -58,15 +57,17 @@ task Pileup { ~{true="--cpg" false="" cpg} \ ~{true="--combine-mods" false="" combineMods} \ ~{true="--combine-strands" false="" combineStrands} \ - ~{true="--bedgraph" false="" bedgraph} \ --log-filepath ~{logFilePath} \ ~{bam} \ - ~{outputBed} + - | tee ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > ~{outputBedGraph} >>> + # You can use modkit pileup ${bam_path} - | tee out.bedmethyl | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > out.bg to get both outputs at once without running anything twice. + # https://github.com/nanoporetech/modkit/issues/210#issuecomment-2181706374 + output { - File? out = outputBed # Normal mode - Array[File] outFiles = glob(outputBed + "/*") # Bedgraph mode + File out = outputBed # Normal mode + File outFiles = outputBedGraph # Bedgraph mode File logFile = logFilePath } @@ -104,4 +105,4 @@ task Pileup { outFiles: {description: "Output files when bedgraph = true."} logFile: {description: "The generated log file."} } -} \ No newline at end of file +} From 9d2a4735bf221410b7a1b6b3ad1cd5e5edad3423 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Mar 2025 14:33:52 +0100 Subject: [PATCH 599/668] Update parameter_meta for modkit --- modkit.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 7376a567..5ba1f501 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -84,14 +84,14 @@ task Pileup { bamIndex: {description: "The index for the input alignment file", category: "required"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - outputBed: {description: "The output name where the data 
should be placed.", category: "common"} + outputBed: {description: "The output name where the bedMethyl file should be placed.", category: "common"} + outputBedgraph: {description: "The output name where the bedgraph file should be placed", category: "common"} intervalSize: {description: "Sets the interval size", category: "advanced"} includeBed: {description: "Bed file with regions to include", category: "advanced"} cpg: {description: "Whether to call only at cpg sites", category: "advanced"} combineMods: {description: "Whether to combine modifications in the output", category: "advanced"} combineStrands: {description: "Whether to combine strands in the output", category: "advanced"} - bedgraph: {description: "Whether to create a folder instead with a bedgraph file", category: "advanced"} ignore: {description: "Modification type to ignore. For example 'h'.", category: "advanced"} logFilePath: {description: "Path where the log file should be written.", category: "advanced"} From feaacf40fb1fb2edf4588d63b5baee4f8eac18a2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Mar 2025 14:37:39 +0100 Subject: [PATCH 600/668] Fix typo --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 5ba1f501..9311e4da 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -85,7 +85,7 @@ task Pileup { referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} outputBed: {description: "The output name where the bedMethyl file should be placed.", category: "common"} - outputBedgraph: {description: "The output name where the bedgraph file should be placed", category: "common"} + outputBedGraph: {description: "The output name where the bedgraph file should be placed", category: "common"} intervalSize: {description: "Sets the interval size", category: "advanced"} includeBed: {description: "Bed file with regions to 
include", category: "advanced"} From 9e057d6ce259e5fc96ffb04208c37bda8b43ec3e Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Mon, 31 Mar 2025 14:04:15 +0200 Subject: [PATCH 601/668] split into separate files --- modkit.wdl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 9311e4da..78df28f4 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -25,7 +25,7 @@ task Pileup { File bam File bamIndex String outputBed = "output.bedMethyl" - String outputBedGraph = "m_CG0_combined.bedgraph" + String outputBedGraph = "combined.bedgraph" File referenceFasta File referenceFastaFai @@ -59,7 +59,9 @@ task Pileup { ~{true="--combine-strands" false="" combineStrands} \ --log-filepath ~{logFilePath} \ ~{bam} \ - - | tee ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > ~{outputBedGraph} + - | tee ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10 >> "~{outputBedGraph}_"$4"_"$6".bedGraph"}' + # Separately generate the combined file as well, so users can have a choice. + cat ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > ~{outputBedGraph} >>> # You can use modkit pileup ${bam_path} - | tee out.bedmethyl | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > out.bg to get both outputs at once without running anything twice. 
@@ -67,7 +69,8 @@ task Pileup { output { File out = outputBed # Normal mode - File outFiles = outputBedGraph # Bedgraph mode + File outGraph = outputBedGraph # Normal mode + Array[File] outFiles = glob(outputBedGraph + "*.bedGraph") # Bedgraph mode File logFile = logFilePath } From e439d58c8e9584c8957a4ecb265ce5f7de9f96ce Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 14:06:31 +0200 Subject: [PATCH 602/668] Add Mosdepth task --- CHANGELOG.md | 1 + mosdepth.wdl | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 mosdepth.wdl diff --git a/CHANGELOG.md b/CHANGELOG.md index dd536e5e..986dfd13 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Add Mosdepth task. + Allow pbmm2 to work with a set output prefix for the BAM file. + Update pbmm2 docker container to version 1.17 + Add VEP task. diff --git a/mosdepth.wdl b/mosdepth.wdl new file mode 100644 index 00000000..0f800769 --- /dev/null +++ b/mosdepth.wdl @@ -0,0 +1,106 @@ +version 1.0 + +# Copyright (c) 2025 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Mosdepth { + input { + File bam + File bamIndex + String prefix = "./out" + + String? chrom + # --by flag takes a BED file or an integer. So there need to be two inputs in WDL's typed system. + File? byBed + Int? byWindow + File? fasta + Int? flag + Int? includeFlag + + Boolean noPerBase = false + Boolean d4 = false + Boolean fastMode = false + + Int threads = 1 + String memory = "1GiB" + Int timeMinutes = 10 + ceil(size(bam, "G")) * 4 + String dockerImage = "quay.io/biocontainers/mosdepth:0.3.10--h4e814b3_1" + } + + command <<< + set -e + mkdir -p $(dirname ~{prefix}) + mosdepth \ + --threads ~{threads} \ + ~{"--chrom " + chrom} \ + ~{"--by " + byBed} \ + ~{"--by " + byWindow} \ + ~{"--fasta " + fasta} \ + ~{true="--no-per-base" false="" noPerBase} \ + ~{true="--d4" false="" d4} \ + ~{"--flag " + flag} \ + ~{"--include-flag " + includeFlag} \ + ~{true="--fast-mode" false="" fastMode} \ + ~{prefix} ~{bam} + >>> + + output { + File globalDist = "~{prefix}.mosdepth.global.dist.txt" + File summary = "~{prefix}.mosdepth.summary.txt" + File? perBaseBed = "~{prefix}.per-base.bed.gz" + File? 
regionsBed = "~{prefix}.regions.bed.gz" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + bam: {description: "Input BAM or CRAM file.", category: "required"} + bamIndex: {description: "Index for the input BAM or CRAM file.", category: "required"} + prefix: {description: "Output prefix.", category: "common"} + + chrom: {description: "Chromosome to restrict depth calculation.", category: "advanced"} + byBed: {description: "Bed file with windows to include for the --by flag. Should not be used together with byWindow.", category: "common"} + byWindow: {description: "Integer window size for the --by flag. Should not be used together with byBed.", category: "advanced"} + fasta: {description: "FASTA file, only necessary when CRAM input is used.", category: "advanced"} + flag: {description: "Exclude reads with any of the bits in FLAG set.", category: "advanced"} + includeFlag: {description: "Only include reads with any of the bits in FLAG set.", category: "advanced"} + + noPerBase: {description: "Don't output per-base depth. Skipping this output will speed execution.", category: "common"} + d4: {description: "output per-base depth in d4 format.", category: "advanced"} + fastMode: {description: "Don't look at internal cigar operations or correct mate overlaps (recommended for most use-cases).", category: "common"} + + threads: {description: "How many threads to use.", category: "common"} + memory: {description: "How much memory to allocate.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + globalDist: {description: "Global distribution table file."} + summary: {description: "Summary table file."} + perBaseBed: {description: "Per base coverage BED file."} + regionsBed: {description: "Per region BED file, if byBed or byWindow is used."} + } +} \ No newline at end of file From 7bcac8ea2636cbeeae247d783c0dc5558bb0955a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 09:22:31 +0200 Subject: [PATCH 603/668] Update all samtools images --- samtools.wdl | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index d724a692..2388813e 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -72,7 +72,7 @@ task DictAndFaidx { String javaXmx = "2G" String memory = "3GiB" Int timeMinutes = 5 + ceil(size(inputFile, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } String outputFile = basename(inputFile) @@ -119,7 +119,7 @@ task Faidx { String outputDir String memory = "2GiB" - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -168,7 +168,7 @@ task Fastq { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -232,7 +232,7 @@ task FilterShortReadsBam { String memory = "1GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") @@ -278,7 +278,7 
@@ task Flagstat { String memory = "256MiB" # Only 40.5 MiB used for 150G bam file. Int timeMinutes = 1 + ceil(size(inputBam, "G")) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -318,7 +318,7 @@ task Index { String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 4) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } # Select_first is needed, otherwise womtool validate fails. @@ -369,7 +369,7 @@ task Markdup { String outputBamPath Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -408,7 +408,7 @@ task Merge { Int threads = 1 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } String indexPath = sub(outputBamPath, "\.bam$",".bai") @@ -456,7 +456,7 @@ task Quickcheck { input { File inputBam - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -497,7 +497,7 @@ task Sort { Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } # Select first needed as outputPath is optional input (bug in cromwell). 
@@ -560,7 +560,7 @@ task Split { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -669,7 +669,7 @@ task View { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inFile, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } String outputIndexPath = basename(outputFileName) + ".bai" From 435a719147253df23cad2674736d8d699b186e77 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 09:56:35 +0200 Subject: [PATCH 604/668] Task updates to samtools.wdl --- CHANGELOG.md | 6 +++++ samtools.wdl | 72 +++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 58 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 986dfd13..8b95b904 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,12 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Update docker images in samtools.wdl ++ Add threads and compression levels to applicable tasks. Default to + compression level 1. ++ samtools BgzipAndIndex and Tabix "type" parameter changed to "preset" as is + the name of the flag. ++ Unused javaXmx parameter removed from samtools DictAndFaidx + Add Mosdepth task. + Allow pbmm2 to work with a set output prefix for the BAM file. 
+ Update pbmm2 docker container to version 1.17 diff --git a/samtools.wdl b/samtools.wdl index 2388813e..30e938b4 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -24,11 +24,13 @@ task BgzipAndIndex { input { File inputFile String outputDir - String type = "vcf" + String preset = "vcf" + Int compressLevel = 1 + Int threads = 1 String memory = "2GiB" Int timeMinutes = 1 + ceil(size(inputFile, "GiB")) - String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" + String dockerImage = "quay.io/biocontainers/htslib:1.21--h566b1c6_1" } String outputGz = outputDir + "/" + basename(inputFile) + ".gz" @@ -36,8 +38,15 @@ task BgzipAndIndex { command { set -e mkdir -p "$(dirname ~{outputGz})" - bgzip -c ~{inputFile} > ~{outputGz} - tabix ~{outputGz} -p ~{type} + bgzip \ + --threads ~{threads} \ + --compress-level ~{compressLevel} \ + -c ~{inputFile} > ~{outputGz} + + tabix \ + --preset ~{preset} \ + --threads ~{threads - 1} \ + ~{outputGz} } output { @@ -46,6 +55,7 @@ task BgzipAndIndex { } runtime { + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -55,7 +65,7 @@ task BgzipAndIndex { # inputs inputFile: {description: "The file to be compressed and indexed.", category: "required"} outputDir: {description: "The directory in which the output will be placed.", category: "required"} - type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} + preset: {description: "The preset for the file (eg. vcf or bed) to be compressed and indexed.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -69,7 +79,6 @@ task BgzipAndIndex { task DictAndFaidx { input { File inputFile - String javaXmx = "2G" String memory = "3GiB" Int timeMinutes = 5 + ceil(size(inputFile, "GiB") * 5) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" @@ -102,7 +111,6 @@ task DictAndFaidx { parameter_meta { # inputs inputFile: {description: "The input fasta file.", category: "required"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -163,7 +171,7 @@ task Fastq { Int? includeFilter Int? excludeFilter Int? excludeSpecificFilter - Int? compressionLevel + Int compressionLevel = 1 Int threads = 1 String memory = "1GiB" @@ -184,8 +192,8 @@ task Fastq { ~{"-G " + excludeSpecificFilter} \ ~{true="-N" false="-n" appendReadNumber} \ ~{true="-O" false="" outputQuality} \ - ~{"-c " + compressionLevel} \ - ~{"--threads " + threads} + -c ~{compressionLevel} \ + "--threads " ~{threads - 1} } output { @@ -276,6 +284,8 @@ task Flagstat { File inputBam String outputPath + Int threads = 1 + String memory = "256MiB" # Only 40.5 MiB used for 150G bam file. 
Int timeMinutes = 1 + ceil(size(inputBam, "G")) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" @@ -284,7 +294,9 @@ task Flagstat { command { set -e mkdir -p "$(dirname ~{outputPath})" - samtools flagstat ~{inputBam} > ~{outputPath} + samtools flagstat \ + --threads ~{threads - 1} + ~{inputBam} > ~{outputPath} } output { @@ -292,6 +304,7 @@ task Flagstat { } runtime { + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -316,6 +329,8 @@ task Index { String? outputBamPath + Int threads = 1 + String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" @@ -334,7 +349,9 @@ task Index { mkdir -p "$(dirname ~{outputPath})" ln ~{bamFile} ~{outputPath} || cp ~{bamFile} ~{outputPath} fi - samtools index ~{outputPath} ~{bamIndexPath} + samtools index \ + --threads ~{threads -1} \ + ~{outputPath} ~{bamIndexPath} ' } @@ -344,6 +361,7 @@ task Index { } runtime { + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -367,6 +385,7 @@ task Markdup { input { File inputBam String outputBamPath + Int threads = 1 Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" @@ -375,7 +394,9 @@ task Markdup { command { set -e mkdir -p "$(dirname ~{outputBamPath})" - samtools markdup ~{inputBam} ~{outputBamPath} + samtools markdup \ + --threads ~{threads - 1} \ + ~{inputBam} ~{outputBamPath} } output { @@ -383,6 +404,7 @@ task Markdup { } runtime { + cpu: threads docker: dockerImage time_minutes: timeMinutes } @@ -405,6 +427,10 @@ task Merge { String outputBamPath = "merged.bam" Boolean force = true + Boolean combineRGHeaders = false + Boolean combinePGHeaders = false + + Int compressionLevel = 1 Int threads = 1 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 2) @@ -420,6 +446,9 @@ task Merge { samtools merge \ --threads ~{threads - 1} \ 
~{true="-f" false="" force} \ + -l ~{compressionLevel} \ + ~{true="-c" false="" combineRGHeaders} \ + ~{true="-p" false="" combinePGHeaders} \ ~{outputBamPath} ~{sep=' ' bamFiles} samtools index ~{outputBamPath} ~{indexPath} } @@ -514,7 +543,7 @@ task Sort { -o ~{outputPath} \ ~{inputBam} samtools index \ - -@ ~{threads} \ + --threads ~{threads - 1} \ ~{outputPath} ~{bamIndexPath} } @@ -571,7 +600,7 @@ task Split { --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ - --threads ~{threads} \ + --threads ~{threads - 1} \ --write-index \ ~{inputBam} } @@ -610,10 +639,10 @@ task Tabix { input { File inputFile String outputFilePath = basename(inputFile) - String type = "vcf" + String preset = "vcf" Int timeMinutes = 1 + ceil(size(inputFile, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" + String dockerImage = "quay.io/biocontainers/htslib:1.21--h566b1c6_1" } # FIXME: It is better to do the indexing on VCF creation. @@ -625,7 +654,7 @@ task Tabix { then ln ~{inputFile} ~{outputFilePath} || cp ~{inputFile} ~{outputFilePath} fi - tabix ~{outputFilePath} -p ~{type} + tabix ~{outputFilePath} -p ~{preset} } output { @@ -643,7 +672,7 @@ task Tabix { # inputs inputFile: {description: "The file to be indexed.", category: "required"} outputFilePath: {description: "The location where the file should be written to. The index will appear alongside this link to the file.", category: "common"} - type: {description: "The type of file (eg. vcf or bed) to be indexed.", category: "common"} + preset: {description: "The preset for the file (eg. vcf or bed) to be indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -666,6 +695,8 @@ task View { Int? MAPQthreshold File? targetFile + Boolean fast = false # Default should be true, unless a non-BAM format is preferred. So th + Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inFile, "GiB") * 5) @@ -682,11 +713,12 @@ task View { ~{"-T " + referenceFasta} \ ~{"-o " + outputFileName} \ ~{true="-u " false="" uncompressedBamOutput} \ + ~{true="--fast" false="" fast} \ ~{"-f " + includeFilter} \ ~{"-F " + excludeFilter} \ ~{"-G " + excludeSpecificFilter} \ ~{"-q " + MAPQthreshold} \ - ~{"--threads " + (threads - 1)} \ + --threads ~{threads - 1} \ ~{"--target-file " + targetFile} \ ~{inFile} samtools index ~{outputFileName} ~{outputIndexPath} From d20b313ea01c0dc3fe318206daac4d976c22bc5b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 09:58:17 +0200 Subject: [PATCH 605/668] Increase mosdepth default memory --- mosdepth.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mosdepth.wdl b/mosdepth.wdl index 0f800769..43e95614 100644 --- a/mosdepth.wdl +++ b/mosdepth.wdl @@ -39,7 +39,7 @@ task Mosdepth { Boolean fastMode = false Int threads = 1 - String memory = "1GiB" + String memory = "4GiB" Int timeMinutes = 10 + ceil(size(bam, "G")) * 4 String dockerImage = "quay.io/biocontainers/mosdepth:0.3.10--h4e814b3_1" } From 046eecb3af6887d6aad1c31a4521951822683259 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 09:59:19 +0200 Subject: [PATCH 606/668] Allocate more time for merging --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 30e938b4..915bb848 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -433,7 +433,7 @@ task Merge { Int compressionLevel = 1 Int threads = 1 String memory = "4GiB" - Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 2) + Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") 
* 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } From f5765ffd1e75964a43da36c500741610e005c554 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 10:20:18 +0200 Subject: [PATCH 607/668] Update clair3 image --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index 4d9092f2..57984a32 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -35,7 +35,7 @@ task Clair3 { Boolean includeAllCtgs = false String memory = "~{threads + 16}GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) - String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" + String dockerImage = "quay.io/biocontainers/clair3:1.0.11--py39hd649744_0" } String modelArg = "~{if defined(modelTar) then basename(select_first([modelTar]), '.tar.gz') else builtinModel}" From bfd433dd4f698bf141c7add6cc42ea58d56ca3a2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 10:25:45 +0200 Subject: [PATCH 608/668] Update deepvariant image --- deepvariant.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index 2d212000..e9e6c18c 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -35,11 +35,11 @@ task RunDeepVariant { String? outputGVcfIndex File? regions String? sampleName - Boolean? 
VCFStatsReport = true + Boolean VCFStatsReport = true String memory = "48GiB" Int timeMinutes = 5000 - String dockerImage = "google/deepvariant:1.6.1" + String dockerImage = "google/deepvariant:1.8.0" } command { From cfbc34deb566ddb2ce0561168c7fb3dd3b0ae1e6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 11:30:12 +0200 Subject: [PATCH 609/668] Update several images --- CHANGELOG.md | 1 + modkit.wdl | 2 +- multiqc.wdl | 2 +- picard.wdl | 34 +++++++++++++++++----------------- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b95b904..8c13cacc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ version 6.0.0-dev + samtools BgzipAndIndex and Tabix "type" parameter changed to "preset" as is the name of the flag. + Unused javaXmx parameter removed from samtools DictAndFaidx ++ Update Picard images + Add Mosdepth task. + Allow pbmm2 to work with a set output prefix for the BAM file. + Update pbmm2 docker container to version 1.17 diff --git a/modkit.wdl b/modkit.wdl index 930b6de9..6a7d9b4d 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -41,7 +41,7 @@ task Pileup { Int threads = 8 String memory = "4GiB" Int timeMinutes = 2880 / threads # 2 Days / threads - String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" } diff --git a/multiqc.wdl b/multiqc.wdl index a2e32cdb..fae52178 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -58,7 +58,7 @@ task MultiQC { String? 
memory Int timeMinutes = 10 + ceil(size(reports, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/multiqc:1.25.1--pyhdfd78af_0" + String dockerImage = "quay.io/biocontainers/multiqc:1.28--pyhdfd78af_0" } Int memoryGb = 2 + ceil(size(reports, "GiB")) diff --git a/picard.wdl b/picard.wdl index 6628cf0e..fd072523 100644 --- a/picard.wdl +++ b/picard.wdl @@ -29,7 +29,7 @@ task BedToIntervalList { String javaXmx = "3G" String memory = "4GiB" Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -89,7 +89,7 @@ task CollectHsMetrics { # Additional * 2 because picard multiple metrics reads the # reference fasta twice. Int timeMinutes = 1 + ceil(size(referenceFasta, "GiB") * 3 * 2) + ceil(size(inputBam, "GiB") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -210,7 +210,7 @@ task CollectMultipleMetrics { Int memoryMb = javaXmxMb + 512 # Additional * 2 because picard multiple metrics reads the reference fasta twice. Int timeMinutes = 1 + ceil(size(referenceFasta, "GiB") * 3 * 2) + ceil(size(inputBam, "GiB") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -336,7 +336,7 @@ task CollectRnaSeqMetrics { String memory = "9GiB" # With 6 minutes per G there were several timeouts. 
Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 12) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -394,7 +394,7 @@ task CollectTargetedPcrMetrics { String javaXmx = "3G" String memory = "4GiB" Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -456,7 +456,7 @@ task CollectVariantCallingMetrics { String javaXmx = "8G" String memory = "9GiB" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -569,7 +569,7 @@ task CreateSequenceDictionary { String javaXmx = "2G" String memory = "3GiB" - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -621,7 +621,7 @@ task GatherBamFiles { Int memoryMb = javaXmxMb + 512 # One minute per input gigabyte. 
Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 1) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -684,7 +684,7 @@ task GatherVcfs { String javaXmx = "4G" String memory = "5GiB" Int timeMinutes = 1 + ceil(size(inputVcfs, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -753,7 +753,7 @@ task MarkDuplicates { String memoryMb = javaXmxMb + 512 Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } # Task is assuming query-sorted input so that the Secondary and Supplementary reads get @@ -836,7 +836,7 @@ task MergeVCFs { String javaXmx = "4G" String memory = "5GiB" Int timeMinutes = 1 + ceil(size(inputVCFs, "GiB")) * 2 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } # Using MergeVcfs instead of GatherVcfs so we can create indices. @@ -892,7 +892,7 @@ task SamToFastq { String javaXmx = "16G" # High memory default to avoid crashes. String memory = "17GiB" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" File? 
noneFile } @@ -953,7 +953,7 @@ task ScatterIntervalList { String javaXmx = "3G" String memory = "4GiB" - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -996,7 +996,7 @@ task SortSam { # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -1058,7 +1058,7 @@ task SortVcf { String javaXmx = "8G" String memory = "9GiB" Int timeMinutes = 1 + ceil(size(vcfFiles, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } @@ -1108,7 +1108,7 @@ task RenameSample { String javaXmx = "8G" String memory = "9GiB" Int timeMinutes = 1 + ceil(size(inputVcf, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -1163,7 +1163,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String javaXmx = "8G" String memory = "9GiB" Int timeMinutes = 360 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { From d31f74badd4e6d8f8c1f397c4478ffa20e32437e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 13:02:11 +0200 Subject: [PATCH 610/668] Make resource requirements for pbmm2 and minimap2 somewhat equal --- CHANGELOG.md | 2 ++ minimap2.wdl | 8 ++++---- pbmm2.wdl | 32 +++++++++++++++++++++++--------- 3 files changed, 29 insertions(+), 13 deletions(-) diff 
--git a/CHANGELOG.md b/CHANGELOG.md index 8c13cacc..5fa636d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,8 @@ version 6.0.0-dev + Unused javaXmx parameter removed from samtools DictAndFaidx + Update Picard images + Add Mosdepth task. ++ pbmm2 loses the sort parameter. Output is now always sorted. ++ pbmm2 gets an unmapped parameter. + Allow pbmm2 to work with a set output prefix for the BAM file. + Update pbmm2 docker container to version 1.17 + Add VEP task. diff --git a/minimap2.wdl b/minimap2.wdl index da301bd3..a7584beb 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -86,8 +86,6 @@ task Mapping { File queryFile Int compressionLevel = 1 - Int additionalSortThreads = 1 - Int sortMemoryGb = 1 Boolean nameSorted = false # MM, ML, MN -> Methylation flags # Also keep the following flags for Sequali to be able to run on the mapped bam file and get ONT information. @@ -112,6 +110,8 @@ task Mapping { String? howToFindGTAG String? readgroup + Int sortThreads = 2 + Int sortMemoryGb = 1 Int cores = 8 String memory = "24GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) @@ -147,7 +147,7 @@ task Mapping { - \ | samtools sort \ ~{true="-N" false="" nameSorted} \ - -@ ~{additionalSortThreads} \ + --threads ~{sortThreads - 1} \ -l ~{compressionLevel} \ -m ~{sortMemoryGb}G \ -o ~{outputPrefix}.bam @@ -184,7 +184,7 @@ task Mapping { tagsToKeep: {description: "Tags to keep from the input unaligned BAM file.", category: "Advanced"} howToFindGTAG: {description: "How to find GT-AG. 
f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} compressionLevel: {description: "compressionLevel for the output file", category: "advanced"} - additionalSortThreads: {description: "Extra sorting threads used for samtools sort", category: "advanced"} + sortThreads: {description: "Extra sorting threads used for samtools sort", category: "advanced"} sortMemoryGb: {description: "Amount of memory set for sorting", category: "advanced"} nameSorted: {description: "Output a name sorted file instead", category: "common"} diff --git a/pbmm2.wdl b/pbmm2.wdl index 73e74c0c..23133278 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -23,25 +23,36 @@ version 1.0 task Mapping { input { String presetOption - Boolean sort=true + Boolean unmapped = false String sample String outputPrefix = "./~{sample}.align" File referenceMMI File queryFile - Int cores = 4 - String memory = "30GiB" - Int timeMinutes = 1 + ceil(size(queryFile, "G") * 2000 / cores) + Int sortMemoryGb = 1 + Int sortThreads = 2 + Int cores = 8 + String memory = "24GiB" + # Slightly higher than minimap2 as compression level can not be set. + Int timeMinutes = 1 + ceil(size(queryFile, "G") * 400 / cores) String dockerImage = "quay.io/biocontainers/pbmm2:1.17.0--h9ee0642_0" } + # Use cores+sortThreads to set the number of threads. Internally pbmm2 + # allocates cores - sortThreads to alignment. This leads to underutilization + # of the requested resources. Sorting uses very little CPU until the point + # comes that the memory is full and the temporary file needs to be written. + # At this point the alignment halts because the pipe is full. 
command { set -e mkdir -p $(dirname ~{outputPrefix}) pbmm2 align \ --preset ~{presetOption} \ - ~{true="--sort" false="" sort} \ - -j ~{cores} \ + --sort \ + ~{true="--unmapped" false="" unmapped} \ + --num-threads ~{cores + sortThreads} \ + --sort-memory ~{sortMemoryGb}G \ + --sort-threads ~{sortThreads} \ ~{referenceMMI} \ ~{queryFile} \ --sample ~{sample} \ @@ -63,15 +74,18 @@ task Mapping { parameter_meta { # inputs presetOption: {description: "This option applies multiple options at the same time.", category: "required"} - sort: {description: "Sort the output bam file.", category: "advanced"} sample: {description: "Name of the sample.", category: "required"} outputPrefix: {description: "The prefix of the output filename before the .bam extension.", category: "advanced"} referenceMMI: {description: "MMI file for the reference.", category: "required"} queryFile: {description: "BAM file with reads to align against the reference.", category: "required"} - cores: {description: "The number of cores to be used.", category: "advanced"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + unmapped: {description: "Include unmapped reads in the output.", category: "common"} + + sortThreads: {description: "The number of threads used for sorting (passed to pbmm2 --sort-threads)", category: "advanced"} + sortMemoryGb: {description: "Amount of memory set for sorting", category: "advanced"} + cores: {description: "The number of cores to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} # output outputAlignmentFile: {description: "Mapped bam file."} From 046947847255c3323524f1c92004a66ec026b7c1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 13:21:51 +0200 Subject: [PATCH 611/668] Increase default thread count for samtools merge --- CHANGELOG.md | 1 + samtools.wdl | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5fa636d8..0781e4b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Samtools merge default thread count increased to 8. + Update docker images in samtools.wdl + Add threads and compression levels to applicable tasks. Default to compression level 1. diff --git a/samtools.wdl b/samtools.wdl index 915bb848..7a2223f6 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -431,7 +431,8 @@ task Merge { Boolean combinePGHeaders = false Int compressionLevel = 1 - Int threads = 1 + # Merging is often a bottleneck. Set a high number of threads to decrease wall clock time. 
+ Int threads = 8 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" From b063b9ba79e41f3d20c64ded779a2953a1f7ec55 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 13:53:28 +0200 Subject: [PATCH 612/668] more time for clair3 --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index 57984a32..5a6154af 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -34,7 +34,7 @@ task Clair3 { Int threads = 8 Boolean includeAllCtgs = false String memory = "~{threads + 16}GiB" - Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) + Int timeMinutes = 10 + ceil(size(bam, "G") * 400 / threads) String dockerImage = "quay.io/biocontainers/clair3:1.0.11--py39hd649744_0" } From d502298c8ec0e594cace54e573e68b2e7a4d9041 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 14:54:27 +0200 Subject: [PATCH 613/668] Make sequali runtime dependent on input file size --- sequali.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sequali.wdl b/sequali.wdl index b43cf281..cbca3653 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -29,7 +29,7 @@ task Sequali { Int threads = 2 String memory = "4GiB" String dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" - Int timeMinutes = 59 + Int timeMinutes = 10 + ceil(size(reads, "GiB") + size(mate_reads, "GiB")) * 4 } command <<< From b942c7ed0a833c830aabb227a15d78ca89aecc3e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 14:58:12 +0200 Subject: [PATCH 614/668] Slightly higher requirements for pbmm2 than minimap2 --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index 23133278..9155e7b2 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -32,7 +32,7 @@ task Mapping { Int sortMemoryGb = 1 Int sortThreads = 2 Int cores = 8 - String memory = "24GiB" + String memory = "30GiB" # 
Slightly higher than minimap2 as compression level can not be set. Int timeMinutes = 1 + ceil(size(queryFile, "G") * 400 / cores) String dockerImage = "quay.io/biocontainers/pbmm2:1.17.0--h9ee0642_0" From d2ac7b2ad030a00d83aa5a0100f79ec5e16dd5d1 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 2 Apr 2025 18:20:33 +0200 Subject: [PATCH 615/668] Add filterThreshold, filterPercent to modkit pileup --- modkit.wdl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 78df28f4..a611a620 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -31,6 +31,8 @@ task Pileup { Int? intervalSize File? includeBed + String? filterThreshold + String? filterPercentile Boolean cpg = false Boolean combineMods = false @@ -57,6 +59,8 @@ task Pileup { ~{true="--cpg" false="" cpg} \ ~{true="--combine-mods" false="" combineMods} \ ~{true="--combine-strands" false="" combineStrands} \ + ~{"--filter-percentile " + filterPercentile} \ + ~{"--filter-threshold " + filterThreshold} \ --log-filepath ~{logFilePath} \ ~{bam} \ - | tee ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10 >> "~{outputBedGraph}_"$4"_"$6".bedGraph"}' @@ -97,12 +101,14 @@ task Pileup { combineStrands: {description: "Whether to combine strands in the output", category: "advanced"} ignore: {description: "Modification type to ignore. For example 'h'.", category: "advanced"} logFilePath: {description: "Path where the log file should be written.", category: "advanced"} + filterThreshold: {description: "Global filter threshold can be specified by a decimal number (e.g. 0.75). 
Otherwise the automatic filter percentile will be used.", category: "advanced"} + filterPercentile: {description: "This defaults to 0.1, to remove the lowest 10% confidence modification calls, but can be manually adjusted", category: "advanced"} threads: {description: "The number of threads to use for variant calling.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - + # output out: {description: "The output bed files. Not available when bedgraph = true."} outFiles: {description: "Output files when bedgraph = true."} From 204821385c3d176c3425d7052b6f3905ff46541d Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 2 Apr 2025 18:21:11 +0200 Subject: [PATCH 616/668] Add a summary task --- CHANGELOG.md | 2 ++ modkit.wdl | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ed79b5b..57519f04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,8 @@ version 6.0.0-dev + New samtools task: split. + Update `bedtools.Intersect` to support `-wa`, `-wb`, and `-s` flags. + Deprecate `modkit.Pileup`'s bedGraph option, it is now output by default. ++ Add support for filterThreshold/filterPercentile for `modkit.Pileup`. ++ Add `modkit.Summary` task. 
version 5.2.0 --------------------------- diff --git a/modkit.wdl b/modkit.wdl index a611a620..7546458a 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -115,3 +115,65 @@ task Pileup { logFile: {description: "The generated log file."} } } + +task Summary { + input { + File bam + File bamIndex + + String summary = "modkit.summary.txt" + + Boolean sample = true + Int? numReads # = 10042 + Float? samplingFrac # = 0.1 + Int? seed + + Int threads = 4 + String memory = ceil(size(bam, "GiB") * 0.20) + 10 # Based on a linear model with some fudge (y=-0.13x - 4). + Int timeMinutes = 2880 / threads # 2 Days / threads + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" + } + + command <<< + set -e + mkdir -p $(dirname ~{summary}) + + modkit summary \ + --threads ~{threads} \ + ~{true="" false="--no-sampling" sample} \ + ~{"--num-reads " + numReads} \ + ~{"--sampling-frac " + samplingFrac} \ + ~{"--seed " + seed} \ + ~{bam} > ~{summary} + >>> + + output { + File summaryReport = summary # Normal mode + } + + runtime { + docker: dockerImage + cpu: threads + memory: memory + time_minutes: timeMinutes + } + + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + + sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} + numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} + samplingFrac: {description: "Use a fixed percentage of reads, rather than a fixed number of reads, for sampling.", category: "advanced"} + seed: {description: "A seed can be provided for reproducibility in the sampling fraction case.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", 
category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + summaryReport: {description: "The output modkit summary."} + } +} From a9ec6faf3de64e110209ed2c81b1272e765a6247 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 4 Apr 2025 13:24:46 +0200 Subject: [PATCH 617/668] Downgrade deepvariant because of a bug --- deepvariant.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index e9e6c18c..c700416f 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -39,7 +39,9 @@ task RunDeepVariant { String memory = "48GiB" Int timeMinutes = 5000 - String dockerImage = "google/deepvariant:1.8.0" + # Version 1.8.0 has a bug. + # https://github.com/google/deepvariant/issues/912 + String dockerImage = "google/deepvariant:1.6.1" } command { From 741f9708383ff29d0f6f548f9fffad0b8eb7ab37 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 4 Apr 2025 13:26:56 +0200 Subject: [PATCH 618/668] Increase time limit for VEP --- vep.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index e99c9fdb..2c1f923b 100644 --- a/vep.wdl +++ b/vep.wdl @@ -36,7 +36,7 @@ task Vep { String memory = "8GiB" # Account time for unpacking the cache. 
- Int timeMinutes = 1 + ceil(size(cacheTar, "GiB")) + ceil(size(inputFile, "MiB") * 3) + Int timeMinutes = 1 + ceil(size(cacheTar, "GiB")) + ceil(size(inputFile, "MiB") * 15) String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0" } From 4fe49b8ef3f1bae978b2fa07ac6e08a282e2f91f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 09:56:43 +0200 Subject: [PATCH 619/668] Update samtools parameter_meta --- CHANGELOG.md | 2 +- samtools.wdl | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0781e4b1..1180578a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ version 6.0.0-dev --------------------------- + Samtools merge default thread count increased to 8. + Update docker images in samtools.wdl -+ Add threads and compression levels to applicable tasks. Default to ++ Add threads and compression levels to applicable tasks in samtools. Default to compression level 1. + samtools BgzipAndIndex and Tabix "type" parameter changed to "preset" as is the name of the flag. diff --git a/samtools.wdl b/samtools.wdl index 7a2223f6..cd24e6e9 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -69,6 +69,8 @@ task BgzipAndIndex { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + compressLevel: {description: "Set compression level.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} # outputs compressed: {description: "Compressed input file."} @@ -317,6 +319,7 @@ task Flagstat { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} # outputs flagstat: {description: "The number of alignments for each FLAG type."} @@ -374,6 +377,7 @@ task Index { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} # outputs indexedBam: {description: "BAM file that was indexed."} @@ -415,6 +419,7 @@ task Markdup { outputBamPath: {description: "The location of the output BAM file.", category: "required"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} # outputs outputBam: {description: "BAM file with duplicate alignments marked."} @@ -471,6 +476,10 @@ task Merge { bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} + + combineRGHeaders: {description: "Combine @RG headers with colliding IDs", category: "advanced"} + combinePGHeaders: {description: "Combine @PG headers with colliding IDs", category: "advanced"} + threads: {description: "Number of threads to use.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -567,7 +576,7 @@ task Sort { sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes.", category: "advanced"} - threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} + threads: {description: "The number of threads that will be used for this task.", category: "advanced"} memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -696,7 +705,7 @@ task View { Int? MAPQthreshold File? targetFile - Boolean fast = false # Default should be true, unless a non-BAM format is preferred. So th + Boolean fast = true # Sets compression level to 1. Int threads = 1 String memory = "1GiB" @@ -707,14 +716,15 @@ task View { String outputIndexPath = basename(outputFileName) + ".bai" # Always output to bam and output header. + # -u should be after --fast, and will override it in that case. command { set -e mkdir -p "$(dirname ~{outputFileName})" samtools view -b \ ~{"-T " + referenceFasta} \ ~{"-o " + outputFileName} \ - ~{true="-u " false="" uncompressedBamOutput} \ ~{true="--fast" false="" fast} \ + ~{true="-u " false="" uncompressedBamOutput} \ ~{"-f " + includeFilter} \ ~{"-F " + excludeFilter} \ ~{"-G " + excludeSpecificFilter} \ @@ -741,6 +751,7 @@ task View { # inputs inFile: {description: "A BAM, SAM or CRAM file.", category: "required"} outputFileName: {description: "The location the output BAM file should be written.", category: "common"} + fast: {description: "Sets compression level to 1. 
Set to true by default.", category: "common"} uncompressedBamOutput: {description: "Equivalent to samtools view's `-u` flag.", category: "advanced"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} includeFilter: {description: "Equivalent to samtools view's `-f` option.", category: "advanced"} From 6a78f520a6efee6def3fcc257f5ea3be02daf8cb Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 10:21:57 +0200 Subject: [PATCH 620/668] Increase deep variant shards and explain memory usage --- deepvariant.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index c700416f..b0ed2a19 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -30,13 +30,17 @@ task RunDeepVariant { String outputVcf = "sample.vcf.gz" String? postprocessVariantsExtraArgs File? customizedModel - Int numShards = 4 + Int numShards = 8 String? outputGVcf String? outputGVcfIndex File? regions String? sampleName Boolean VCFStatsReport = true + # Most of the memory used is at the end, in the step where the variants + # are merged. This is a single-threaded high memory step. The number + # of shards does not influence the memory so much. + # The provided memory here is enough for merge human chromosome 1. String memory = "48GiB" Int timeMinutes = 5000 # Version 1.8.0 has a bug. From ba35d987ca3fe3c27a01034d60cd2ab09369ab31 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 10:36:03 +0200 Subject: [PATCH 621/668] Set a lower number of threads for samtools merge to decrease waste --- samtools.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index cd24e6e9..cb8dbd55 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -436,8 +436,9 @@ task Merge { Boolean combinePGHeaders = false Int compressionLevel = 1 - # Merging is often a bottleneck. Set a high number of threads to decrease wall clock time. 
- Int threads = 8 + # Merging is often a bottleneck. With compression level 1 however, + # more than three threads does not add more benefit. + Int threads = 3 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" From 8b41a7feddf6e1f29af7fd825cad6a0ae6811687 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 10:45:14 +0200 Subject: [PATCH 622/668] Dynamically set samtools merge threads --- samtools.wdl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index cb8dbd55..7dd9ecc1 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -436,9 +436,8 @@ task Merge { Boolean combinePGHeaders = false Int compressionLevel = 1 - # Merging is often a bottleneck. With compression level 1 however, - # more than three threads does not add more benefit. - Int threads = 3 + # Use one thread per input + one for the output + one for merging + Int threads = length(bamFiles) + 2 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" From 17cf284d2c54212b29cdf4e6a347adc0e0a0c458 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 16:38:42 +0200 Subject: [PATCH 623/668] Also use threads for faster indexing --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 7dd9ecc1..811f56e0 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -456,7 +456,7 @@ task Merge { ~{true="-c" false="" combineRGHeaders} \ ~{true="-p" false="" combinePGHeaders} \ ~{outputBamPath} ~{sep=' ' bamFiles} - samtools index ~{outputBamPath} ~{indexPath} + samtools index -@ ~{threads - 1} ~{outputBamPath} ~{indexPath} } output { From 1fae30492bdff1af750ac963d565cbb16cc6572b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 16:53:59 +0200 Subject: [PATCH 624/668] Add missing 
parameter_meta --- samtools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 811f56e0..743fce0c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -480,6 +480,8 @@ task Merge { combineRGHeaders: {description: "Combine @RG headers with colliding IDs", category: "advanced"} combinePGHeaders: {description: "Combine @PG headers with colliding IDs", category: "advanced"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} + threads: {description: "Number of threads to use.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From d485e17399c3482aa109e0d1055c2b2bac9d93a4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Apr 2025 08:30:54 +0200 Subject: [PATCH 625/668] Update CHANGELOG.md --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1180578a..96adc8fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,9 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- -+ Samtools merge default thread count increased to 8. ++ MultiQC image updated to version 1.28 ++ Samtools merge now has options added for merging RG and PG headers. ++ Samtools merge default thread count increased based on the number of files. + Update docker images in samtools.wdl + Add threads and compression levels to applicable tasks in samtools. Default to compression level 1. 
From 847ad71a26b3a1ddc1fc06c2fda349fc620ad2b5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 19:40:40 +0200 Subject: [PATCH 626/668] Update vt to allow a filter expression and compressed indexed output --- CHANGELOG.md | 1 + vt.wdl | 28 ++++++++++++++++++++++------ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 96adc8fa..dfa40b75 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Update vt task to allow a filter expression and compress and index the output. + MultiQC image updated to version 1.28 + Samtools merge now has options added for merging RG and PG headers. + Samtools merge default thread count increased based on the number of files. diff --git a/vt.wdl b/vt.wdl index 4da2d8cd..4ced1d2a 100644 --- a/vt.wdl +++ b/vt.wdl @@ -27,27 +27,39 @@ task Normalize { File referenceFasta File referenceFastaFai Boolean ignoreMaskedRef = false - String outputPath = "./vt/normalized_decomposed.vcf" + String outputPath = "./vt/normalized_decomposed.vcf.gz" + String? 
filterExpression + + Int compressionLevel = 1 String memory = "4GiB" - Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" + Int timeMinutes = 10 + ceil(size(inputVCF, "GiB") * 240) + String dockerImage = "quay.io/biocontainers/vt:0.57721--h2419454_12" } command { set -eo pipefail mkdir -p "$(dirname ~{outputPath})" - vt normalize ~{inputVCF} \ + vt view -h \ + ~{"-f " + filterExpression} \ + ~{inputVCF} \ + | vt normalize - \ -r ~{referenceFasta} \ ~{true="-m " false="" ignoreMaskedRef} \ - | vt decompose -s - -o ~{outputPath} + | vt decompose -s - \ + | vt view - \ + -c ~{compressionLevel} \ + -o ~{outputPath} + vt index ~{outputPath} } output { File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" } runtime { + cpu: 1 memory: memory time_minutes: timeMinutes docker: dockerImage @@ -61,11 +73,15 @@ task Normalize { referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs.", category: "advanced"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} + filterExpression: {description: "See https://genome.sph.umich.edu/wiki/Vt#Filters for valid expressions.", category: "common"} + compressionLevel: {description: "Compression level for the out vcf.gz file.", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputVcf: {description: "Normalized & decomposed VCF file."} + outputVcf: {description: "Normalized and decomposed VCF file."} + outputVcfIndex: {description: "Index for normalized and decomposed VCF file."} } } From 57018dd55c43af0013f48a61e5119128ccd87d3f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Apr 2025 11:28:56 +0200 Subject: [PATCH 627/668] Properly quote vt filter --- vt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vt.wdl b/vt.wdl index 4ced1d2a..635641e9 100644 --- a/vt.wdl +++ b/vt.wdl @@ -41,7 +41,7 @@ task Normalize { set -eo pipefail mkdir -p "$(dirname ~{outputPath})" vt view -h \ - ~{"-f " + filterExpression} \ + ~{"-f '" + filterExpression}~{true="'" false="" defined(filterExpression)} \ ~{inputVCF} \ | vt normalize - \ -r ~{referenceFasta} \ From e39fe10360989d5074580034a4df030e16d27f4c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:08:55 +0200 Subject: [PATCH 628/668] do not intermingle singletons --- samtools.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index d724a692..ef89477d 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -157,6 +157,7 @@ task Fastq { String outputRead1 String? outputRead2 String? outputRead0 + String? outputReadS Boolean appendReadNumber = false Boolean outputQuality = false @@ -177,8 +178,10 @@ task Fastq { samtools collate -u -O ~{inputBam} | \ samtools fastq \ ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ + ~{"-1 " + outputRead1} \ ~{"-2 " + outputRead2} \ ~{"-0 " + outputRead0} \ + ~{"-s " + outputReadS} \ ~{"-f " + includeFilter} \ ~{"-F " + excludeFilter} \ ~{"-G " + excludeSpecificFilter} \ @@ -192,6 +195,7 @@ task Fastq { File read1 = outputRead1 File? read2 = outputRead2 File? read0 = outputRead0 + File? 
readS = outputReadS } runtime { @@ -207,6 +211,7 @@ task Fastq { outputRead1: {description: "The location the reads (first reads for pairs, in case of paired-end sequencing) should be written to.", category: "required"} outputRead2: {description: "The location the second reads from pairs should be written to.", category: "common"} outputRead0: {description: "The location the unpaired reads should be written to (in case of paired-end sequenicng).", category: "advanced"} + outputReadS: {description: "The location singleton reads should be written to.", category: "advanced"} appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. Corresponds to `-n/N`.", category: "advanced"} outputQuality: {description: "Equivalent to samtools fastq's `-O` flag.", category: "advanced"} includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`.", category: "advanced"} From b9319418b7a96a0046b9c034649930ccd5cf4fa9 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:09:16 +0200 Subject: [PATCH 629/668] The caches in containers caused issues --- samtools.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index ef89477d..315a00b5 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -571,6 +571,9 @@ task Split { command { set -e mkdir -p "~{outputPath}/rg/" + + export XDG_CACHE_HOME=$PWD/.cache/ + export REF_CACHE=$PWD/.cache/hts-ref/%2s/%2s/%s samtools split \ --output-fmt bam \ --output-fmt-option level=~{compressionLevel} \ From 8ede8b774a0296fe484e9f78e25d5d358828099e Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:15:01 +0200 Subject: [PATCH 630/668] add biopets validate fastq --- biopet.wdl | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 biopet.wdl diff --git a/biopet.wdl b/biopet.wdl new file mode 100644 index 00000000..ea8a36c8 --- /dev/null +++ b/biopet.wdl @@ -0,0 +1,60 @@ +version 1.0 + +# 
Copyright (c) 2025 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +task ValidateFastq { + input { + File inputRead1 + File? 
inputRead2 + + String memory = "1GiB" + Int timeMinutes = 5 + ceil(size(inputRead1, "GiB")) + String dockerImage = "quay.io/biocontainers/biopet-validatefastq:0.1.1--hdfd78af_3" + } + + command { + set -e + java -jar /usr/local/share/biopet-validatefastq-0.1.1-3/validatefastq-assembly-0.1.1.jar \ + --fastq1 ~{inputRead1} \ + ~{"--fastq2 " + inputRead2} + } + + output { + } + + runtime { + cpu: 1 + memory: memory + docker: dockerImage + time_minutes: timeMinutes + } + + parameter_meta { + # inputs + inputRead1: {description: "The location of the first FASTQ file (first reads for pairs, in case of paired-end sequencing).", category: "required"} + inputRead2: {description: "The location of the paired end reads.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} From 8493c77e477c5522b0947948b47e35be04974fc7 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:19:23 +0200 Subject: [PATCH 631/668] require being explicit about locations --- samtools.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 315a00b5..5bb2fb82 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -177,7 +177,6 @@ task Fastq { mkdir -p "$(dirname ~{outputRead1})" samtools collate -u -O ~{inputBam} | \ samtools fastq \ - ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ ~{"-1 " + outputRead1} \ ~{"-2 " + outputRead2} \ ~{"-0 " + outputRead0} \ From 347ed91d4bff4306cea0074ca7f1c7fa2ff517b7 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:20:09 +0200 Subject: [PATCH 632/668] Probably unnecessary --- samtools.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 5bb2fb82..0ef1419c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -571,8 +571,6 @@ task Split { set -e mkdir -p "~{outputPath}/rg/" - export XDG_CACHE_HOME=$PWD/.cache/ - export REF_CACHE=$PWD/.cache/hts-ref/%2s/%2s/%s samtools split \ --output-fmt bam \ --output-fmt-option level=~{compressionLevel} \ From 7ff2ac2c1ebab33a3872297beb189e648eb90724 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:22:10 +0200 Subject: [PATCH 633/668] documentation --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 986dfd13..f20dc82d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,8 @@ version 6.0.0-dev + Fixed bug whereby `samtools.Fastq` could produce out of sync R1/R2 when used with an unsorted bam input. `samtools collate` is now used by default to group reads by readname in order to avoid this issue. + New samtools task: split. + Update `bedtools.Intersect` to support `-wa`, `-wb`, and `-s` flags. 
++ Add `biopet.ValidateFastq` to check your fastq files for pairing and other correctness. ++ **Breaking**: `samtools.Fastq` now requires defining your singleton read location. This only affects you if you were previously using this task with only a single output read file. version 5.2.0 --------------------------- From 1ee07a657fd46f6dc227573c2c59d7ef4d0cd4b9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 10:49:41 +0200 Subject: [PATCH 634/668] add -no-upstream to snpeff task --- snpeff.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/snpeff.wdl b/snpeff.wdl index 0f14e5b5..8718e01b 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -32,6 +32,7 @@ task SnpEff { Boolean hgvs = true Boolean lof = true Boolean noDownstream = false + Boolean noUpstream = false Boolean noIntergenic = false Boolean noShiftHgvs = false Int? upDownStreamLen @@ -39,7 +40,7 @@ task SnpEff { String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/snpeff:5.0--0" + String dockerImage = "quay.io/biocontainers/snpeff:5.2--hdfd78af_1" } command { @@ -55,6 +56,7 @@ task SnpEff { ~{true="-hgvs" false="-noHgvs" hgvs} \ ~{true="-lof" false="-noLof" lof} \ ~{true="-no-downstream" false="" noDownstream} \ + ~{true="-no-upstream" false="" noUpstream} \ ~{true="-no-intergenic" false="" noIntergenic} \ ~{true="-noShiftHgvs" false="" noShiftHgvs} \ ~{"-upDownStreamLen " + upDownStreamLen} \ @@ -82,6 +84,7 @@ task SnpEff { hgvs: {description: "Equivalent to `-hgvs` if true or `-noHgvs` if false.", category: "advanced"} lof: {description: "Equivalent to `-lof` if true or `-noLof` if false.", category: "advanced"} noDownstream: {description: "Equivalent to the `-no-downstream` flag.", category: "advanced"} + noUpstream: {description: "Equivalent to the `-no-upstream` flag.", category: "advanced"} noIntergenic: {description: "Equivalent to the `-no-intergenic` flag.", category: "advanced"} noShiftHgvs: {description: 
"Equivalent to the `-noShiftHgvs` flag.", category: "advanced"} upDownStreamLen: {descriptoin: "Equivalent to the `-upDownStreamLen` option.", category: "advanced"} From d9d989e07649ac3177f6464100e192418e716ce3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 11:22:36 +0200 Subject: [PATCH 635/668] Add snpsift filter --- CHANGELOG.md | 2 ++ snpsift.wdl | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 snpsift.wdl diff --git a/CHANGELOG.md b/CHANGELOG.md index dfa40b75..7e209f1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Added a task for SnpSift filter. ++ Updated the snpEff task to allow setting the `-no-upstream` flag. + Update vt task to allow a filter expression and compress and index the output. + MultiQC image updated to version 1.28 + Samtools merge now has options added for merging RG and PG headers. diff --git a/snpsift.wdl b/snpsift.wdl new file mode 100644 index 00000000..0bb413f6 --- /dev/null +++ b/snpsift.wdl @@ -0,0 +1,69 @@ +version 1.0 + +# MIT License +# +# Copyright (c) 2025 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Filter { + input { + File vcf + File? vcfIndex + String filterExpression + String outputPath = "./snpsift_filter.vcf" + + String memory = "9GiB" + String javaXmx = "8G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/snpsift:5.2--hdfd78af_0" + } + + command { + SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + filter \ + "~{filterExpression}" \ + ~{vcf} \ + > ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes # !UnknownRuntimeKey + memory: memory + } + + parameter_meta { + vcf: {description: "A VCF file to filter.", category: "required"} + vcfIndex: {description: "The index for the VCF file.", category: "common"} + filterExpression: {description: "The SnpSift filtering expression.", category: "required"} + outputPath: {description: "The path to write the output to.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From d4eb18d70d68e5c75539c272bf0db065e5f0bf71 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 11:35:56 +0200 Subject: [PATCH 636/668] add region input to bcftools view --- CHANGELOG.md | 1 + bcftools.wdl | 5 ++++- snpsift.wdl | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e209f1a..f13ab24b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Updated the bcftools view task to allow specifying a region. + Added a task for SnpSift filter. + Updated the snpEff task to allow setting the `-no-upstream` flag. + Update vt task to allow a filter expression and compress and index the output. diff --git a/bcftools.wdl b/bcftools.wdl index 7df8911d..11864a00 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -349,6 +349,7 @@ task View { String? exclude String? include + String? 
region Array[String] samples = [] String memory = "256MiB" @@ -368,7 +369,8 @@ task View { ~{if length(samples) > 0 then "-s" else ""} ~{sep="," samples} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ - ~{inputFile} + ~{inputFile} \ + ~{region} ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} } @@ -390,6 +392,7 @@ task View { outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} + region: {description: "The region to retrieve from the VCF file.", category: "common"} excludeUncalled: {description: "Exclude sites without a called genotype (see man page for details).", category: "advanced"} samples: {description: "A list of sample names to include.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} diff --git a/snpsift.wdl b/snpsift.wdl index 0bb413f6..5bac6484 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -36,6 +36,8 @@ task Filter { } command { + set -e + mkdir -p "$(dirname ~{outputPath})" SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ filter \ "~{filterExpression}" \ From ecd2242e9a71f352a6b11683a969f9f5804cb18d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 12:14:13 +0200 Subject: [PATCH 637/668] add an ipnut for an index file in bcftools view --- CHANGELOG.md | 1 + bcftools.wdl | 2 ++ 2 files changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f13ab24b..7d5ad41d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Updated the bcftools view task with an input for an index file. 
+ Updated the bcftools view task to allow specifying a region. + Added a task for SnpSift filter. + Updated the snpEff task to allow setting the `-no-upstream` flag. diff --git a/bcftools.wdl b/bcftools.wdl index 11864a00..b923781c 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -344,6 +344,7 @@ task Stats { task View { input { File inputFile + File? inputFileIndex String outputPath = "output.vcf" Boolean excludeUncalled = false @@ -389,6 +390,7 @@ task View { parameter_meta { # inputs inputFile: {description: "A vcf or bcf file.", category: "required"} + inputFileIndex: {description: "the index for the input file.", category: "common"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} From abcddcda79a0821ef86bb0d1b40f2e5b7264e829 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 May 2025 11:56:23 +0200 Subject: [PATCH 638/668] fix wdlTools parsing issue in bcftools annotate --- CHANGELOG.md | 2 ++ bcftools.wdl | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d5ad41d..7ad69a3e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Fixed an issue with the parameter_meta section of bcftools annotate + which caused wdlTools to error on parsing the file. + Updated the bcftools view task with an input for an index file. + Updated the bcftools view task to allow specifying a region. + Added a task for SnpSift filter. 
diff --git a/bcftools.wdl b/bcftools.wdl index b923781c..6200a1a1 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -111,7 +111,7 @@ task Annotate { collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} headerLines: {description: "Lines to append to the VCF header (see man page for details).", category: "advanced"} - newId: {description: "Assign ID on the fly (e.g. --set-id +'%CHROM\_%POS').", category: "advanced"} + newId: {description: "Assign ID on the fly (e.g. --set-id +'%CHROM\\_%POS').", category: "advanced"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} markSites: {description: "Annotate sites which are present ('+') or absent ('-') in the -a file with a new INFO/TAG flag.", category: "advanced"} regions: {description: "Restrict to comma-separated list of regions.", category: "advanced"} From 748fe367e1964e5014cdb60a3def6976f2846d3c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 May 2025 14:51:52 +0200 Subject: [PATCH 639/668] change name of snpsift task --- snpsift.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snpsift.wdl b/snpsift.wdl index 5bac6484..6b6a1feb 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -22,7 +22,7 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -task Filter { +task SnpSiftFilter { input { File vcf File? 
vcfIndex From 2fc90c9790b41781ca35144e0d495f293a614382 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 May 2025 16:05:27 +0200 Subject: [PATCH 640/668] add a useless ls to check if a dnanexus error is caused by lazy loading --- bcftools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 6200a1a1..5ab04c1c 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -362,6 +362,8 @@ task View { command { set -e + ls ~{inputFileIndex} + mkdir -p "$(dirname ~{outputPath})" bcftools view \ ~{"--exclude " + exclude} \ From 82a5715109d7c352c016d2672cea27b0ab4eb7f0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 28 May 2025 09:14:43 +0200 Subject: [PATCH 641/668] add ls to snpeff, bcftools view and snpsift so I can see the paths when run on dnanexus --- bcftools.wdl | 2 +- snpeff.wdl | 1 + snpsift.wdl | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 5ab04c1c..0381d4cf 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -362,7 +362,7 @@ task View { command { set -e - ls ~{inputFileIndex} + ls ~{inputFile} ~{inputFileIndex} mkdir -p "$(dirname ~{outputPath})" bcftools view \ diff --git a/snpeff.wdl b/snpeff.wdl index 8718e01b..924db8db 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -45,6 +45,7 @@ task SnpEff { command { set -e + ls ~{vcf} ~{vcfIndex} mkdir -p "$(dirname ~{outputPath})" unzip ~{datadirZip} snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ diff --git a/snpsift.wdl b/snpsift.wdl index 6b6a1feb..5daacd36 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -37,6 +37,8 @@ task SnpSiftFilter { command { set -e + ls ~{vcf} ~{vcfIndex} + mkdir -p "$(dirname ~{outputPath})" SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ filter \ From 0513965516fab2b2a6a4c9d146813e65ffa77b19 Mon Sep 17 00:00:00 2001 From: Helena Date: Wed, 28 May 2025 14:04:40 +0200 Subject: [PATCH 642/668] Update modkit.wdl --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl 
index 7546458a..424ba755 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -129,7 +129,7 @@ task Summary { Int? seed Int threads = 4 - String memory = ceil(size(bam, "GiB") * 0.20) + 10 # Based on a linear model with some fudge (y=-0.13x - 4). + String memory = ceil(size(bam, "GiB") * 110) + 40 # Based on a linear model with some fudge (memory = 107 * file_size - 1.8). Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } From 58b52865e986970b7c49d10096afbf1d0eec8e84 Mon Sep 17 00:00:00 2001 From: Helena Date: Wed, 28 May 2025 15:37:47 +0200 Subject: [PATCH 643/668] Update modkit.wdl More reasonable bounds --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 424ba755..094f0041 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -129,7 +129,7 @@ task Summary { Int? seed Int threads = 4 - String memory = ceil(size(bam, "GiB") * 110) + 40 # Based on a linear model with some fudge (memory = 107 * file_size - 1.8). + String memory = ceil(size(bam, "GiB") * 115) + 4 # Based on a linear model with some fudge (memory = 107 * file_size - 1.8). Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } From 5d4f097ad010fb12c4b7599511eaafc741b64932 Mon Sep 17 00:00:00 2001 From: Helena Date: Wed, 28 May 2025 16:01:43 +0200 Subject: [PATCH 644/668] re-correct it. --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 094f0041..4aecb517 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -129,7 +129,7 @@ task Summary { Int? seed Int threads = 4 - String memory = ceil(size(bam, "GiB") * 115) + 4 # Based on a linear model with some fudge (memory = 107 * file_size - 1.8). + String memory = ceil(size(bam, "GiB") * 0.1) + 5 # Based on a linear model with some fudge (memory = 0.07540 * file_size - 0.6). 
Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } From eafceb0f98e68feb884f8a947c15c29a2e52eb5b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 3 Jun 2025 15:52:08 +0200 Subject: [PATCH 645/668] WIP add option to output compressed VCF files to snpeff and snpsift --- snpeff.wdl | 10 ++++++++-- snpsift.wdl | 5 ++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/snpeff.wdl b/snpeff.wdl index 924db8db..e1b520af 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -40,9 +40,12 @@ task SnpEff { String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/snpeff:5.2--hdfd78af_1" + # Multicontainer with snpeff 5.2 and bgzip/tabix 1.19.1 + String dockerImage = "quay.io/biocontainers/mulled-v2-2fe536b56916bd1d61a6a1889eb2987d9ea0cd2f:c51b2e46bf63786b2d9a7a7d23680791163ab39a-0" } + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") + command { set -e ls ~{vcf} ~{vcfIndex} @@ -61,12 +64,15 @@ task SnpEff { ~{true="-no-intergenic" false="" noIntergenic} \ ~{true="-noShiftHgvs" false="" noShiftHgvs} \ ~{"-upDownStreamLen " + upDownStreamLen} \ - > ~{outputPath} + ~{if compressed then "| bgzip " else ""} > ~{outputPath} + + ~{if compressed then "tabix ~{outputPath}" else ""} rm -r $PWD/data } output { File outputVcf = outputPath + File? outputVcfIndex = outputPath + ".tbi" } runtime { diff --git a/snpsift.wdl b/snpsift.wdl index 5daacd36..d964c255 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -44,11 +44,14 @@ task SnpSiftFilter { filter \ "~{filterExpression}" \ ~{vcf} \ - > ~{outputPath} + ~{if compressed then "| bgzip " else ""} > ~{outputPath} + + ~{if compressed then "tabix ~{outputPath}" else ""} } output { File outputVcf = outputPath + File? 
outputVcfIndex = outputPath + ".tbi" } runtime { From 16656ff77fa9f88577298fd7e8cc00c5eba02004 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 5 Jun 2025 11:38:01 +0200 Subject: [PATCH 646/668] update changelog, fix missing variable --- CHANGELOG.md | 1 + snpsift.wdl | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ad69a3e..7de262af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Add support for outputting compressed files to snpeff and snpsift. + Fixed an issue with the parameter_meta section of bcftools annotate which caused wdlTools to error on parsing the file. + Updated the bcftools view task with an input for an index file. diff --git a/snpsift.wdl b/snpsift.wdl index d964c255..4c354f48 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -32,9 +32,12 @@ task SnpSiftFilter { String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/snpsift:5.2--hdfd78af_0" + # Multicontainer with SnpSift 5.2 and bgzip/tabix 1.22 + String dockerImage = "quay.io/biocontainers/mulled-v2-d4bc0c23eb1d95c7ecff7f0e8b3a4255503fd5d4:c51b2e46bf63786b2d9a7a7d23680791163ab39a-0" } + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") + command { set -e ls ~{vcf} ~{vcfIndex} From e9189a7f5d61a46d1deec0108900a11d70630933 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 6 Jun 2025 10:55:53 +0200 Subject: [PATCH 647/668] missing trailing slash breaks samtools flagstat --- samtools.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 743fce0c..ac2e868a 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -296,9 +296,10 @@ task Flagstat { command { set -e mkdir -p "$(dirname ~{outputPath})" + samtools flagstat \ - --threads ~{threads - 1} - ~{inputBam} > ~{outputPath} + --threads 
~{threads - 1} \ + ~{inputBam} > ~{outputPath} } output { From 69a9c0a6751f78cfaa75c325fc49425113e268b1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 11:56:44 +0200 Subject: [PATCH 648/668] Add a task for bcftools norm --- CHANGELOG.md | 1 + bcftools.wdl | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7de262af..003aa97a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Add a task for bcftools norm. + Add support for outputting compressed files to snpeff and snpsift. + Fixed an issue with the parameter_meta section of bcftools annotate which caused wdlTools to error on parsing the file. diff --git a/bcftools.wdl b/bcftools.wdl index 0381d4cf..1b00b5fe 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -180,6 +180,67 @@ task Filter { } } +task Norm { + input { + File inputFile + File? inputFileIndex + String outputPath = "output.vcf.gz" + + File? fasta + String? regions + Boolean splitMultiallelicSites = false + + String memory = "2GiB" + Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + } + + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") + + command { + set -e + ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud + + mkdir -p "$(dirname ~{outputPath})" + bcftools norm \ + -o ~{outputPath} \ + -O ~{true="z" false="v" compressed} \ + ~{"--regions " + regions} \ + ~{"--fasta " + fasta} \ + ~{if splitMultiallelicSites then "--multiallelics -both" else ""} + + ~{if compressed then "bcftools index --tbi ~{outputPath}" else ""} + } + + output { + File outputVcf = outputPath + File? 
outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "A vcf or bcf file.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + fasta: {description: "Equivalent to bcftools norm's `--fasta` option.", category: "advanced"} + regions: {description: "Equivalent to bcftools norm's `--regions` option.", category: "advanced"} + splitMultiallelicSites: {description: "Whether multiallelic lines should be split up.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Sorted VCF file."} + outputVcfIndex: {description: "Index of sorted VCF file."} + } +} + task Sort { input { File inputFile From 5d4f5a7fa3846dea7b8a16fce9c47d8674a5f260 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:26:56 +0200 Subject: [PATCH 649/668] more time for bcftools norm --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 1b00b5fe..5fb06016 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -191,7 +191,7 @@ task Norm { Boolean splitMultiallelicSites = false String memory = "2GiB" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) + Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 2 String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From c97c55a47411b2395289ed3bf0357d8686dc7350 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:38:14 +0200 Subject: [PATCH 650/668] more time for bcftools Norm --- 
bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 5fb06016..56564b17 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -191,7 +191,7 @@ task Norm { Boolean splitMultiallelicSites = false String memory = "2GiB" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 2 + Int timeMinutes = 5 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 88ac2526f86f5a89d6de0fe74077f6bab05baf8d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:52:37 +0200 Subject: [PATCH 651/668] reset time for bcftools norm --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 56564b17..1b00b5fe 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -191,7 +191,7 @@ task Norm { Boolean splitMultiallelicSites = false String memory = "2GiB" - Int timeMinutes = 5 + ceil(size(inputFile, "G")) + Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 3c8ec631930d4ec7df1d01ba802d5943257dfd42 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:58:40 +0200 Subject: [PATCH 652/668] fix bcftools norm --- bcftools.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 1b00b5fe..ae2dee4e 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -207,7 +207,8 @@ task Norm { -O ~{true="z" false="v" compressed} \ ~{"--regions " + regions} \ ~{"--fasta " + fasta} \ - ~{if splitMultiallelicSites then "--multiallelics -both" else ""} + ~{if splitMultiallelicSites then "--multiallelics -both" else ""} \ + ~{inputFile} ~{if compressed then "bcftools index --tbi ~{outputPath}" else ""} } From 5dab6c7b08f05e831ea110b44acdefedc298f67b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 13:36:41 +0200 Subject: [PATCH 653/668] more memory for bcftools norm --- bcftools.wdl | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index ae2dee4e..b48956cc 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -190,7 +190,7 @@ task Norm { String? regions Boolean splitMultiallelicSites = false - String memory = "2GiB" + String memory = "10GiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 99c562c5e8ed51e8a2a04ec5dc72dada5248ff9e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 15:08:05 +0200 Subject: [PATCH 654/668] more memory for bcftools norm --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index b48956cc..7e297bc7 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -190,7 +190,7 @@ task Norm { String? regions Boolean splitMultiallelicSites = false - String memory = "10GiB" + String memory = "64GiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 99b9aca2b67d6a5d138c0b4dc9317f6b03bbc395 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 11 Jun 2025 14:24:53 +0200 Subject: [PATCH 655/668] Disable ai in multiqc --- CHANGELOG.md | 1 + multiqc.wdl | 3 +++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44f71e4b..e0b036ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ version 6.0.0-dev + Deprecate `modkit.Pileup`'s bedGraph option, it is now output by default. + Add support for filterThreshold/filterPercent for `modkit.Pileup`. + Add `modkit.Summary` task. ++ Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. version 5.2.0 --------------------------- diff --git a/multiqc.wdl b/multiqc.wdl index fae52178..18667b91 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -39,6 +39,7 @@ task MultiQC { # This must be actively enabled in my opinion. # The tools default is to upload. Boolean megaQCUpload = false + Boolean enableAi = false Int? 
dirsDepth String? title @@ -124,6 +125,7 @@ task MultiQC { ~{true="--lint" false="" lint} \ ~{true="--pdf" false="" pdf} \ ~{false="--no-megaqc-upload" true="" megaQCUpload} \ + ~{false="--no-ai" true="" enableAi} \ ~{"--config " + config} \ ~{"--cl-config " + clConfig } \ ~{reportDir} @@ -159,6 +161,7 @@ task MultiQC { lint: {description: "Equivalent to MultiQC's `--lint` flag.", category: "advanced"} pdf: {description: "Equivalent to MultiQC's `--pdf` flag.", category: "advanced"} megaQCUpload: {description: "Opposite to MultiQC's `--no-megaqc-upload` flag.", category: "advanced"} + enableAi: {description: "Opposite to MultiQC's `--no-ai` flag.", category: "advanced"} dirsDepth: {description: "Equivalent to MultiQC's `--dirs-depth` option.", category: "advanced"} title: {description: "Equivalent to MultiQC's `--title` option.", category: "advanced"} comment: {description: "Equivalent to MultiQC's `--comment` option.", category: "advanced"} From c882527a4c6e3c476a2a1ba15319b30d70f6dc53 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 17 Jun 2025 10:46:19 +0200 Subject: [PATCH 656/668] Support supplying additional reports/config to multiqc --- CHANGELOG.md | 1 + multiqc.wdl | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0b036ac..42542531 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ version 6.0.0-dev + Add support for filterThreshold/filterPercent for `modkit.Pileup`. + Add `modkit.Summary` task. + Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. ++ Support providing additional reports to MultiQC in workflow configuration. version 5.2.0 --------------------------- diff --git a/multiqc.wdl b/multiqc.wdl index 18667b91..8f05a36e 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -53,6 +53,7 @@ task MultiQC { File? fileList Array[String]+? exclude Array[String]+? module + Array[File]+? additionalReports String? dataFormat File? config # A directory String? 
clConfig @@ -79,13 +80,15 @@ task MultiQC { # strategy. Using python's builtin hash is unique enough # for these purposes. + Array[File] allReports = flatten([reports, select_all([additionalReports])]) + command { python3 < Date: Wed, 18 Jun 2025 08:38:36 +0200 Subject: [PATCH 657/668] ahmust be flatter --- multiqc.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiqc.wdl b/multiqc.wdl index 8f05a36e..db47ac87 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -80,7 +80,7 @@ task MultiQC { # strategy. Using python's builtin hash is unique enough # for these purposes. - Array[File] allReports = flatten([reports, select_all([additionalReports])]) + Array[File] allReports = flatten([reports, flatten(select_all([additionalReports]))]) command { python3 < Date: Mon, 7 Jul 2025 11:27:17 +0200 Subject: [PATCH 658/668] bump bedtools sort --- bedtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bedtools.wdl b/bedtools.wdl index a5d8aab3..50acd42d 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -209,7 +209,7 @@ task Sort { String memory = "~{512 + ceil(size(inputBed, "MiB"))}MiB" Int timeMinutes = 1 + ceil(size(inputBed, "GiB")) - String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + String dockerImage = "quay.io/biocontainers/bedtools:2.31.1--hf5e1c6e_2" } command { From c8fbb60c214cb3defe74b71aa60df6eefbae594a Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Mon, 7 Jul 2025 11:28:18 +0200 Subject: [PATCH 659/668] document --- CHANGELOG.md | 1 + bedtools.wdl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0b036ac..920993ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ bedtools.Sort: bumped container version to permit use of `faidx`. + Update vt task to allow a filter expression and compress and index the output. 
+ MultiQC image updated to version 1.28 + Samtools merge now has options added for merging RG and PG headers. diff --git a/bedtools.wdl b/bedtools.wdl index 50acd42d..7fcce28f 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -224,7 +224,7 @@ task Sort { ~{true="-chrThenScoreA" false="" chrThenScoreA} \ ~{true="-chrThenScoreD" false="" chrThenScoreD} \ ~{"-g " + genome} \ - ~{"-faidx" + faidx} \ + ~{"-faidx " + faidx} \ > ~{outputBed} } From 633d0bc76ffb27f09b957e7eb26153a8f3d5edac Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Mon, 7 Jul 2025 12:33:54 +0200 Subject: [PATCH 660/668] Add no name check support, requiring bumping Intersect as well --- bedtools.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bedtools.wdl b/bedtools.wdl index 7fcce28f..64fccc7b 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -270,10 +270,11 @@ task Intersect { Boolean writeA = false Boolean writeB = false Boolean stranded = false + Boolean nonamecheck = false String memory = "~{512 + ceil(size([regionsA, regionsB], "MiB"))}MiB" Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "GiB")) - String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + String dockerImage = "quay.io/biocontainers/bedtools:2.31.1--hf5e1c6e_2" } Boolean sorted = defined(faidx) @@ -289,6 +290,7 @@ task Intersect { ~{true="-wb" false="" writeB} \ ~{true="-s" false="" stranded} \ ~{true="-sorted" false="" sorted} \ + ~{true="-nonamecheck" false="" nonamecheck} \ ~{true="-g sorted.genome" false="" sorted} \ > ~{outputBed} } @@ -313,6 +315,7 @@ task Intersect { writeA: {description: "Write the original entry in A for each overlap.", category: "advanced"} writeB: {description: "Write the original entry in B for each overlap. Useful for knowing what A overlaps.", category: "advanced"} stranded: {description: "Force “strandedness”. That is, only report hits in B that overlap A on the same strand. 
By default, overlaps are reported without respect to strand.", category: "advanced"} + nonamecheck: {description: "Disable the bedtools intersect name check. This is used to catch chr1 vs chr01 or chr1 vs 1 naming inconsistencies. However, it throws an error for GIAB hg38 which has capital letters. https://github.com/arq5x/bedtools2/issues/648", category: "advanced"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From c676fe2198b18b437a3e279c240290b7227b94d4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 17 Jul 2025 10:37:57 +0200 Subject: [PATCH 661/668] address review comments --- bcftools.wdl | 13 ++++++++----- snpeff.wdl | 7 ++++++- snpsift.wdl | 7 ++++++- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 7e297bc7..31c7db13 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -190,8 +190,9 @@ task Norm { String? 
regions Boolean splitMultiallelicSites = false - String memory = "64GiB" + String memory = "4GiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) + Int diskGb = ceil(2.1 * size(inputFile, "G") + size(fasta, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -199,7 +200,7 @@ task Norm { command { set -e - ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud + ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" bcftools norm \ @@ -222,6 +223,7 @@ task Norm { memory: memory time_minutes: timeMinutes docker: dockerImage + disks: "local-disk ~{diskGb} SSD" # Based on an example in dxCompiler docs } parameter_meta { @@ -234,11 +236,12 @@ task Norm { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + diskGb: {description: "The amount of disk space needed for this job in GiB.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputVcf: {description: "Sorted VCF file."} - outputVcfIndex: {description: "Index of sorted VCF file."} + outputVcf: {description: "Normalized VCF file."} + outputVcfIndex: {description: "Index of Normalized VCF file."} } } @@ -424,7 +427,7 @@ task View { command { set -e - ls ~{inputFile} ~{inputFileIndex} + ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" bcftools view \ diff --git a/snpeff.wdl b/snpeff.wdl index e1b520af..b972ab30 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -48,7 +48,7 @@ task SnpEff { command { set -e - ls ~{vcf} ~{vcfIndex} + ls ~{vcf} ~{vcfIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" unzip ~{datadirZip} snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ @@ -82,6 +82,7 @@ task SnpEff { } parameter_meta { + # inputs vcf: {description: "A VCF file to analyse.", category: "required"} vcfIndex: {description: "The index for the VCF file.", category: "required"} genomeVersion: {description: "The version of the genome to be used. The database for this genome must be present in the datadirZip.", category: "required"} @@ -102,5 +103,9 @@ task SnpEff { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Annotated VCF file."} + outputVcfIndex: {description: "Index of annotated VCF file."} } } diff --git a/snpsift.wdl b/snpsift.wdl index 4c354f48..a62f7295 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -40,7 +40,7 @@ task SnpSiftFilter { command { set -e - ls ~{vcf} ~{vcfIndex} + ls ~{vcf} ~{vcfIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ @@ -64,6 +64,7 @@ task SnpSiftFilter { } parameter_meta { + # inputs vcf: {description: "A VCF file to filter.", category: "required"} vcfIndex: {description: "The index for the VCF file.", category: "common"} filterExpression: {description: "The SnpSift filtering expression.", category: "required"} @@ -75,5 +76,9 @@ task SnpSiftFilter { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Filtered VCF file."} + outputVcfIndex: {description: "Index of filtered VCF file."} } } From 9e9ae08503c7c2e10c0fe16d018bfb2810c4f3de Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 11:36:55 +0200 Subject: [PATCH 662/668] Update clair3 image --- CHANGELOG.md | 1 + clair3.wdl | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c56b124a..5cabdece 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -54,6 +54,7 @@ version 6.0.0-dev + Add support for filterThreshold/filterPercent for `modkit.Pileup`. + Add `modkit.Summary` task. + Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. 
++ Update clair3 version from 1.0.11 to 1.1.0 version 5.2.0 --------------------------- diff --git a/clair3.wdl b/clair3.wdl index 5a6154af..ae54ef40 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -34,8 +34,8 @@ task Clair3 { Int threads = 8 Boolean includeAllCtgs = false String memory = "~{threads + 16}GiB" - Int timeMinutes = 10 + ceil(size(bam, "G") * 400 / threads) - String dockerImage = "quay.io/biocontainers/clair3:1.0.11--py39hd649744_0" + Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) + String dockerImage = "quay.io/biocontainers/clair3:1.1.0--py39hd649744_0" } String modelArg = "~{if defined(modelTar) then basename(select_first([modelTar]), '.tar.gz') else builtinModel}" @@ -91,4 +91,4 @@ task Clair3 { vcfIndex: {description: "Output VCF index."} } -} \ No newline at end of file +} From d648745cfeedbc816081547f9772f0ee2d9f1692 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 12:13:43 +0200 Subject: [PATCH 663/668] Improve whatshap runtime/memory estimates --- CHANGELOG.md | 1 + whatshap.wdl | 20 ++++++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5cabdece..cfb8f41d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,7 @@ version 6.0.0-dev + Add `modkit.Summary` task. + Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. + Update clair3 version from 1.0.11 to 1.1.0 ++ Improve whatshap runtime/memory usage for our cluster. version 5.2.0 --------------------------- diff --git a/whatshap.wdl b/whatshap.wdl index da86ad82..beef5e99 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -40,12 +40,19 @@ task Phase { String memory = "4GiB" Int timeMinutes = 120 + + String memory = 2 + ceil(size(bam, "G") / 20 ) + Int timeMinutes = 400 + ceil(size(bam, "G") * 0.9 ) + # Whatshap 1.0, tabix 0.2.5. 
String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" } command { set -e + + mkdir -p $(dirname ~{outputVCF}) + whatshap phase \ ~{vcf} \ ~{phaseInput} \ @@ -110,12 +117,16 @@ task Stats { String? chromosome String memory = "4GiB" - Int timeMinutes = 120 + Int timeMinutes = 30 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" } command { + set -e + + mkdir -p $(dirname ~{tsv}) + whatshap stats \ ~{vcf} \ ~{if defined(gtf) then ("--gtf " + '"' + gtf + '"') else ""} \ @@ -169,7 +180,9 @@ task Haplotag { String? regions String? sample - String memory = "4GiB" + String memory = 2 + ceil(size(bam, "G") / 50 ) + Int timeMinutes = 50 + ceil(size(bam, "G") * 2 ) + Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" @@ -177,6 +190,9 @@ task Haplotag { command { set -e + + mkdir -p $(dirname ~{outputFile}) + whatshap haplotag \ ~{vcf} \ ~{alignments} \ From 7e246b01de31489577c434f69a5adbd2ab7cea2c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 12:18:54 +0200 Subject: [PATCH 664/668] Add modkit tasks --- CHANGELOG.md | 3 + modkit.wdl | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 193 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cfb8f41d..bf9d9238 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,9 @@ version 6.0.0-dev + Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. + Update clair3 version from 1.0.11 to 1.1.0 + Improve whatshap runtime/memory usage for our cluster. 
++ Add `Modkit.SampleProbs` ++ Add `Modkit.DmrMulti` ++ Add `Modkit.DmrMultiInputPrep` to construct the command line for `Modkit.DmrMulti` version 5.2.0 --------------------------- diff --git a/modkit.wdl b/modkit.wdl index 678e326a..a35d8ed2 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -130,7 +130,7 @@ task Summary { Int threads = 4 String memory = ceil(size(bam, "GiB") * 0.1) + 5 # Based on a linear model with some fudge (memory = 0.07540 * file_size - 0.6). - Int timeMinutes = 2880 / threads # 2 Days / threads + Int timeMinutes = 60 # originally this was set at "2 Days / threads" but with 4 threads and that much ram, it's pretty fast. String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" } @@ -177,3 +177,192 @@ task Summary { summaryReport: {description: "The output modkit summary."} } } + +task SampleProbs { + input { + File bam + File bamIndex + + String summary = "modkit-sample-probs" + + Boolean sample = true + Int? numReads # = 10042 + Float? samplingFrac # = 0.1 + Int? 
seed + + Int threads = 4 + String memory = "32G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" + } + + command <<< + set -e + mkdir -p ~{summary} + + modkit sample-probs \ + --threads ~{threads} \ + --out-dir ~{summary} \ + ~{true="" false="--no-sampling" sample} \ + ~{"--num-reads " + numReads} \ + ~{"--sampling-frac " + samplingFrac} \ + ~{"--seed " + seed} \ + --hist \ + ~{bam} + >>> + + output { + File reportCounts = "~{summary}/counts.html" + File reportProportion = "~{summary}/proportion.html" + File reportProbabilitiesTsv = "~{summary}/probabilities.tsv" + File reportThresholdsTsv = "~{summary}/thresholds.tsv" + } + + runtime { + docker: dockerImage + cpu: threads + memory: memory + time_minutes: timeMinutes + } + + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + + sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} + numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} + samplingFrac: {description: "Use a fixed percentage of reads, rather than a fixed number of reads, for sampling.", category: "advanced"} + seed: {description: "A seed can be provided for reproducibility in the sampling fraction case.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + summaryReport: {description: "The output modkit summary."} + } +} + +task DmrMultiInputPrep { + input { + Array[File] control + Array[File] condition + String controlName + String conditionName + + Int threads = 1 + String memory = "1G" + Int timeMinutes = 5 + String dockerImage = "quay.io/biocontainers/multiqc:1.28--pyhdfd78af_0" + } + + command <<< + cat > modkit_dmr.py <<'CODE' + #!/usr/bin/env python3 + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--control_n', type=str, default='control') + parser.add_argument('--control_f', type=str,nargs='+') + parser.add_argument('--condition_n', type=str, default='condition') + parser.add_argument('--condition_f', type=str,nargs='+') + args = parser.parse_args() + modkit = [] + for i, x in enumerate(args.control_f): + modkit.extend(['-s', x, f'{args.control_n}{i}']) + for i, x in enumerate(args.condition_f): + modkit.extend(['-s', x, f'{args.condition_n}{i}']) + print(' '.join(modkit), end='') + CODE + + python modkit_dmr.py \ + --control_n ~{controlName} \ + --control_f ~{sep=" " control} \ + --condition_n ~{conditionName} \ + --condition_f ~{sep=" " condition} + >>> + + output { + String params = select_first(read_lines(stdout())) + } + + runtime { + docker: dockerImage + cpu: threads + memory: memory + time_minutes: timeMinutes + } +} + + +task DmrMulti { + input { + String dmrMultiArguments + Array[File] control + Array[File] condition + + Array[File] controlIndex + Array[File] conditionIndex + + String controlName + String conditionName + + File referenceFasta + File referenceFastaFai + String dmr_dir = "results" + + File? 
cpg_islands + + Int threads = 4 + String memory = "32G" + Int timeMinutes = 600 + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" + } + + command <<< + set -e + mkdir -p ~{dmr_dir} + + modkit dmr multi \ + ~{dmrMultiArguments} \ + --out-dir ~{dmr_dir} \ + ~{"--regions-bed " + cpg_islands} \ + --ref ~{referenceFasta} \ + --base C \ + --threads ~{threads} \ + --header \ + --log-filepath dmr_multi.log + >>> + + output { + # TODO: other files + File log = "dmr_multi.log" + } + + runtime { + docker: dockerImage + cpu: threads + memory: memory + time_minutes: timeMinutes + } + + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + + sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} + numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} + samplingFrac: {description: "Use a fixed percentage of reads, rather than a fixed number of reads, for sampling.", category: "advanced"} + seed: {description: "A seed can be provided for reproducibility in the sampling fraction case.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + summaryReport: {description: "The output modkit summary."} + } +} From 8564f8c0a757cecd00155de011e10a1e51ab32cf Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 12:29:51 +0200 Subject: [PATCH 665/668] I don't feel like documenting it if it isn't used --- CHANGELOG.md | 2 - modkit.wdl | 122 --------------------------------------------------- 2 files changed, 124 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf9d9238..bb09f4f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,8 +57,6 @@ version 6.0.0-dev + Update clair3 version from 1.0.11 to 1.1.0 + Improve whatshap runtime/memory usage for our cluster. + Add `Modkit.SampleProbs` -+ Add `Modkit.DmrMulti` -+ Add `Modkit.DmrMultiInputPrep` to construct the command line for `Modkit.DmrMulti` version 5.2.0 --------------------------- diff --git a/modkit.wdl b/modkit.wdl index a35d8ed2..b38929f5 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -244,125 +244,3 @@ task SampleProbs { summaryReport: {description: "The output modkit summary."} } } - -task DmrMultiInputPrep { - input { - Array[File] control - Array[File] condition - String controlName - String conditionName - - Int threads = 1 - String memory = "1G" - Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/multiqc:1.28--pyhdfd78af_0" - } - - command <<< - cat > modkit_dmr.py <<'CODE' - #!/usr/bin/env python3 - import argparse - parser = argparse.ArgumentParser() - parser.add_argument('--control_n', type=str, default='control') - parser.add_argument('--control_f', type=str,nargs='+') - parser.add_argument('--condition_n', type=str, default='condition') - parser.add_argument('--condition_f', type=str,nargs='+') - args = parser.parse_args() - modkit = [] - for i, x in enumerate(args.control_f): - modkit.extend(['-s', x, f'{args.control_n}{i}']) - for i, x in enumerate(args.condition_f): - 
modkit.extend(['-s', x, f'{args.condition_n}{i}']) - print(' '.join(modkit), end='') - CODE - - python modkit_dmr.py \ - --control_n ~{controlName} \ - --control_f ~{sep=" " control} \ - --condition_n ~{conditionName} \ - --condition_f ~{sep=" " condition} - >>> - - output { - String params = select_first(read_lines(stdout())) - } - - runtime { - docker: dockerImage - cpu: threads - memory: memory - time_minutes: timeMinutes - } -} - - -task DmrMulti { - input { - String dmrMultiArguments - Array[File] control - Array[File] condition - - Array[File] controlIndex - Array[File] conditionIndex - - String controlName - String conditionName - - File referenceFasta - File referenceFastaFai - String dmr_dir = "results" - - File? cpg_islands - - Int threads = 4 - String memory = "32G" - Int timeMinutes = 600 - String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" - } - - command <<< - set -e - mkdir -p ~{dmr_dir} - - modkit dmr multi \ - ~{dmrMultiArguments} \ - --out-dir ~{dmr_dir} \ - ~{"--regions-bed " + cpg_islands} \ - --ref ~{referenceFasta} \ - --base C \ - --threads ~{threads} \ - --header \ - --log-filepath dmr_multi.log - >>> - - output { - # TODO: other files - File log = "dmr_multi.log" - } - - runtime { - docker: dockerImage - cpu: threads - memory: memory - time_minutes: timeMinutes - } - - parameter_meta { - # input - bam: {description: "The input alignment file", category: "required"} - bamIndex: {description: "The index for the input alignment file", category: "required"} - - sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} - numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} - samplingFrac: {description: "Use a fixed percentage of reads, rather than a fixed number of reads, for sampling.", category: "advanced"} - seed: {description: "A seed can be provided for 
reproducibility in the sampling fraction case.", category: "advanced"} - - threads: {description: "The number of threads to use.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - - # output - summaryReport: {description: "The output modkit summary."} - } -} From 040a43e6723d672f97f126a1628e773d75fb6515 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 12:29:51 +0200 Subject: [PATCH 666/668] I don't feel like documenting it if it isn't used --- modkit.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index b38929f5..ddf4dbf7 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -229,6 +229,7 @@ task SampleProbs { # input bam: {description: "The input alignment file", category: "required"} bamIndex: {description: "The index for the input alignment file", category: "required"} + summary: {description: "A folder for the outputs", category: "required"} sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} @@ -241,6 +242,9 @@ task SampleProbs { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # output - summaryReport: {description: "The output modkit summary."} + reportCounts: {description: "The output html report of counts"} + reportProportion: {description: "The output html report of proportions"} + reportProbabilitiesTsv: {description: "The output TSV of Probabilities"} + reportThresholdsTsv: {description: "The output TSV of thresholds"} } } From bd54aeb0bcdd67db3fb180b890954bca92000287 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 13:13:43 +0200 Subject: [PATCH 667/668] incorrect inputs --- whatshap.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/whatshap.wdl b/whatshap.wdl index beef5e99..3b2bd1d3 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -41,8 +41,8 @@ task Phase { String memory = "4GiB" Int timeMinutes = 120 - String memory = 2 + ceil(size(bam, "G") / 20 ) - Int timeMinutes = 400 + ceil(size(bam, "G") * 0.9 ) + String memory = 2 + ceil(size(phaseInput, "G") / 20 ) + Int timeMinutes = 400 + ceil(size(phaseInput, "G") * 0.9 ) # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" @@ -180,8 +180,8 @@ task Haplotag { String? regions String? sample - String memory = 2 + ceil(size(bam, "G") / 50 ) - Int timeMinutes = 50 + ceil(size(bam, "G") * 2 ) + String memory = 2 + ceil(size(alignments, "G") / 50 ) + Int timeMinutes = 50 + ceil(size(alignments, "G") * 2 ) Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. 
From e7061594546ceac5e7bbcdc48877bc78b5ec795c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 15:46:06 +0200 Subject: [PATCH 668/668] Fix duplicate declarations --- whatshap.wdl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/whatshap.wdl b/whatshap.wdl index 3b2bd1d3..b491f566 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -38,9 +38,6 @@ task Phase { String? threshold String? ped - String memory = "4GiB" - Int timeMinutes = 120 - String memory = 2 + ceil(size(phaseInput, "G") / 20 ) Int timeMinutes = 400 + ceil(size(phaseInput, "G") * 0.9 ) @@ -183,7 +180,6 @@ task Haplotag { String memory = 2 + ceil(size(alignments, "G") / 50 ) Int timeMinutes = 50 + ceil(size(alignments, "G") * 2 ) - Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" }