From 6a93ad835ce49048b03c97020cbe5d387dc7fec2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 30 Mar 2020 10:00:14 +0200 Subject: [PATCH 001/902] setting next version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index fd2a0186..944880fa 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.0 +3.2.0 From 147a115c78c883770b72013efd9cfdec68434f06 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 31 Mar 2020 10:38:19 +0200 Subject: [PATCH 002/902] add task for STAR index generation --- star.wdl | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/star.wdl b/star.wdl index e1e55a26..5de925b5 100644 --- a/star.wdl +++ b/star.wdl @@ -20,6 +20,70 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +task GenomeGenerate { + input { + String genomeDir = "STAR_index" + File referenceFasta + File? referenceGtf + Int? sjdbOverhang + + Int threads = 4 + String memory = "60G" + String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" + } + + command { + set -e + mkdir -p "$(dirname ~{genomeDir})" + STAR \ + --runMode genomeGenerate \ + --runThreadN ~{threads} \ + --genomeDir ~{genomeDir} \ + --genomeFastaFiles ~{referenceFasta} \ + ~{"--sjdbGTFfile " + referenceGtf} \ + ~{"--sjdbOverhang " + sjdbOverhang} + } + + output { + File chrLength = "~{genomeDir}/chrLength.txt" + File chrNameLength = "~{genomeDir}/chrNameLength.txt" + File chrName = "~{genomeDir}/chrName.txt" + File chrStart = "~{genomeDir}/chrStart.txt" + File genome = "~{genomeDir}/genome.txt" + File genomeParameters = "~{genomeDir}/genomeParameters.txt" + File sa = "~{genomeDir}/SA" + File saIndex = "~{genomeDir}/SAindex" + File? exonGeTrInfo = "~{genomeDir}/exonGeTrInfo.tab" + File? exonInfo = "~{genomeDir}/exonInfo.tab" + File? geneInfo = "~{genomeDir}/geneInfo.tab" + File? sjdbInfo = "~{genomeDir}/sjdbInfo.txt" + File? 
sjdbListFromGtfOut = "~{genomeDir}/sjdbList.fromGTF.out.tab" + File? sjdbListOut = "~{genomeDir}/sjdbList.out.tab" + File? transcriptInfo = "~{genomeDir}/transcriptInfo.tab" + Array[File] starIndex = select_all([chrLength, chrNameLength, chrName, chrStart, genome, genomeParameters, + sa, saIndex, exonGeTrInfo, exonInfo, geneInfo, sjdbInfo, sjdbListFromGtfOut, + sjdbListOut, transcriptInfo]) + } + + runtime { + cpu: threads + memory: memory + docker: dockerImage + } + + parameter_meta { + genomeDir: {description:"The directory the STAR index should be written to.", categroy: "common"} + referenceFasta: {description: "The reference Fasta file.", category: "required"} + referenceGtf: {description: "The reference GTF file.", category: "common"} + sjdbOverhang: {description: "Equivalent to STAR's `--sjdbOverhang` option.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Star { input { Array[File]+ inputR1 From cc40f3aa341f5d8fbf5b02cb92cf5964af82be1d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 31 Mar 2020 10:40:40 +0200 Subject: [PATCH 003/902] update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c3a3744..21019ef2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> +version 3.2.0 +--------------------------- ++ Added STAR GenomeGenerate task. + version 3.1.0 --------------------------- + Default threads for BWA in bwa.Kit task: 4. 
Samtools sort in the From fe306e5f31d08fadc8482d899d3eefb35e46d5fe Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 1 Apr 2020 16:19:14 +0200 Subject: [PATCH 004/902] add runtime_minutes and adjust memory of star index task --- star.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/star.wdl b/star.wdl index 5de925b5..e272cd02 100644 --- a/star.wdl +++ b/star.wdl @@ -28,7 +28,8 @@ task GenomeGenerate { Int? sjdbOverhang Int threads = 4 - String memory = "60G" + String memory = "32G" + Int minutesPerGB = 240 String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } @@ -68,6 +69,7 @@ task GenomeGenerate { runtime { cpu: threads memory: memory + runtime_minutes: ceil(size(referenceFasta, "G") * minutesPerGB / threads) docker: dockerImage } From 619781f26255421acdaa64daa930c279219eba5b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 2 Apr 2020 14:17:08 +0200 Subject: [PATCH 005/902] Add fastqc requirements --- fastqc.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fastqc.wdl b/fastqc.wdl index 4d10147c..7e693f67 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -38,7 +38,7 @@ task Fastqc { String? dir Int threads = 1 - String memory = "4G" + String memory = "1G" String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" Array[File]? NoneArray File? NoneFile @@ -80,10 +80,12 @@ task Fastqc { Array[File]? 
images = if extract then glob(reportDir + "/Images/*.png") else NoneArray } + Int estimatedRuntime = 1 + ceil(size(seqFile, "G")) * 4 runtime { cpu: threads memory: memory docker: dockerImage + runtime_minutes: estimatedRuntime } parameter_meta { From 802b65e68ad7b8bcd700649eabd62cbc66b3944c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 2 Apr 2020 14:25:09 +0200 Subject: [PATCH 006/902] make estimates overrideable --- fastqc.wdl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fastqc.wdl b/fastqc.wdl index 7e693f67..b1a587ca 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -38,11 +38,14 @@ task Fastqc { String? dir Int threads = 1 - String memory = "1G" + String? memory + Int? runtimeMinutes String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" Array[File]? NoneArray File? NoneFile } + String estimatedMemoryMB = "~{250 + 250 * threads}M" + Int estimatedRuntimeMin = 1 + ceil(size(seqFile, "G")) * 4 # Chops of the .gz extension if present. # The Basename needs to be taken here. Otherwise paths might differ between similar jobs. @@ -80,12 +83,11 @@ task Fastqc { Array[File]? images = if extract then glob(reportDir + "/Images/*.png") else NoneArray } - Int estimatedRuntime = 1 + ceil(size(seqFile, "G")) * 4 runtime { cpu: threads - memory: memory + memory: select_first([memory, estimatedMemoryMB]) docker: dockerImage - runtime_minutes: estimatedRuntime + runtime_minutes: select_first([runtimeMinutes, estimatedRuntimeMin]) } parameter_meta { From 4fb4583e643c56b893f6970671d0752616f63b01 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 2 Apr 2020 14:37:13 +0200 Subject: [PATCH 007/902] estimate cutadapt memory and runtime --- cutadapt.wdl | 8 ++++++-- fastqc.wdl | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index d04865b6..bb04e337 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -79,9 +79,12 @@ task Cutadapt { # Hence we use compression level 1 here. 
Int compressionLevel = 1 # This only affects outputs with the .gz suffix. Int cores = 4 - String memory = "4G" + String? memory + Int? runtimeMinutes String dockerImage = "quay.io/biocontainers/cutadapt:2.8--py37h516909a_0" } + Int estimatedRuntimeMin = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) + String estimatedMemory = "~{300 + 100 * cores}M" String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) String read2outputArg = if (defined(read2)) @@ -166,7 +169,8 @@ task Cutadapt { runtime { cpu: cores - memory: memory + memory: select_first([memory, estimatedMemory]) + runtime_minutes: select_first([runtimeMinutes, estimatedRuntimeMin]) docker: dockerImage } diff --git a/fastqc.wdl b/fastqc.wdl index b1a587ca..adc103e8 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -44,7 +44,7 @@ task Fastqc { Array[File]? NoneArray File? NoneFile } - String estimatedMemoryMB = "~{250 + 250 * threads}M" + String estimatedMemory = "~{250 + 250 * threads}M" Int estimatedRuntimeMin = 1 + ceil(size(seqFile, "G")) * 4 # Chops of the .gz extension if present. @@ -85,7 +85,7 @@ task Fastqc { runtime { cpu: threads - memory: select_first([memory, estimatedMemoryMB]) + memory: select_first([memory, estimatedMemory]) docker: dockerImage runtime_minutes: select_first([runtimeMinutes, estimatedRuntimeMin]) } From 3d59d9b820c25fcdd51ac537767f73c695850b6d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 2 Apr 2020 15:24:44 +0200 Subject: [PATCH 008/902] rename time parameter --- cutadapt.wdl | 6 +++--- fastqc.wdl | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index bb04e337..5f51e9ca 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -80,10 +80,10 @@ task Cutadapt { Int compressionLevel = 1 # This only affects outputs with the .gz suffix. Int cores = 4 String? memory - Int? runtimeMinutes + Int? 
timeMinutes String dockerImage = "quay.io/biocontainers/cutadapt:2.8--py37h516909a_0" } - Int estimatedRuntimeMin = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) + Int estimatedTimeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) String estimatedMemory = "~{300 + 100 * cores}M" String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) @@ -170,7 +170,7 @@ task Cutadapt { runtime { cpu: cores memory: select_first([memory, estimatedMemory]) - runtime_minutes: select_first([runtimeMinutes, estimatedRuntimeMin]) + time_minutes: select_first([timeMinutes, estimatedTimeMinutes]) docker: dockerImage } diff --git a/fastqc.wdl b/fastqc.wdl index adc103e8..fc935890 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -39,13 +39,13 @@ task Fastqc { Int threads = 1 String? memory - Int? runtimeMinutes + Int? timeMinutes String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" Array[File]? NoneArray File? NoneFile } String estimatedMemory = "~{250 + 250 * threads}M" - Int estimatedRuntimeMin = 1 + ceil(size(seqFile, "G")) * 4 + Int estimatedTimeMinutes = 1 + ceil(size(seqFile, "G")) * 4 # Chops of the .gz extension if present. # The Basename needs to be taken here. Otherwise paths might differ between similar jobs. 
@@ -87,7 +87,7 @@ task Fastqc { cpu: threads memory: select_first([memory, estimatedMemory]) docker: dockerImage - runtime_minutes: select_first([runtimeMinutes, estimatedRuntimeMin]) + time_minutes: select_first([timeMinutes, estimatedTimeMinutes]) } parameter_meta { From 6657f26cb4aaed1a7d142473b353750c3290adc7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 2 Apr 2020 15:35:49 +0200 Subject: [PATCH 009/902] runtime_minutes -> time_minutes --- star.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/star.wdl b/star.wdl index e272cd02..4c407331 100644 --- a/star.wdl +++ b/star.wdl @@ -29,7 +29,7 @@ task GenomeGenerate { Int threads = 4 String memory = "32G" - Int minutesPerGB = 240 + Int timeMinutes = ceil(size(referenceFasta, "G") * 240 / threads) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } @@ -69,7 +69,7 @@ task GenomeGenerate { runtime { cpu: threads memory: memory - runtime_minutes: ceil(size(referenceFasta, "G") * minutesPerGB / threads) + time_minutes: timeMinutes docker: dockerImage } From 5b827f8a90d7a0efe2764a6afcf7d23c4fec24fc Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 2 Apr 2020 16:16:20 +0200 Subject: [PATCH 010/902] add smoove.wdl --- smoove.wdl | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 smoove.wdl diff --git a/smoove.wdl b/smoove.wdl new file mode 100644 index 00000000..93523f3d --- /dev/null +++ b/smoove.wdl @@ -0,0 +1,68 @@ +version 1.0 + +# MIT License +# +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to 
the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task CallSV { + input { + File bamFile + File bamIndex + File referenceFasta + File referenceFastaFai + String sample + String outputDir = "./smoove" + + String memory = "15G" + String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputDir})" + smoove call \ + --outdir ~{outputDir} \ + --name ~{sample} \ + --fasta ~{referenceFasta} \ + ~{bamFile} + } + + output { + File smooveVcf = outputDir + "/" + sample + "-smoove.vcf.gz" + } + + runtime { + memory: memory + docker: dockerImage + } + + parameter_meta { + # inputs + bamFile: {description: "The bam file to process.", category: "required"} + bamIndex: {description: "The index bam file.", category: "required"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} + referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } + outputDir: {description: "The location the output VCF file should be written.", category: "common"} + sample: {description: "The name of the sample", category: "required"} + memory: {description: "The memory required to run the programs", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file From 7a1ff5d1fcb5fa60c64494212c30165891dbe175 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 2 Apr 2020 16:17:55 +0200 Subject: [PATCH 011/902] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c3a3744..797ca07b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- ++ Add WDL task for smoove (lumpy) sv-caller. + Default threads for BWA in bwa.Kit task: 4. Samtools sort in the same task: 1. Output BAM compression level to 1. + Lima: Add missing output to parameter_meta. From 843055e1278addf3ea2c3f180225347e917a4b16 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 3 Apr 2020 14:42:06 +0200 Subject: [PATCH 012/902] add time_minutes part I --- biowdl.wdl | 4 ++++ bowtie.wdl | 4 +++- chunked-scatter.wdl | 2 ++ collect-columns.wdl | 4 +++- common.wdl | 5 +++++ umi-tools.wdl | 2 ++ 6 files changed, 19 insertions(+), 2 deletions(-) diff --git a/biowdl.wdl b/biowdl.wdl index 7aa68b27..c8855406 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -31,6 +31,8 @@ task InputConverter { Boolean skipFileCheck=true Boolean checkFileMd5sums=false Boolean old=false + + Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" } @@ -50,6 +52,8 @@ task InputConverter { } runtime { + memory: "2G" + time_minutes: timeMinutes docker: dockerImage } diff --git a/bowtie.wdl b/bowtie.wdl index 18fd6146..a47be1da 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -37,7 +37,8 @@ task Bowtie { String? 
samRG Int threads = 1 - String memory = "16G" + Int timeMinutes = ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads) + String memory = "10G" String picardXmx = "4G" # Image contains bowtie=1.2.2 and picard=2.9.2 String dockerImage = "quay.io/biocontainers/mulled-v2-bfe71839265127576d3cd749c056e7b168308d56:1d8bec77b352cdcf3e9ff3d20af238b33ed96eae-0" @@ -78,6 +79,7 @@ task Bowtie { runtime { cpu: threads memory: memory + time_minutes: timeMinutes docker: dockerImage } diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 619292d9..2e5f6bdc 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -28,6 +28,7 @@ task ChunkedScatter { Int? overlap Int? minimumBasesPerFile + Int timeMinutes String dockerImage = "quay.io/biocontainers/chunked-scatter:0.1.0--py_0" } @@ -48,6 +49,7 @@ task ChunkedScatter { runtime { memory: "4G" + timeMinutes: 5 docker: dockerImage } diff --git a/collect-columns.wdl b/collect-columns.wdl index 8b1fa387..cc841521 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -33,6 +33,8 @@ task CollectColumns { File? referenceGtf String? featureAttribute + Int memoryGb = 4 + ceil(0.5 * length(inputTables)) + Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/collect-columns:0.2.0--py_1" } @@ -56,7 +58,7 @@ task CollectColumns { File outputTable = outputPath } - Int memoryGb = 4 + ceil(0.5 * length(inputTables)) + runtime { memory: "~{memoryGb}G" diff --git a/common.wdl b/common.wdl index ef86abcc..516ce144 100644 --- a/common.wdl +++ b/common.wdl @@ -210,6 +210,9 @@ task YamlToJson { input { File yaml String outputJson = basename(yaml, "\.ya?ml$") + ".json" + + Int timeMinutes = 2 + String memory = "1G" # biowdl-input-converter has python and pyyaml. 
String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" } @@ -230,6 +233,8 @@ task YamlToJson { } runtime { + memory: memory + time_minutes: timeMinutes docker: dockerImage } diff --git a/umi-tools.wdl b/umi-tools.wdl index 07518e57..415081fc 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -76,6 +76,7 @@ task Dedup { Boolean paired = true String memory = "5G" + Int timeMinutes = ceil(size(inputBam, "G") * 18) # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" @@ -105,6 +106,7 @@ task Dedup { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } From 44f87bdea0fa003149e3c9c85e33b59ba164ffa2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Apr 2020 14:50:11 +0200 Subject: [PATCH 013/902] move estimates to runtime section --- cutadapt.wdl | 6 ++---- fastqc.wdl | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index 5f51e9ca..fbd51384 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -83,8 +83,6 @@ task Cutadapt { Int? 
timeMinutes String dockerImage = "quay.io/biocontainers/cutadapt:2.8--py37h516909a_0" } - Int estimatedTimeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) - String estimatedMemory = "~{300 + 100 * cores}M" String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) String read2outputArg = if (defined(read2)) @@ -169,8 +167,8 @@ task Cutadapt { runtime { cpu: cores - memory: select_first([memory, estimatedMemory]) - time_minutes: select_first([timeMinutes, estimatedTimeMinutes]) + memory: select_first([memory, "~{300 + 100 * cores}M"]) + time_minutes: select_first([timeMinutes, 1 + ceil(size([read1, read2], "G") * 12.0 / cores)]) docker: dockerImage } diff --git a/fastqc.wdl b/fastqc.wdl index fc935890..3e461053 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -44,8 +44,6 @@ task Fastqc { Array[File]? NoneArray File? NoneFile } - String estimatedMemory = "~{250 + 250 * threads}M" - Int estimatedTimeMinutes = 1 + ceil(size(seqFile, "G")) * 4 # Chops of the .gz extension if present. # The Basename needs to be taken here. Otherwise paths might differ between similar jobs. 
@@ -85,9 +83,9 @@ task Fastqc { runtime { cpu: threads - memory: select_first([memory, estimatedMemory]) + memory: select_first([memory, "~{250 + 250 * threads}M"]) docker: dockerImage - time_minutes: select_first([timeMinutes, estimatedTimeMinutes]) + time_minutes: select_first([timeMinutes, 1 + ceil(size(seqFile, "G")) * 4]) } parameter_meta { From 2c9810f11aef4f7a86381547982feea681f2bbc9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 6 Apr 2020 16:50:28 +0200 Subject: [PATCH 014/902] add time_minutes part II --- biopet/biopet.wdl | 8 +++++--- biowdl.wdl | 3 ++- bowtie.wdl | 1 + bwa.wdl | 12 +++++++++--- chunked-scatter.wdl | 4 ++-- collect-columns.wdl | 3 +++ common.wdl | 4 +++- gatk.wdl | 20 ++++++++++++++------ picard.wdl | 21 +++++++++++++++------ samtools.wdl | 7 ++++++- 10 files changed, 60 insertions(+), 23 deletions(-) diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index ec64fb4b..7634e7b0 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -264,8 +264,9 @@ task ScatterRegions { File? bamFile File? 
bamIndex - String memory = "24G" - String javaXmx = "8G" + String memory = "1G" + String javaXmx = "500M" + Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/biopet-scatterregions:0.2--0" } @@ -277,7 +278,7 @@ task ScatterRegions { command <<< set -e -o pipefail mkdir -p ~{outputDirPath} - biopet-scatterregions -Xmx~{javaXmx} \ + biopet-scatterregions -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -R ~{referenceFasta} \ -o ~{outputDirPath} \ ~{"-s " + scatterSize} \ @@ -306,6 +307,7 @@ task ScatterRegions { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } diff --git a/biowdl.wdl b/biowdl.wdl index c8855406..7661a592 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -32,7 +32,7 @@ task InputConverter { Boolean checkFileMd5sums=false Boolean old=false - Int timeMinutes = 2 + Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" } @@ -66,6 +66,7 @@ task InputConverter { checkFileMd5sums: {description: "Whether or not the MD5 sums of the files mentioned in the samplesheet should be checked.", category: "advanced"} old: {description: "Whether or not the old samplesheet format should be used.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/bowtie.wdl b/bowtie.wdl index a47be1da..94a809fa 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -100,6 +100,7 @@ task Bowtie { category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/bwa.wdl b/bwa.wdl index fec2b09f..247386d8 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -29,8 +29,9 @@ task Mem { String? readgroup Int threads = 4 - String memory = "32G" + String memory = "20G" String picardXmx = "4G" + Int timeMinutes = ceil(size([read1, read2], "G") * 200 / threads) # A mulled container is needed to have both picard and bwa in one container. # This container contains: picard (2.18.7), bwa (0.7.17-r1188) String dockerImage = "quay.io/biocontainers/mulled-v2-002f51ea92721407ef440b921fb5940f424be842:43ec6124f9f4f875515f9548733b8b4e5fed9aa6-0" @@ -45,7 +46,7 @@ task Mem { ~{bwaIndex.fastaFile} \ ~{read1} \ ~{read2} \ - | picard -Xmx~{picardXmx} SortSam \ + | picard -Xmx~{picardXmx} -XX:ParallelGCThreads=1 SortSam \ INPUT=/dev/stdin \ OUTPUT=~{outputPath} \ SORT_ORDER=coordinate \ @@ -60,6 +61,7 @@ task Mem { runtime { cpu: threads memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -74,6 +76,7 @@ task Mem { memory: {description: "The amount of memory this job will use.", category: "advanced"} picardXmx: {description: "The maximum memory available to picard SortSam. Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -96,7 +99,8 @@ task Kit { # GATK/Picard default is level 2. 
String sortMemoryPerThread = "4G" Int compressionLevel = 1 - String memory = "32G" + String memory = "20G" + Int timeMinutes = ceil(size([read1, read2], "G") * 220 / threads) String dockerImage = "biocontainers/bwakit:v0.7.15_cv1" } @@ -130,6 +134,7 @@ task Kit { runtime { cpu: threads + 1 # One thread for bwa-postalt + samtools. memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -146,6 +151,7 @@ task Kit { sortMemoryPerThread: {description: "The amount of memory for each sorting thread.", category: "advanced"} compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 2e5f6bdc..1b81687a 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -28,7 +28,7 @@ task ChunkedScatter { Int? overlap Int? 
minimumBasesPerFile - Int timeMinutes + Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/chunked-scatter:0.1.0--py_0" } @@ -49,7 +49,7 @@ task ChunkedScatter { runtime { memory: "4G" - timeMinutes: 5 + time_minutes: timeMinutes docker: dockerImage } diff --git a/collect-columns.wdl b/collect-columns.wdl index cc841521..6855b6c0 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -62,6 +62,7 @@ task CollectColumns { runtime { memory: "~{memoryGb}G" + time_minutes: timeMinutes docker: dockerImage } @@ -86,6 +87,8 @@ task CollectColumns { category: "advanced"} featureAttribute: {description: "Equivalent to the -F option of collect-columns.", category: "advanced"} + memoryGb: {description: "The maximum amount of memory the job will need in GB", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/common.wdl b/common.wdl index 516ce144..be60f8cf 100644 --- a/common.wdl +++ b/common.wdl @@ -211,7 +211,7 @@ task YamlToJson { File yaml String outputJson = basename(yaml, "\.ya?ml$") + ".json" - Int timeMinutes = 2 + Int timeMinutes = 1 String memory = "1G" # biowdl-input-converter has python and pyyaml. String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" @@ -241,6 +241,8 @@ task YamlToJson { parameter_meta { yaml: {description: "The YAML file to convert.", category: "required"} outputJson: {description: "The location the output JSON file should be written to.", category: "advanced"} + memory: {description: "The maximum aount of memroy the job will need.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/gatk.wdl b/gatk.wdl index b730cbee..c9c1d5eb 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -90,8 +90,9 @@ task ApplyBQSR { File referenceFastaDict File referenceFastaFai - String memory = "12G" + String memory = "5G" String javaXmx = "4G" + Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can be used. String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -139,6 +140,7 @@ task ApplyBQSR { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -159,15 +161,16 @@ task BaseRecalibrator { File referenceFastaDict File referenceFastaFai - String memory = "12G" + String memory = "5G" String javaXmx = "4G" + Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can be used. String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { set -e mkdir -p "$(dirname ~{recalibrationReportPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ BaseRecalibrator \ -R ~{referenceFasta} \ -I ~{inputBam} \ @@ -184,6 +187,7 @@ task BaseRecalibrator { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -205,6 +209,7 @@ task BaseRecalibrator { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -688,15 +693,16 @@ task GatherBqsrReports { Array[File] inputBQSRreports String outputReportPath - String memory = "12G" - String javaXmx = "4G" + String memory = "1G" + String javaXmx = "500M" + Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { set -e mkdir -p "$(dirname ~{outputReportPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ GatherBQSRReports \ -I ~{sep=' -I ' inputBQSRreports} \ -O ~{outputReportPath} @@ -708,6 +714,7 @@ task GatherBqsrReports { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -718,6 +725,7 @@ task GatherBqsrReports { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/picard.wdl b/picard.wdl index 7df96aa9..895acea0 100644 --- a/picard.wdl +++ b/picard.wdl @@ -83,8 +83,9 @@ task CollectMultipleMetrics { Boolean collectSequencingArtifactMetrics = true Boolean collectQualityYieldMetrics = true - String memory = "32G" + String memory = "10G" String javaXmx = "8G" + Int timeMinutes = ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -92,7 +93,7 @@ task CollectMultipleMetrics { command { set -e mkdir -p "$(dirname ~{basename})" - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ CollectMultipleMetrics \ I=~{inputBam} \ R=~{referenceFasta} \ @@ -153,6 +154,7 @@ task CollectMultipleMetrics { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -188,6 +190,7 @@ task CollectMultipleMetrics { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -320,15 +323,16 @@ task GatherBamFiles { Array[File]+ inputBamsIndex String outputBamPath - String memory = "12G" + String memory = "5G" String javaXmx = "4G" + Int timeMinutes = ceil(size(inputBams, "G") * 0.5) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } command { set -e mkdir -p "$(dirname ~{outputBamPath})" - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ GatherBamFiles \ INPUT=~{sep=' INPUT=' inputBams} \ OUTPUT=~{outputBamPath} \ @@ -344,6 +348,7 @@ task GatherBamFiles { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -356,6 +361,7 @@ task GatherBamFiles { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -412,8 +418,9 @@ task MarkDuplicates { String outputBamPath String metricsPath - String memory = "24G" + String memory = "10G" String javaXmx = "8G" + Int timeMinutes = ceil(size(inputBams, "G")* 8) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" # The program default for READ_NAME_REGEX is appropriate in nearly every case. 
@@ -431,7 +438,7 @@ task MarkDuplicates { command { set -e mkdir -p "$(dirname ~{outputBamPath})" - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ MarkDuplicates \ INPUT=~{sep=' INPUT=' inputBams} \ OUTPUT=~{outputBamPath} \ @@ -454,6 +461,7 @@ task MarkDuplicates { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -468,6 +476,7 @@ task MarkDuplicates { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/samtools.wdl b/samtools.wdl index a4a893a1..ca6de261 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -203,6 +203,8 @@ task Flagstat { File inputBam String outputPath + String memory = "1G" + Int timeMinutes = size(inputBam, "G") String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } @@ -217,6 +219,8 @@ task Flagstat { } runtime { + memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -224,6 +228,7 @@ task Flagstat { # inputs inputBam: {description: "The BAM file for which statistics should be retrieved.", category: "required"} outputPath: {description: "The location the ouput should be written to.", category: "required"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -399,7 +404,7 @@ task FilterShortReadsBam { String outputPathBam String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } - + String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") command { From 99b1586cf50642a211a35f7aec880796d8f7d92e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 7 Apr 2020 12:18:57 +0200 Subject: [PATCH 015/902] time_minutes --- gatk.wdl | 28 +++++++++++++++++++--------- picard.wdl | 7 +++++-- umi-tools.wdl | 1 + 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index c9c1d5eb..125e184b 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -92,7 +92,7 @@ task ApplyBQSR { String memory = "5G" String javaXmx = "4G" - Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can be used. + Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -122,6 +122,7 @@ task ApplyBQSR { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -163,7 +164,7 @@ task BaseRecalibrator { String memory = "5G" String javaXmx = "4G" - Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can be used. + Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -413,15 +414,16 @@ task CombineGVCFs { File referenceFastaDict File referenceFastaFai - String memory = "24G" - String javaXmx = "12G" + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = ceil(size(gvcfFiles, "G") * 8) String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { set -e mkdir -p "$(dirname ~{outputPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ CombineGVCFs \ -R ~{referenceFasta} \ -O ~{outputPath} \ @@ -436,6 +438,7 @@ task CombineGVCFs { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -453,6 +456,7 @@ task CombineGVCFs { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -795,15 +799,16 @@ task GenotypeGVCFs { File? dbsnpVCFIndex File? pedigree - String memory = "18G" + String memory = "7G" String javaXmx = "6G" + Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { set -e mkdir -p "$(dirname ~{outputPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ GenotypeGVCFs \ -R ~{referenceFasta} \ -O ~{outputPath} \ @@ -823,6 +828,7 @@ task GenotypeGVCFs { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -843,6 +849,7 @@ task GenotypeGVCFs { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -919,15 +926,16 @@ task HaplotypeCaller { Boolean gvcf = false String emitRefConfidence = if gvcf then "GVCF" else "NONE" - String memory = "12G" + String memory = "5G" String javaXmx = "4G" + Int timeMinutes = 400 # This will likely be used with intervals, as such size based estimation can't be used. String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { set -e mkdir -p "$(dirname ~{outputPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ HaplotypeCaller \ -R ~{referenceFasta} \ -O ~{outputPath} \ @@ -949,6 +957,7 @@ task HaplotypeCaller { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -976,6 +985,7 @@ task HaplotypeCaller { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/picard.wdl b/picard.wdl index 895acea0..145af9ef 100644 --- a/picard.wdl +++ b/picard.wdl @@ -489,8 +489,9 @@ task MergeVCFs { Array[File]+ inputVCFsIndexes String outputVcfPath - String memory = "24G" - String javaXmx = "8G" + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = size(inputVCFs, "G") String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -513,6 +514,7 @@ task MergeVCFs { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -525,6 +527,7 @@ task MergeVCFs { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/umi-tools.wdl b/umi-tools.wdl index 415081fc..c44635ce 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -118,6 +118,7 @@ task Dedup { umiSeparator: {description: "Seperator used for UMIs in the read names.", category: "advanced"} paired: {description: "Whether or not the data is paired.", category: "common"} memory: {description: "The amount of memory required for the task.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From 5ffb5473d85b77ba20dac1dc9329c2d8a6436166 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 7 Apr 2020 13:51:50 +0200 Subject: [PATCH 016/902] put requirements as defaults --- cutadapt.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index fbd51384..421259d9 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -79,8 +79,8 @@ task Cutadapt { # Hence we use compression level 1 here. Int compressionLevel = 1 # This only affects outputs with the .gz suffix. Int cores = 4 - String? memory - Int? 
timeMinutes + String memory = "~{300 + 100 * cores}M" + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) String dockerImage = "quay.io/biocontainers/cutadapt:2.8--py37h516909a_0" } @@ -167,8 +167,8 @@ task Cutadapt { runtime { cpu: cores - memory: select_first([memory, "~{300 + 100 * cores}M"]) - time_minutes: select_first([timeMinutes, 1 + ceil(size([read1, read2], "G") * 12.0 / cores)]) + memory: memory + time_minutes: timeMinutes docker: dockerImage } From 271cc57a82d4f174a75c44ae3228a44176949e53 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 7 Apr 2020 13:53:25 +0200 Subject: [PATCH 017/902] put requirements as defaults --- fastqc.wdl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fastqc.wdl b/fastqc.wdl index 3e461053..6708a6bc 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -38,8 +38,9 @@ task Fastqc { String? dir Int threads = 1 - String? memory - Int? timeMinutes + # Fastqc uses 250MB per thread in its wrapper. + String memory = "~{250 + 250 * threads}M" + Int? timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" Array[File]? NoneArray File? NoneFile @@ -83,9 +84,9 @@ task Fastqc { runtime { cpu: threads - memory: select_first([memory, "~{250 + 250 * threads}M"]) + memory: memory docker: dockerImage - time_minutes: select_first([timeMinutes, 1 + ceil(size(seqFile, "G")) * 4]) + time_minutes: timeMinutes } parameter_meta { From 7f4e474765273ed670786c2dca1b7db57995812d Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 7 Apr 2020 15:13:36 +0200 Subject: [PATCH 018/902] Replace mv command with cp. 
--- lima.wdl | 10 +++++----- scripts | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lima.wdl b/lima.wdl index 747959a1..c33ee031 100644 --- a/lima.wdl +++ b/lima.wdl @@ -87,13 +87,13 @@ task Lima { ~{barcodeFile} \ ~{basename(outputPrefix) + ".fl.bam"} - # Move commands below are needed because glob command does not find + # copy commands below are needed because glob command does not find # multiple bam/bam.pbi/subreadset.xml files when not located in working # directory. - mv "~{basename(outputPrefix)}.fl.json" "~{outputPrefix}.fl.json" - mv "~{basename(outputPrefix)}.fl.lima.counts" "~{outputPrefix}.fl.lima.counts" - mv "~{basename(outputPrefix)}.fl.lima.report" "~{outputPrefix}.fl.lima.report" - mv "~{basename(outputPrefix)}.fl.lima.summary" "~{outputPrefix}.fl.lima.summary" + cp "~{basename(outputPrefix)}.fl.json" "~{outputPrefix}.fl.json" + cp "~{basename(outputPrefix)}.fl.lima.counts" "~{outputPrefix}.fl.lima.counts" + cp "~{basename(outputPrefix)}.fl.lima.report" "~{outputPrefix}.fl.lima.report" + cp "~{basename(outputPrefix)}.fl.lima.summary" "~{outputPrefix}.fl.lima.summary" } output { diff --git a/scripts b/scripts index dfef7cb2..b83da72b 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit dfef7cb2555667126dc1751add414527240d71bc +Subproject commit b83da72b9b43b956a3062b78fb08044eb9fae464 From fca3b3f03259515e00e3028a3fa2ca2db4e2cdb1 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 7 Apr 2020 15:14:11 +0200 Subject: [PATCH 019/902] Update CHANGELOG. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c3a3744..4bc971ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- ++ Lima: Replace mv command with cp. + Default threads for BWA in bwa.Kit task: 4. Samtools sort in the same task: 1. Output BAM compression level to 1. 
+ Lima: Add missing output to parameter_meta. From 722633698afd7c813f853ca91fbf318b6333e2d4 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 7 Apr 2020 15:42:55 +0200 Subject: [PATCH 020/902] Update runtime. --- CHANGELOG.md | 1 + ccs.wdl | 2 ++ isoseq3.wdl | 2 ++ lima.wdl | 2 ++ 4 files changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bc971ee..d7068687 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- ++ Add time_minutes runtime to CCS, Lima & IsoSeq3. + Lima: Replace mv command with cp. + Default threads for BWA in bwa.Kit task: 4. Samtools sort in the same task: 1. Output BAM compression level to 1. diff --git a/ccs.wdl b/ccs.wdl index 39bb0a19..a0941e97 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -34,6 +34,7 @@ task CCS { Int cores = 4 String memory = "10G" String dockerImage = "quay.io/biocontainers/pbccs:4.2.0--0" + Int timeMinutes = ceil(size(subreadsFile, "G") * 240 / cores) } command { @@ -64,6 +65,7 @@ task CCS { cpu: cores memory: memory docker: dockerImage + time_minutes: timeMinutes } parameter_meta { diff --git a/isoseq3.wdl b/isoseq3.wdl index 8cc0db8f..de58bc42 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -32,6 +32,7 @@ task Refine { Int cores = 4 String memory = "10G" String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" + Int timeMinutes = ceil(size(inputBamFile, "G") * 240 / cores) } command <<< @@ -77,6 +78,7 @@ task Refine { cpu: cores memory: memory docker: dockerImage + time_minutes: timeMinutes } parameter_meta { diff --git a/lima.wdl b/lima.wdl index c33ee031..5e9c32f1 100644 --- a/lima.wdl +++ b/lima.wdl @@ -51,6 +51,7 @@ task Lima { Int cores = 4 String memory = "10G" String dockerImage = "quay.io/biocontainers/lima:1.11.0--0" + Int timeMinutes = ceil(size(inputBamFile, "G") * 240 / cores) } Map[String, String] libraryDesignOptions = {"same": "--same", "different": "--different", 
"neighbors": "--neighbors"} @@ -111,6 +112,7 @@ task Lima { cpu: cores memory: memory docker: dockerImage + time_minutes: timeMinutes } parameter_meta { From 56030a53809de880c739bec537902a870e921d97 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 7 Apr 2020 15:45:28 +0200 Subject: [PATCH 021/902] Update parameter_meta. --- ccs.wdl | 1 + isoseq3.wdl | 1 + lima.wdl | 1 + 3 files changed, 3 insertions(+) diff --git a/ccs.wdl b/ccs.wdl index a0941e97..983900dc 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -81,6 +81,7 @@ task CCS { cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + timeMinutes: {description: "The time (in minutes) it will take for this task to complete.", category: "advanced"} # outputs outputCCSfile: {description: "Consensus reads output file."} diff --git a/isoseq3.wdl b/isoseq3.wdl index de58bc42..0a3f2636 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -92,6 +92,7 @@ task Refine { cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + timeMinutes: {description: "The time (in minutes) it will take for this task to complete.", category: "advanced"} # outputs outputFLNCfile: {description: "Filtered reads output file."} diff --git a/lima.wdl b/lima.wdl index 5e9c32f1..5a9646db 100644 --- a/lima.wdl +++ b/lima.wdl @@ -145,6 +145,7 @@ task Lima { cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + timeMinutes: {description: "The time (in minutes) it will take for this task to complete.", category: "advanced"} # outputs outputFLfile: {description: "Demultiplexed reads output file(s)."} From 1e71e46d5fa85b05174104b7e8ba22948103d214 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 7 Apr 2020 16:42:06 +0200 Subject: [PATCH 022/902] time_minutes --- bcftools.wdl | 6 ++++ clever.wdl | 5 ++- collect-columns.wdl | 2 -- delly.wdl | 3 ++ gatk.wdl | 7 ++-- multiqc.wdl | 4 ++- picard.wdl | 34 ++++++++++++-------- samtools.wdl | 78 ++++++++++++++++++++++++--------------------- 8 files changed, 84 insertions(+), 55 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 122fcdd1..d923885d 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -26,6 +26,8 @@ task Bcf2Vcf { input { File bcf String outputPath = "./bcftools/SV.vcf" + String memory = "2G" + Int timeMinutes = ceil(size(bcf, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.9--ha228f0b_3" } @@ -40,12 +42,16 @@ task Bcf2Vcf { } runtime { + memory: memory + time_minutes: timeMinutes docker: dockerImage } parameter_meta { bcf: {description: "The generated BCF from an SV caller", category: "required"} outputPath: {description: "The location the output VCF 
file should be written.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/clever.wdl b/clever.wdl index e1dcf5a6..2da9f4d2 100644 --- a/clever.wdl +++ b/clever.wdl @@ -90,7 +90,8 @@ task Prediction { String outputPath = "./clever" Int threads = 10 - String memory = "15G" + String memory = "55G" + Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } @@ -114,6 +115,7 @@ task Prediction { runtime { cpu: threads memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -125,6 +127,7 @@ task Prediction { outputPath: {description: "The location the output VCF file should be written.", category: "common"} threads: {description: "The the number of threads required to run a program", category: "advanced"} memory: {description: "The memory required to run the programs", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/collect-columns.wdl b/collect-columns.wdl index 6855b6c0..ed2a4577 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -58,8 +58,6 @@ task CollectColumns { File outputTable = outputPath } - - runtime { memory: "~{memoryGb}G" time_minutes: timeMinutes diff --git a/delly.wdl b/delly.wdl index ad8f18d9..efa1bf60 100644 --- a/delly.wdl +++ b/delly.wdl @@ -31,6 +31,7 @@ task CallSV { String outputPath = "./delly/delly.vcf" String memory = "15G" + Int timeMinutes = 300 String dockerImage = "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" } @@ -49,6 +50,7 @@ task CallSV { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -60,6 +62,7 @@ task CallSV { referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The memory required to run the programs", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/gatk.wdl b/gatk.wdl index 125e184b..cb26ca75 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1439,15 +1439,16 @@ task SplitNCigarReads { String outputBam Array[File] intervals = [] - String memory = "16G" + String memory = "5G" String javaXmx = "4G" + Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { set -e mkdir -p "$(dirname ~{outputBam})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ SplitNCigarReads \ -I ~{inputBam} \ -R ~{referenceFasta} \ @@ -1462,6 +1463,7 @@ task SplitNCigarReads { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -1479,6 +1481,7 @@ task SplitNCigarReads { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/multiqc.wdl b/multiqc.wdl index db1dd21e..3a1908a6 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -57,7 +57,7 @@ task MultiQC { Array[Boolean] finished = [] # An array of booleans that can be used to let multiqc wait on stuff. String memory = "4G" - + Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/multiqc:1.7--py_1" } @@ -108,6 +108,7 @@ task MultiQC { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -146,6 +147,7 @@ task MultiQC { finished: {description: "An array of booleans that can be used to let multiqc wait on stuff.", category: "internal_use_only"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/picard.wdl b/picard.wdl index 145af9ef..2eb9d410 100644 --- a/picard.wdl +++ b/picard.wdl @@ -26,15 +26,16 @@ task BedToIntervalList { File dict String outputPath = "regions.interval_list" - String memory = "12G" + String memory = "5G" String javaXmx = "4G" + Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } command { set -e mkdir -p "$(dirname ~{outputPath})" - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ BedToIntervalList \ I=~{bedFile} \ O=~{outputPath} \ @@ -47,6 +48,7 @@ task BedToIntervalList { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -204,8 +206,9 @@ task CollectRnaSeqMetrics { String basename String strandSpecificity = "NONE" - String memory = "32G" + String memory = "10G" String javaXmx = "8G" + Int timeMinutes = ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -213,7 +216,7 @@ task CollectRnaSeqMetrics { set -e mkdir -p "$(dirname ~{basename})" picard -Xmx~{javaXmx} \ - CollectRnaSeqMetrics \ + CollectRnaSeqMetrics -XX:ParallelGCThreads=1 \ I=~{inputBam} \ O=~{basename}.RNA_Metrics \ CHART_OUTPUT=~{basename}.RNA_Metrics.pdf \ @@ -228,6 +231,7 @@ task CollectRnaSeqMetrics { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -245,6 +249,7 @@ task CollectRnaSeqMetrics { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -261,15 +266,16 @@ task CollectTargetedPcrMetrics { Array[File]+ targetIntervals String basename - String memory = "12G" + String memory = "5G" String javaXmx = "4G" + Int timeMinutes = ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } command { set -e mkdir -p "$(dirname ~{basename})" - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ CollectTargetedPcrMetrics \ I=~{inputBam} \ R=~{referenceFasta} \ @@ -288,6 +294,7 @@ task CollectTargetedPcrMetrics { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -311,6 +318,7 @@ task CollectTargetedPcrMetrics { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -381,7 +389,7 @@ task GatherVcfs { command { set -e mkdir -p "$(dirname ~{outputVcfPath})" - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ GatherVcfs \ INPUT=~{sep=' INPUT=' inputVcfs} \ OUTPUT=~{outputVcfPath} @@ -491,7 +499,7 @@ task MergeVCFs { String memory = "5G" String javaXmx = "4G" - Int timeMinutes = size(inputVCFs, "G") + Int timeMinutes = ceil(size(inputVCFs, "G")) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -501,7 +509,7 @@ task MergeVCFs { command { set -e mkdir -p "$(dirname ~{outputVcfPath})" - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ MergeVcfs \ INPUT=~{sep=' INPUT=' inputVCFs} \ OUTPUT=~{outputVcfPath} @@ -551,7 +559,7 @@ task SamToFastq { command { set -e - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ SamToFastq \ I=~{inputBam} \ ~{"FASTQ=" + outputRead1} \ @@ -584,7 +592,7 @@ task ScatterIntervalList { command { set -e mkdir scatter_list - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ IntervalListTools \ SCATTER_COUNT=~{scatter_count} \ SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \ @@ -620,7 +628,7 @@ task SortVcf { command { set -e mkdir -p "$(dirname ~{outputVcfPath})" - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ SortVcf \ I=~{sep=" I=" vcfFiles} \ ~{"SEQUENCE_DICTIONARY=" + dict} \ @@ -664,7 +672,7 @@ task RenameSample { command { set -e mkdir -p "$(dirname ~{outputPath})" - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ RenameSampleInVcf \ I=~{inputVcf} \ O=~{outputPath} \ diff --git a/samtools.wdl b/samtools.wdl index ca6de261..dc462f82 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -198,13 +198,53 @@ task Markdup { } } +task FilterShortReadsBam { + input { + File bamFile + String outputPathBam + String memory 
= "1G" + Int timeMinutes = ceil(size(bamFile, "G") * 8) + String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + } + + String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") + + command { + set -e + mkdir -p "$(dirname ~{outputPathBam})" + samtools view -h ~{bamFile} | \ + awk 'length($10) > 30 || $1 ~/^@/' | \ + samtools view -bS -> ~{outputPathBam} + samtools index ~{outputPathBam} ~{outputPathBamIndex} + } + + output { + File filteredBam = outputPathBam + File filteredBamIndex = outputPathBamIndex + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + bamFile: {description: "The bam file to process.", category: "required"} + outputPathBam: {description: "The filtered bam file.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + task Flagstat { input { File inputBam String outputPath String memory = "1G" - Int timeMinutes = size(inputBam, "G") + Int timeMinutes = ceil(size(inputBam, "G")) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } @@ -396,38 +436,4 @@ task View { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} - -task FilterShortReadsBam { - input { - File bamFile - String outputPathBam - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" - } - - String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") - - command { - set -e - mkdir -p "$(dirname ~{outputPathBam})" - samtools view -h ~{bamFile} | \ - awk 'length($10) > 30 || $1 ~/^@/' | \ - samtools view -bS -> ~{outputPathBam} - samtools index ~{outputPathBam} ~{outputPathBamIndex} - } - - output { - File filteredBam = outputPathBam - File filteredBamIndex = outputPathBamIndex - } - - runtime { - docker: dockerImage - } - - parameter_meta { - bamFile: {description: "The bam file to process.", category: "required"} - outputPathBam: {description: "The filtered bam file.", category: "common"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - } -} +} \ No newline at end of file From 2641a1e9dfea823d9e658b50decf146b0a8ecdd5 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 8 Apr 2020 09:51:43 +0200 Subject: [PATCH 023/902] Update smoove.wdl Co-Authored-By: DavyCats --- smoove.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/smoove.wdl b/smoove.wdl index 93523f3d..1f12ff28 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -59,10 +59,10 @@ task CallSV { bamFile: {description: "The bam file to process.", category: "required"} bamIndex: {description: "The index bam file.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} - referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } + referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } 
outputDir: {description: "The location the output VCF file should be written.", category: "common"} sample: {description: "The name of the sample", category: "required"} memory: {description: "The memory required to run the programs", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} From e3e0987b1d9679c5c28673c354f9622dc475468c Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 8 Apr 2020 09:51:52 +0200 Subject: [PATCH 024/902] Update smoove.wdl Co-Authored-By: DavyCats --- smoove.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index 1f12ff28..d883cd6e 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -61,7 +61,7 @@ task CallSV { referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } outputDir: {description: "The location the output VCF file should be written.", category: "common"} - sample: {description: "The name of the sample", category: "required"} + sample: {description: "The name of the sample.", category: "required"} memory: {description: "The memory required to run the programs", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From 7a2be2edd248edfd5111ed015e84c87038db46a7 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 8 Apr 2020 09:52:01 +0200 Subject: [PATCH 025/902] Update smoove.wdl Co-Authored-By: DavyCats --- smoove.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index d883cd6e..e1f83f31 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -62,7 +62,7 @@ task CallSV { referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } outputDir: {description: "The location the output VCF file should be written.", category: "common"} sample: {description: "The name of the sample.", category: "required"} - memory: {description: "The memory required to run the programs", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From 5c2503623b3ea6336738fd343c93686fc8a319f3 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 8 Apr 2020 09:52:16 +0200 Subject: [PATCH 026/902] Update smoove.wdl Co-Authored-By: DavyCats --- smoove.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index e1f83f31..c8c49f9d 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -57,7 +57,7 @@ task CallSV { parameter_meta { # inputs bamFile: {description: "The bam file to process.", category: "required"} - bamIndex: {description: "The index bam file.", category: "required"} + bamIndex: {description: "The index of the bam file.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } outputDir: {description: "The location the output VCF file should be written.", category: "common"} From 73f71840924eb148ec2087db799eaf5499c25d31 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 8 Apr 2020 10:16:13 +0200 Subject: [PATCH 027/902] add timeMinutes --- smoove.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/smoove.wdl b/smoove.wdl index c8c49f9d..86eabc69 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -33,6 +33,7 @@ task CallSV { String memory = "15G" String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" + Int timeMinutes = 1440 } command { @@ -52,6 +53,8 @@ task CallSV { runtime { memory: memory docker: dockerImage + time_minutes: timeMinutes + } parameter_meta { From c8d443ae7e3474e24b19ffcff7364e5dc3768f23 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 8 Apr 2020 10:17:32 +0200 Subject: [PATCH 028/902] change task name to Call --- smoove.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index 
86eabc69..9ec46305 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -22,7 +22,7 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -task CallSV { +task Call { input { File bamFile File bamIndex From 8f1d98a42841e8b6e6b45e917ad8881df9780a1e Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 8 Apr 2020 10:24:56 +0200 Subject: [PATCH 029/902] Update smoove.wdl Co-Authored-By: DavyCats --- smoove.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index 9ec46305..c3ab8f6a 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -38,7 +38,7 @@ task Call { command { set -e - mkdir -p "$(dirname ~{outputDir})" + mkdir -p ~{outputDir} smoove call \ --outdir ~{outputDir} \ --name ~{sample} \ From 3d73a28be412afd95bde9fe0a055f70c8942c506 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 8 Apr 2020 10:40:29 +0200 Subject: [PATCH 030/902] Add baseline value. --- CHANGELOG.md | 2 +- ccs.wdl | 2 +- isoseq3.wdl | 2 +- lima.wdl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7068687..7c41c525 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- -+ Add time_minutes runtime to CCS, Lima & IsoSeq3. ++ Add time_minutes in runtime of CCS, Lima & IsoSeq3. + Lima: Replace mv command with cp. + Default threads for BWA in bwa.Kit task: 4. Samtools sort in the same task: 1. Output BAM compression level to 1. 
diff --git a/ccs.wdl b/ccs.wdl index 983900dc..6a90475b 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -34,7 +34,7 @@ task CCS { Int cores = 4 String memory = "10G" String dockerImage = "quay.io/biocontainers/pbccs:4.2.0--0" - Int timeMinutes = ceil(size(subreadsFile, "G") * 240 / cores) + Int timeMinutes = 1 + ceil(size(subreadsFile, "G") * 240 / cores) } command { diff --git a/isoseq3.wdl b/isoseq3.wdl index 0a3f2636..803aa551 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -32,7 +32,7 @@ task Refine { Int cores = 4 String memory = "10G" String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" - Int timeMinutes = ceil(size(inputBamFile, "G") * 240 / cores) + Int timeMinutes = 1 + ceil(size(inputBamFile, "G") * 240 / cores) } command <<< diff --git a/lima.wdl b/lima.wdl index 5a9646db..33949aa6 100644 --- a/lima.wdl +++ b/lima.wdl @@ -51,7 +51,7 @@ task Lima { Int cores = 4 String memory = "10G" String dockerImage = "quay.io/biocontainers/lima:1.11.0--0" - Int timeMinutes = ceil(size(inputBamFile, "G") * 240 / cores) + Int timeMinutes = 1 + ceil(size(inputBamFile, "G") * 240 / cores) } Map[String, String] libraryDesignOptions = {"same": "--same", "different": "--different", "neighbors": "--neighbors"} From e37162e4fce20021a675c5c1aeb01ae0ed7c64f5 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 8 Apr 2020 14:24:42 +0200 Subject: [PATCH 031/902] add meta info for time_minutes --- smoove.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/smoove.wdl b/smoove.wdl index c3ab8f6a..afdd862a 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -66,6 +66,7 @@ task Call { outputDir: {description: "The location the output VCF file should be written.", category: "common"} sample: {description: "The name of the sample.", category: "required"} memory: {description: "The memory required to run the programs.", category: "advanced"} + time_minutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} dockerImage: {description: "The docker image 
used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From b4e37856cf5f21f56d5824dc7c01ac1c63cdf4c0 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 8 Apr 2020 14:56:26 +0200 Subject: [PATCH 032/902] update smoove.wdl --- smoove.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index afdd862a..e8846f72 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -66,7 +66,7 @@ task Call { outputDir: {description: "The location the output VCF file should be written.", category: "common"} sample: {description: "The name of the sample.", category: "required"} memory: {description: "The memory required to run the programs.", category: "advanced"} - time_minutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} + timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From 605bba8d934f99bf69cefbde03c3fe6d70728c5f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 8 Apr 2020 15:56:07 +0200 Subject: [PATCH 033/902] even more time_minutes --- CPAT.wdl | 3 + bcftools.wdl | 2 +- bedtools.wdl | 62 ++++++----- bowtie.wdl | 2 +- bwa.wdl | 4 +- clever.wdl | 1 + cutadapt.wdl | 287 +++++++++++-------------------------------------- fastqc.wdl | 3 +- gatk.wdl | 2 +- gffcompare.wdl | 3 + gffread.wdl | 3 + hisat2.wdl | 5 +- htseq.wdl | 59 +++------- picard.wdl | 12 +-- samtools.wdl | 11 +- star.wdl | 3 + stringtie.wdl | 123 ++++++--------------- umi-tools.wdl | 2 +- 18 files changed, 184 insertions(+), 403 deletions(-) diff --git a/CPAT.wdl b/CPAT.wdl index 098d9ca6..8d212b07 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -31,6 +31,7 @@ task CPAT { # CPAT should not index the reference genome. Array[String]? 
startCodons Array[String]? stopCodons + Int timeMinutes = 1 + ceil(size(gene, "G") * 30) String dockerImage = "biocontainers/cpat:v1.2.4_cv1" } @@ -55,6 +56,7 @@ task CPAT { runtime { docker: dockerImage + time_minutes: timeMinutes } parameter_meta { @@ -67,6 +69,7 @@ task CPAT { category: "advanced"} startCodons: {description: "Equivalent to CPAT's `--start` option.", category: "advanced"} stopCodons: {description: "Equivalent to CPAT's `--stop` option.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/bcftools.wdl b/bcftools.wdl index d923885d..53165c6b 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -27,7 +27,7 @@ task Bcf2Vcf { File bcf String outputPath = "./bcftools/SV.vcf" String memory = "2G" - Int timeMinutes = ceil(size(bcf, "G")) + Int timeMinutes = 1 + ceil(size(bcf, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.9--ha228f0b_3" } diff --git a/bedtools.wdl b/bedtools.wdl index 4f39e2a8..99bb351e 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -24,8 +24,10 @@ task Complement { input { File faidx File inputBed - String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" String outputBed = basename(inputBed, "\.bed") + ".complement.bed" + String memory = "2G" + Int timeMinutes = 1 + ceil(size([inputBed, faidx], "G")) + String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } # Use a fasta index file to get the genome sizes. 
And convert that to the @@ -44,20 +46,19 @@ task Complement { } runtime { + memory: memory + time_minutes: timeMinutes docker: dockerImage } parameter_meta { - faidx: {description: "The fasta index (.fai) file from which to extract the genome sizes", - category: "required"} - inputBed: {description: "The inputBed to complement", - category: "required"} - outputBed: {description: "The path to write the output to", - category: "advanced"} - dockerImage: { - description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + faidx: {description: "The fasta index (.fai) file from which to extract the genome sizes.", category: "required"} + inputBed: {description: "The inputBed to complement.", category: "required"} + outputBed: {description: "The path to write the output to.", category: "advanced"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } @@ -97,6 +98,8 @@ task MergeBedFiles { input { Array[File]+ bedFiles String outputBed = "merged.bed" + String memory = "2G" + Int timeMinutes = 1 + ceil(size(bedFiles, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -111,17 +114,17 @@ task MergeBedFiles { } runtime { + memory: memory + time_minutes: timeMinutes docker: dockerImage } parameter_meta { - bedFiles: {description: "The bed files to merge", - category: "required"} - outputBed: {description: "The path to write the output to", - category: "advanced"} - dockerImage: { - description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + bedFiles: {description: "The bed files to merge.", category: "required"} + outputBed: {description: "The path to write the output to.", category: "advanced"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } @@ -172,6 +175,8 @@ task Intersect { # Giving a faidx file will set the sorted option. File? faidx String outputBed = "intersect.bed" + String memory = "2G" + Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } Boolean sorted = defined(faidx) @@ -192,21 +197,20 @@ task Intersect { } runtime { + memory: memory + time_minutes: timeMinutes docker: dockerImage } parameter_meta { faidx: {description: "The fasta index (.fai) file that is used to create the genome file required for sorted output. Implies sorted option.", category: "common"} - regionsA: {description: "Region file a to intersect", - category: "required"} - regionsB: {description: "Region file b to intersect", - category: "required"} - outputBed: {description: "The path to write the output to", - category: "advanced"} - dockerImage: { - description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + regionsA: {description: "Region file a to intersect", category: "required"} + regionsB: {description: "Region file b to intersect", category: "required"} + outputBed: {description: "The path to write the output to", category: "advanced"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } diff --git a/bowtie.wdl b/bowtie.wdl index 94a809fa..87427e7d 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -37,7 +37,7 @@ task Bowtie { String? samRG Int threads = 1 - Int timeMinutes = ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads) + Int timeMinutes = 1 + ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads) String memory = "10G" String picardXmx = "4G" # Image contains bowtie=1.2.2 and picard=2.9.2 diff --git a/bwa.wdl b/bwa.wdl index 247386d8..a39eb3e9 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -31,7 +31,7 @@ task Mem { Int threads = 4 String memory = "20G" String picardXmx = "4G" - Int timeMinutes = ceil(size([read1, read2], "G") * 200 / threads) + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 200 / threads) # A mulled container is needed to have both picard and bwa in one container. 
# This container contains: picard (2.18.7), bwa (0.7.17-r1188) String dockerImage = "quay.io/biocontainers/mulled-v2-002f51ea92721407ef440b921fb5940f424be842:43ec6124f9f4f875515f9548733b8b4e5fed9aa6-0" @@ -100,7 +100,7 @@ task Kit { String sortMemoryPerThread = "4G" Int compressionLevel = 1 String memory = "20G" - Int timeMinutes = ceil(size([read1, read2], "G") * 220 / threads) + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) String dockerImage = "biocontainers/bwakit:v0.7.15_cv1" } diff --git a/clever.wdl b/clever.wdl index 2da9f4d2..7e1eac46 100644 --- a/clever.wdl +++ b/clever.wdl @@ -78,6 +78,7 @@ task Mateclever { outputPath: {description: "The location the output VCF file should be written.", category: "common"} threads: {description: "The the number of threads required to run a program", category: "advanced"} memory: {description: "The memory required to run the programs", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/cutadapt.wdl b/cutadapt.wdl index 421259d9..ad32ff21 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -173,227 +173,70 @@ task Cutadapt { } parameter_meta { - read1: { - description: "The first or single end fastq file to be run through cutadapt.", - category: "required" - } - read2: { - description: "An optional second end fastq file to be run through cutadapt.", - category: "common" - } - read1output: { - description: "The name of the resulting first or single end fastq file.", - category: "common" - } - read2output: { - description: "The name of the resulting second end fastq file.", - category: "common" - } - adapter: { - description: "A list of 3' ligated adapter sequences to be cut from the given first or single end fastq file.", - category: "common" - } - front: { - description: "A list of 5' ligated adapter sequences to be cut from the given first or single end fastq file.", - category: "advanced" - } - anywhere: { - description: "A list of 3' or 5' ligated adapter sequences to be cut from the given first or single end fastq file.", - category: "advanced" - } - adapterRead2: { - description: "A list of 3' ligated adapter sequences to be cut from the given second end fastq file.", - category: "common" - } - frontRead2: { - description: "A list of 5' ligated adapter sequences to be cut from the given second end fastq file.", - category: "advanced" - } - anywhereRead2: { - description: "A list of 3' or 5' ligated adapter sequences to be cut from the given second end fastq file.", - category: "advanced" - } - interleaved: { - description: "Equivalent to cutadapt's --interleaved flag.", - category: "advanced" - } - pairFilter: { - description: "Equivalent to cutadapt's --pair-filter option.", - category: "advanced" - } - errorRate: { - description: "Equivalent to cutadapt's --error-rate option.", - category: "advanced" - } - noIndels: { - 
description: "Equivalent to cutadapt's --no-indels flag.", - category: "advanced" - } - times: { - description: "Equivalent to cutadapt's --times option.", - category: "advanced" - } - overlap: { - description: "Equivalent to cutadapt's --overlap option.", - category: "advanced" - } - matchReadWildcards: { - description: "Equivalent to cutadapt's --match-read-wildcards flag.", - category: "advanced" - } - noMatchAdapterWildcards: { - description: "Equivalent to cutadapt's --no-match-adapter-wildcards flag.", - category: "advanced" - } - noTrim: { - description: "Equivalent to cutadapt's --no-trim flag.", - category: "advanced" - } - maskAdapter: { - description: "Equivalent to cutadapt's --mask-adapter flag.", - category: "advanced" - } - cut: { - description: "Equivalent to cutadapt's --cut option.", - category: "advanced" - } - nextseqTrim: { - description: "Equivalent to cutadapt's --nextseq-trim option.", - category: "advanced" - } - qualityCutoff: { - description: "Equivalent to cutadapt's --quality-cutoff option.", - category: "advanced" - } - qualityBase: { - description: "Equivalent to cutadapt's --quality-base option.", - category: "advanced" - } - length: { - description: "Equivalent to cutadapt's --length option.", - category: "advanced" - } - trimN: { - description: "Equivalent to cutadapt's --trim-n flag.", - category: "advanced" - } - lengthTag: { - description: "Equivalent to cutadapt's --length-tag option.", - category: "advanced" - } - stripSuffix: { - description: "Equivalent to cutadapt's --strip-suffix option.", - category: "advanced" - } - prefix: { - description: "Equivalent to cutadapt's --prefix option.", - category: "advanced" - } - suffix: { - description: "Equivalent to cutadapt's --suffix option.", - category: "advanced" - } - minimumLength: { - description: "Equivalent to cutadapt's --minimum-length option.", - category: "advanced" - } - maximumLength: { - description: "Equivalent to cutadapt's --maximum-length option.", - category: 
"advanced" - } - maxN: { - description: "Equivalent to cutadapt's --max-n option.", - category: "advanced" - } - discardTrimmed: { - description: "Equivalent to cutadapt's --quality-cutoff option.", - category: "advanced" - } - discardUntrimmed: { - description: "Equivalent to cutadapt's --discard-untrimmed option.", - category: "advanced" - } - infoFilePath: { - description: "Equivalent to cutadapt's --info-file option.", - category: "advanced" - } - restFilePath: { - description: "Equivalent to cutadapt's --rest-file option.", - category: "advanced" - } - wildcardFilePath: { - description: "Equivalent to cutadapt's --wildcard-file option.", - category: "advanced" - } - tooShortOutputPath: { - description: "Equivalent to cutadapt's --too-short-output option.", - category: "advanced" - } - tooLongOutputPath: { - description: "Equivalent to cutadapt's --too-long-output option.", - category: "advanced" - } - untrimmedOutputPath: { - description: "Equivalent to cutadapt's --untrimmed-output option.", - category: "advanced" - } - tooShortPairedOutputPath: { - description: "Equivalent to cutadapt's --too-short-paired-output option.", - category: "advanced" - } - tooLongPairedOutputPath: { - description: "Equivalent to cutadapt's --too-long-paired-output option.", - category: "advanced" - } - untrimmedPairedOutputPath: { - description: "Equivalent to cutadapt's --untrimmed-paired-output option.", - category: "advanced" - } - colorspace: { - description: "Equivalent to cutadapt's --colorspace flag.", - category: "advanced" - } - doubleEncode: { - description: "Equivalent to cutadapt's --double-encode flag.", - category: "advanced" - } - stripF3: { - description: "Equivalent to cutadapt's --strip-f3 flag.", - category: "advanced" - } - maq: { - description: "Equivalent to cutadapt's --maq flag.", - category: "advanced" - } - bwa: { - description: "Equivalent to cutadapt's --bwa flag.", - category: "advanced" - } - zeroCap: { - description: "Equivalent to cutadapt's 
--zero-cap flag.", - category: "advanced" - } - noZeroCap: { - description: "Equivalent to cutadapt's --no-zero-cap flag.", - category: "advanced" - } - reportPath: { - description: "The name of the file to write cutadapts's stdout to, this contains some metrics.", - category: "common" - } - compressionLevel: {description: "The compression level if gzipped output is used.", - category: "advanced"} - cores: { - description: "The number of cores to use.", - category: "advanced" - } - memory: { - description: "The amount of memory this job will use.", - category: "advanced" - } - dockerImage: { - description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + read1: {description: "The first or single end fastq file to be run through cutadapt.", category: "required"} + read2: {description: "An optional second end fastq file to be run through cutadapt.", category: "common"} + read1output: {description: "The name of the resulting first or single end fastq file.", category: "common"} + read2output: {description: "The name of the resulting second end fastq file.", category: "common"} + adapter: {description: "A list of 3' ligated adapter sequences to be cut from the given first or single end fastq file.", + category: "common"} + front: {description: "A list of 5' ligated adapter sequences to be cut from the given first or single end fastq file.", + category: "advanced"} + anywhere: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given first or single end fastq file.", + category: "advanced"} + adapterRead2: {description: "A list of 3' ligated adapter sequences to be cut from the given second end fastq file.", + category: "common"} + frontRead2: {description: "A list of 5' ligated adapter sequences to be cut from the given second end fastq file.", + category: "advanced"} + anywhereRead2: {description: "A list of 3' or 5' ligated adapter 
sequences to be cut from the given second end fastq file.", + category: "advanced"} + interleaved: {description: "Equivalent to cutadapt's --interleaved flag.", category: "advanced"} + pairFilter: {description: "Equivalent to cutadapt's --pair-filter option.", category: "advanced"} + errorRate: {description: "Equivalent to cutadapt's --error-rate option.", category: "advanced"} + noIndels: {description: "Equivalent to cutadapt's --no-indels flag.", category: "advanced"} + times: {description: "Equivalent to cutadapt's --times option.", category: "advanced"} + overlap: {description: "Equivalent to cutadapt's --overlap option.", category: "advanced"} + matchReadWildcards: {description: "Equivalent to cutadapt's --match-read-wildcards flag.", category: "advanced"} + noMatchAdapterWildcards: {description: "Equivalent to cutadapt's --no-match-adapter-wildcards flag.", category: "advanced"} + noTrim: {description: "Equivalent to cutadapt's --no-trim flag.", category: "advanced"} + maskAdapter: {description: "Equivalent to cutadapt's --mask-adapter flag.", category: "advanced"} + cut: {description: "Equivalent to cutadapt's --cut option.", category: "advanced"} + nextseqTrim: {description: "Equivalent to cutadapt's --nextseq-trim option.", category: "advanced"} + qualityCutoff: {description: "Equivalent to cutadapt's --quality-cutoff option.", category: "advanced"} + qualityBase: {description: "Equivalent to cutadapt's --quality-base option.", category: "advanced"} + length: {description: "Equivalent to cutadapt's --length option.", category: "advanced"} + trimN: {description: "Equivalent to cutadapt's --trim-n flag.", category: "advanced"} + lengthTag: {description: "Equivalent to cutadapt's --length-tag option.", category: "advanced"} + stripSuffix: {description: "Equivalent to cutadapt's --strip-suffix option.", category: "advanced"} + prefix: {description: "Equivalent to cutadapt's --prefix option.", category: "advanced"} + suffix: {description: "Equivalent to 
cutadapt's --suffix option.", category: "advanced"} + minimumLength: {description: "Equivalent to cutadapt's --minimum-length option.", category: "advanced"} + maximumLength: {description: "Equivalent to cutadapt's --maximum-length option.", category: "advanced"} + maxN: {description: "Equivalent to cutadapt's --max-n option.", category: "advanced"} + discardTrimmed: {description: "Equivalent to cutadapt's --discard-trimmed option.", category: "advanced"} + discardUntrimmed: {description: "Equivalent to cutadapt's --discard-untrimmed option.", category: "advanced"} + infoFilePath: {description: "Equivalent to cutadapt's --info-file option.", category: "advanced"} + restFilePath: {description: "Equivalent to cutadapt's --rest-file option.", category: "advanced"} + wildcardFilePath: {description: "Equivalent to cutadapt's --wildcard-file option.", category: "advanced"} + tooShortOutputPath: {description: "Equivalent to cutadapt's --too-short-output option.", category: "advanced"} + tooLongOutputPath: {description: "Equivalent to cutadapt's --too-long-output option.", category: "advanced"} + untrimmedOutputPath: {description: "Equivalent to cutadapt's --untrimmed-output option.", category: "advanced"} + tooShortPairedOutputPath: {description: "Equivalent to cutadapt's --too-short-paired-output option.", category: "advanced"} + tooLongPairedOutputPath: {description: "Equivalent to cutadapt's --too-long-paired-output option.", category: "advanced"} + untrimmedPairedOutputPath: {description: "Equivalent to cutadapt's --untrimmed-paired-output option.", category: "advanced"} + colorspace: {description: "Equivalent to cutadapt's --colorspace flag.", category: "advanced"} + doubleEncode: {description: "Equivalent to cutadapt's --double-encode flag.", category: "advanced"} + stripF3: {description: "Equivalent to cutadapt's --strip-f3 flag.", category: "advanced"} + maq: {description: "Equivalent to cutadapt's --maq flag.", category: "advanced"} + bwa: {description: 
"Equivalent to cutadapt's --bwa flag.", category: "advanced"} + zeroCap: {description: "Equivalent to cutadapt's --zero-cap flag.", category: "advanced"} + noZeroCap: {description: "Equivalent to cutadapt's --no-zero-cap flag.", category: "advanced"} + reportPath: {description: "The name of the file to write cutadapts's stdout to, this contains some metrics.", + category: "common"} + compressionLevel: {description: "The compression level if gzipped output is used.", category: "advanced"} + cores: {description: "The number of cores to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } diff --git a/fastqc.wdl b/fastqc.wdl index 6708a6bc..606c1bd4 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -40,7 +40,7 @@ task Fastqc { Int threads = 1 # Fastqc uses 250MB per thread in its wrapper. String memory = "~{250 + 250 * threads}M" - Int? timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 + Int timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" Array[File]? NoneArray File? NoneFile @@ -106,6 +106,7 @@ task Fastqc { dir: {description: "Equivalent to fastqc's --dir option.", category: "advanced"} threads: {description: "The number of cores to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/gatk.wdl b/gatk.wdl index cb26ca75..366b32dd 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -416,7 +416,7 @@ task CombineGVCFs { String memory = "5G" String javaXmx = "4G" - Int timeMinutes = ceil(size(gvcfFiles, "G") * 8) + Int timeMinutes = 1 + ceil(size(gvcfFiles, "G") * 8) String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } diff --git a/gffcompare.wdl b/gffcompare.wdl index ca2b1669..197dd9ad 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -44,6 +44,7 @@ task GffCompare { Boolean verbose = false Boolean debugMode = false + Int timeMinutes = 1 + ceil(size(inputGtfFiles, "G") * 30) String dockerImage = "quay.io/biocontainers/gffcompare:0.10.6--h2d50403_0" # This workaround only works in the input section. @@ -110,6 +111,7 @@ task GffCompare { } runtime { + time_minutes: timeMinutes docker: dockerImage } @@ -134,6 +136,7 @@ task GffCompare { noTmap: {description: "Equivalent to gffcompare's `-T` flag.", category: "advanced"} verbose: {description: "Equivalent to gffcompare's `-V` flag.", category: "advanced"} debugMode: {description: "Equivalent to gffcompare's `-D` flag.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/gffread.wdl b/gffread.wdl index 6b23785c..d83e4d76 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -30,6 +30,7 @@ task GffRead { String? proteinFastaPath String? 
filteredGffPath Boolean outputGtfFormat = false + Int timeMinutes = 1 + ceil(size(inputGff) * 10) String dockerImage = "quay.io/biocontainers/gffread:0.9.12--0" } @@ -62,6 +63,7 @@ task GffRead { runtime { docker: dockerImage + time_minutes: timeMinutes } parameter_meta { @@ -73,6 +75,7 @@ task GffRead { proteinFastaPath: {description: "The location the protein fasta should be written to.", category: "advanced"} filteredGffPath: {description: "The location the filtered GFF should be written to.", category: "advanced"} outputGtfFormat: {description: "Equivalent to gffread's `-T` flag.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/hisat2.wdl b/hisat2.wdl index bc6be2e8..3ea18ee8 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -32,8 +32,9 @@ task Hisat2 { String platform = "illumina" Boolean downstreamTranscriptomeAssembly = true - Int threads = 1 + Int threads = 4 String memory = "48G" + Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / threads) # quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1 # is a combination of hisat2 and samtools # hisat2=2.1.0, samtools=1.8 @@ -67,6 +68,7 @@ task Hisat2 { runtime { memory: memory cpu: threads + 1 + time_minutes: timeMinutes docker: dockerImage } @@ -82,6 +84,7 @@ task Hisat2 { downstreamTranscriptomeAssembly: {description: "Equivalent to hisat2's `--dta` flag.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/htseq.wdl b/htseq.wdl index 900a88a7..9fad1714 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -33,6 +33,7 @@ task HTSeqCount { Array[String] additionalAttributes = [] String memory = "40G" + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 60) String dockerImage = "quay.io/biocontainers/htseq:0.11.2--py37h637b7d7_1" } @@ -56,54 +57,24 @@ task HTSeqCount { } runtime { + time_minutes: timeMinutes memory: memory docker: dockerImage } parameter_meta { - inputBams: { - description: "The input BAM files.", - category: "required" - } - gtfFile: { - description: "A GTF/GFF file containing the features of interest.", - category: "required" - } - outputTable: { - description: "The path to which the output table should be written.", - category: "common" - } - format: { - description: "Equivalent to the -f option of htseq-count.", - category: "advanced" - } - order: { - description: "Equivalent to the -r option of htseq-count.", - category: "advanced" - } - stranded: { - description: "Equivalent to the -s option of htseq-count.", - category: "common" - } - featureType: { - description: "Equivalent to the --type option of htseq-count.", - category: "advanced" - } - idattr: { - description: "Equivalent to the --idattr option of htseq-count.", - category: "advanced" - } - additionalAttributes: { - description: "Equivalent to the --additional-attr option of htseq-count.", - category: "advanced" - } - memory: { - description: "The amount of memory the job requires in GB.", - category: "advanced" - } - dockerImage: { - description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + inputBams: {description: "The input BAM files.", category: "required"} + gtfFile: {description: "A GTF/GFF file containing the features of interest.", category: "required"} + outputTable: {description: "The path to which the output table should be written.", category: "common"} + format: {description: "Equivalent to the -f option of htseq-count.", category: "advanced"} + order: {description: "Equivalent to the -r option of htseq-count.", category: "advanced"} + stranded: {description: "Equivalent to the -s option of htseq-count.", category: "common"} + featureType: {description: "Equivalent to the --type option of htseq-count.", category: "advanced"} + idattr: {description: "Equivalent to the --idattr option of htseq-count.", category: "advanced"} + additionalAttributes: {description: "Equivalent to the --additional-attr option of htseq-count.", category: "advanced"} + memory: {description: "The amount of memory the job requires in GB.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } diff --git a/picard.wdl b/picard.wdl index 2eb9d410..d19e3ac4 100644 --- a/picard.wdl +++ b/picard.wdl @@ -87,7 +87,7 @@ task CollectMultipleMetrics { String memory = "10G" String javaXmx = "8G" - Int timeMinutes = ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -208,7 +208,7 @@ task CollectRnaSeqMetrics { String memory = "10G" String javaXmx = "8G" - Int timeMinutes = ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -268,7 +268,7 @@ task CollectTargetedPcrMetrics { String memory = "5G" String javaXmx = "4G" - Int timeMinutes = ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -333,7 +333,7 @@ task GatherBamFiles { String memory = "5G" String javaXmx = "4G" - Int timeMinutes = ceil(size(inputBams, "G") * 0.5) + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 0.5) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -428,7 +428,7 @@ task MarkDuplicates { String memory = "10G" String javaXmx = "8G" - Int timeMinutes = ceil(size(inputBams, "G")* 8) + Int timeMinutes = 1 + ceil(size(inputBams, "G")* 8) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" # The program default for READ_NAME_REGEX is appropriate in nearly every case. 
@@ -499,7 +499,7 @@ task MergeVCFs { String memory = "5G" String javaXmx = "4G" - Int timeMinutes = ceil(size(inputVCFs, "G")) + Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } diff --git a/samtools.wdl b/samtools.wdl index dc462f82..5ffebc9c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -61,6 +61,8 @@ task Index { input { File bamFile String? outputBamPath + String memory = "2G" + Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } @@ -87,6 +89,8 @@ task Index { } runtime { + memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -95,6 +99,8 @@ task Index { bamFile: {description: "The BAM file for which an index should be made.", category: "required"} outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", category: "common"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -203,7 +209,7 @@ task FilterShortReadsBam { File bamFile String outputPathBam String memory = "1G" - Int timeMinutes = ceil(size(bamFile, "G") * 8) + Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } @@ -244,7 +250,7 @@ task Flagstat { String outputPath String memory = "1G" - Int timeMinutes = ceil(size(inputBam, "G")) + Int timeMinutes = 1 + ceil(size(inputBam, "G")) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } @@ -268,6 +274,7 @@ task Flagstat { # inputs inputBam: {description: "The BAM file for which statistics should be retrieved.", category: "required"} outputPath: {description: "The location the ouput should be written to.", category: "required"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/star.wdl b/star.wdl index e1e55a26..94cdfa80 100644 --- a/star.wdl +++ b/star.wdl @@ -36,6 +36,7 @@ task Star { Int runThreadN = 4 String memory = "48G" + Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } @@ -66,6 +67,7 @@ task Star { runtime { cpu: runThreadN memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -83,6 +85,7 @@ task Star { limitBAMsortRAM: {description: "Equivalent to star's `--limitBAMsortRAM` option.", category: "advanced"} runThreadN: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/stringtie.wdl b/stringtie.wdl index cfaccc92..f1d994b3 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -33,6 +33,7 @@ task Stringtie { Int threads = 1 String memory = "10G" + Int timeMinutes = 1 + ceil(size(bam, "G") * 60 / threads) String dockerImage = "quay.io/biocontainers/stringtie:1.3.4--py35_0" } @@ -58,54 +59,24 @@ task Stringtie { runtime { cpu: threads memory: memory + time_minutes: timeMinutes docker: dockerImage } parameter_meta { - bam: { - description: "The input BAM file.", - category: "required" - } - bamIndex: { - description: "The input BAM file's index.", - category: "required" - } - referenceGtf: { - description: "A reference GTF file to be used as guide.", - category: "common" - } - skipNovelTranscripts: { - description: "Whether new transcripts should be assembled or not.", - category: "common" - } - assembledTranscriptsFile: { - description: "Where the output of the assembly should be written.", - category: "required" - } - firstStranded: { - description: "Equivalent to the --rf flag of stringtie.", - category: "required" - } - secondStranded: { - description: "Equivalent to the --fr flag of stringtie.", - category: "required" - } - geneAbundanceFile: { - description: "Where the abundance file should be written.", - category: "common" - } - threads: { - description: "The number of threads to use.", - category: "advanced" - } - memory: { - description: "The amount of memory needed for this task in GB.", - category: "advanced" - } - dockerImage: { - description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + bam: {description: "The input BAM file.", category: "required"} + bamIndex: {description: "The input BAM file's index.", category: "required"} + referenceGtf: {description: "A reference GTF file to be used as guide.", category: "common"} + skipNovelTranscripts: {description: "Whether new transcripts should be assembled or not.", category: "common"} + assembledTranscriptsFile: {description: "Where the output of the assembly should be written.", category: "required"} + firstStranded: {description: "Equivalent to the --rf flag of stringtie.", category: "required"} + secondStranded: {description: "Equivalent to the --fr flag of stringtie.", category: "required"} + geneAbundanceFile: {description: "Where the abundance file should be written.", category: "common"} + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } @@ -123,6 +94,7 @@ task Merge { String? 
label String memory = "10G" + Int timeMinutes = 1 + ceil(size(gtfFiles, "G") * 20) String dockerImage = "quay.io/biocontainers/stringtie:1.3.4--py35_0" } @@ -148,57 +120,24 @@ task Merge { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } parameter_meta { - gtfFiles: { - description: "The GTF files produced by stringtie.", - category: "required" - } - outputGtfPath: { - description: "Where the output should be written.", - category: "required" - } - guideGtf: { - description: "Equivalent to the -G option of 'stringtie --merge'.", - category: "advanced" - } - minimumLength: { - description: "Equivalent to the -m option of 'stringtie --merge'.", - category: "advanced" - } - minimumCoverage: { - description: "Equivalent to the -c option of 'stringtie --merge'.", - category: "advanced" - } - minimumFPKM: { - description: "Equivalent to the -F option of 'stringtie --merge'.", - category: "advanced" - } - minimumTPM: { - description: "Equivalent to the -T option of 'stringtie --merge'.", - category: "advanced" - } - minimumIsoformFraction: { - description: "Equivalent to the -f option of 'stringtie --merge'.", - category: "advanced" - } - keepMergedTranscriptsWithRetainedIntrons: { - description: "Equivalent to the -i flag of 'stringtie --merge'.", - category: "advanced" - } - label: { - description: "Equivalent to the -l option of 'stringtie --merge'.", - category: "advanced" - } - memory: { - description: "The amount of memory needed for this task in GB.", - category: "advanced" - } - dockerImage: { - description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + gtfFiles: {description: "The GTF files produced by stringtie.", category: "required"} + outputGtfPath: {description: "Where the output should be written.", category: "required"} + guideGtf: {description: "Equivalent to the -G option of 'stringtie --merge'.", category: "advanced"} + minimumLength: {description: "Equivalent to the -m option of 'stringtie --merge'.", category: "advanced"} + minimumCoverage: {description: "Equivalent to the -c option of 'stringtie --merge'.", category: "advanced"} + minimumFPKM: {description: "Equivalent to the -F option of 'stringtie --merge'.", category: "advanced"} + minimumTPM: {description: "Equivalent to the -T option of 'stringtie --merge'.", category: "advanced"} + minimumIsoformFraction: {description: "Equivalent to the -f option of 'stringtie --merge'.", category: "advanced"} + keepMergedTranscriptsWithRetainedIntrons: {description: "Equivalent to the -i flag of 'stringtie --merge'.", category: "advanced"} + label: {description: "Equivalent to the -l option of 'stringtie --merge'.", category: "advanced"} + memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } diff --git a/umi-tools.wdl b/umi-tools.wdl index c44635ce..608924f3 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -76,7 +76,7 @@ task Dedup { Boolean paired = true String memory = "5G" - Int timeMinutes = ceil(size(inputBam, "G") * 18) + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 18) # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" From 04f0edc21f115e44933a79eea6214234e87bc556 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 8 Apr 2020 16:46:11 +0200 Subject: [PATCH 034/902] Update WDL files. --- CHANGELOG.md | 2 +- ccs.wdl | 7 ++----- isoseq3.wdl | 47 +++++++++++++++-------------------------------- lima.wdl | 7 ++----- 4 files changed, 20 insertions(+), 43 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c41c525..42b0b9e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- -+ Add time_minutes in runtime of CCS, Lima & IsoSeq3. ++ Isoseq3: Remove cp commands and other bash magic, file naming is now solved by pipeline. + Lima: Replace mv command with cp. + Default threads for BWA in bwa.Kit task: 4. Samtools sort in the same task: 1. Output BAM compression level to 1. 
diff --git a/ccs.wdl b/ccs.wdl index 6a90475b..3a8f8879 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -31,10 +31,9 @@ task CCS { File subreadsFile String outputPrefix - Int cores = 4 - String memory = "10G" + Int cores = 2 + String memory = "2G" String dockerImage = "quay.io/biocontainers/pbccs:4.2.0--0" - Int timeMinutes = 1 + ceil(size(subreadsFile, "G") * 240 / cores) } command { @@ -65,7 +64,6 @@ task CCS { cpu: cores memory: memory docker: dockerImage - time_minutes: timeMinutes } parameter_meta { @@ -81,7 +79,6 @@ task CCS { cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - timeMinutes: {description: "The time (in minutes) it will take for this task to complete.", category: "advanced"} # outputs outputCCSfile: {description: "Consensus reads output file."} diff --git a/isoseq3.wdl b/isoseq3.wdl index 803aa551..44005a40 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -27,58 +27,41 @@ task Refine { String logLevel = "WARN" File inputBamFile File primerFile - String outputPrefix + String outputDir + String outputNamePrefix - Int cores = 4 - String memory = "10G" + Int cores = 2 + String memory = "1G" String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" - Int timeMinutes = 1 + ceil(size(inputBamFile, "G") * 240 / cores) } command <<< set -e - mkdir -p "$(dirname ~{outputPrefix})" - - # Create a unique output name base on the input bam file. 
- bamBasename="$(basename ~{inputBamFile})" - bamNewName="${bamBasename/fl/flnc}" - folderDirname="$(dirname ~{outputPrefix})" - combinedOutput="${folderDirname}/${bamNewName}" - + mkdir -p "$(dirname ~{outputDir})" isoseq3 refine \ --min-polya-length ~{minPolyAlength} \ ~{true="--require-polya" false="" requirePolyA} \ --log-level ~{logLevel} \ --num-threads ~{cores} \ - --log-file "${bamNewName}.stderr.log" \ + --log-file "~{outputDir}/~{outputNamePrefix}.stderr.log" \ ~{inputBamFile} \ ~{primerFile} \ - ${bamNewName} - - # Copy commands below are needed because naming schema for Refine output - # can not be correctly handled in the WDL output section. - cp "${bamNewName}" "${combinedOutput}" - cp "${bamNewName}.pbi" "${combinedOutput}.pbi" - cp "${bamNewName/bam/consensusreadset}.xml" "${combinedOutput/bam/consensusreadset}.xml" - cp "${bamNewName/bam/filter_summary}.json" "${combinedOutput/bam/filter_summary}.json" - cp "${bamNewName/bam/report}.csv" "${combinedOutput/bam/report}.csv" - cp "${bamNewName}.stderr.log" "${combinedOutput}.stderr.log" + "~{outputDir}/~{outputNamePrefix}.bam" >>> output { - Array[File] outputFLNCfile = glob("*.bam") - Array[File] outputFLNCindexFile = glob("*.bam.pbi") - Array[File] outputConsensusReadsetFile = glob("*.consensusreadset.xml") - Array[File] outputFilterSummaryFile = glob("*.filter_summary.json") - Array[File] outputReportFile = glob("*.report.csv") - Array[File] outputSTDERRfile = glob("*.stderr.log") + File outputFLNCfile = outputDir + "/" + outputNamePrefix + ".bam" + File outputFLNCindexFile = outputDir + "/" + outputNamePrefix + ".bam.pbi" + File outputConsensusReadsetFile = outputDir + "/" + outputNamePrefix + ".consensusreadset.xml" + File outputFilterSummaryFile = outputDir + "/" + outputNamePrefix + ".filter_summary.json" + File outputReportFile = outputDir + "/" + outputNamePrefix + ".report.csv" + File outputSTDERRfile = outputDir + "/" + outputNamePrefix + ".stderr.log" } runtime { cpu: cores memory: memory 
docker: dockerImage - time_minutes: timeMinutes } parameter_meta { @@ -88,11 +71,11 @@ task Refine { logLevel: {description: "Set log level. Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} inputBamFile: {description: "BAM input file.", category: "required"} primerFile: {description: "Barcode/primer fasta file.", category: "required"} - outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + outputDir: {description: "Output directory path.", category: "required"} + outputNamePrefix: {description: "Basename of the output files.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - timeMinutes: {description: "The time (in minutes) it will take for this task to complete.", category: "advanced"} # outputs outputFLNCfile: {description: "Filtered reads output file."} diff --git a/lima.wdl b/lima.wdl index 33949aa6..ba8a5407 100644 --- a/lima.wdl +++ b/lima.wdl @@ -48,10 +48,9 @@ task Lima { File barcodeFile String outputPrefix - Int cores = 4 - String memory = "10G" + Int cores = 2 + String memory = "2G" String dockerImage = "quay.io/biocontainers/lima:1.11.0--0" - Int timeMinutes = 1 + ceil(size(inputBamFile, "G") * 240 / cores) } Map[String, String] libraryDesignOptions = {"same": "--same", "different": "--different", "neighbors": "--neighbors"} @@ -112,7 +111,6 @@ task Lima { cpu: cores memory: memory docker: dockerImage - time_minutes: timeMinutes } parameter_meta { @@ -145,7 +143,6 @@ task Lima { cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The 
docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - timeMinutes: {description: "The time (in minutes) it will take for this task to complete.", category: "advanced"} # outputs outputFLfile: {description: "Demultiplexed reads output file(s)."} From e232418621388cf28bf450ec79dad1b859219d30 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 9 Apr 2020 10:26:44 +0200 Subject: [PATCH 035/902] fix missing "size(" --- bedtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bedtools.wdl b/bedtools.wdl index 99bb351e..4f16b7c0 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -176,7 +176,7 @@ task Intersect { File? faidx String outputBed = "intersect.bed" String memory = "2G" - Int timeMinutes = 1 + ceil([regionsA, regionsB], "G")) + Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } Boolean sorted = defined(faidx) From 9dec796138a9b42b50cd251f25cf08d337c2570e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 9 Apr 2020 10:31:48 +0200 Subject: [PATCH 036/902] remove timeMinutes parameter_meta for mateclever --- clever.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/clever.wdl b/clever.wdl index 7e1eac46..2da9f4d2 100644 --- a/clever.wdl +++ b/clever.wdl @@ -78,7 +78,6 @@ task Mateclever { outputPath: {description: "The location the output VCF file should be written.", category: "common"} threads: {description: "The the number of threads required to run a program", category: "advanced"} memory: {description: "The memory required to run the programs", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From ea9c340a3126665caad0e04bd9c5aae21491c624 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 9 Apr 2020 11:04:39 +0200 Subject: [PATCH 037/902] fix start time_minutes --- star.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/star.wdl b/star.wdl index 94cdfa80..94a090a8 100644 --- a/star.wdl +++ b/star.wdl @@ -23,7 +23,7 @@ version 1.0 task Star { input { Array[File]+ inputR1 - Array[File]? inputR2 + Array[File] inputR2 = [] Array[File]+ indexFiles String outFileNamePrefix String outSAMtype = "BAM SortedByCoordinate" @@ -36,7 +36,7 @@ task Star { Int runThreadN = 4 String memory = "48G" - Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / runThreadN) + Int timeMinutes = 1 + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } From a4b3d7a925eeeeee6fa13d6c85e5accf4c23f2a3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 9 Apr 2020 16:10:41 +0200 Subject: [PATCH 038/902] update time_minutes --- biopet/biopet.wdl | 2 +- htseq.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index 7634e7b0..9004f917 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -266,7 +266,7 @@ task ScatterRegions { String memory = "1G" String javaXmx = "500M" - Int timeMinutes = 1 + Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/biopet-scatterregions:0.2--0" } diff --git a/htseq.wdl b/htseq.wdl index 9fad1714..35faeef3 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -33,7 +33,7 @@ task HTSeqCount { Array[String] additionalAttributes = [] String memory = "40G" - Int timeMinutes = 1 + ceil(size(inputBams, "G") * 60) + Int timeMinutes = 10 + ceil(size(inputBams, "G") * 60) String dockerImage = "quay.io/biocontainers/htseq:0.11.2--py37h637b7d7_1" } From 14d4dfb82412c00c934a994d6dc3a396c63fd0bb Mon 
Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 10 Apr 2020 11:33:33 +0200 Subject: [PATCH 039/902] fix CPAT time_minutes --- CPAT.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CPAT.wdl b/CPAT.wdl index 8d212b07..3b542e4f 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -31,7 +31,7 @@ task CPAT { # CPAT should not index the reference genome. Array[String]? startCodons Array[String]? stopCodons - Int timeMinutes = 1 + ceil(size(gene, "G") * 30) + Int timeMinutes = 10 + ceil(size(gene, "G") * 30) String dockerImage = "biocontainers/cpat:v1.2.4_cv1" } From 4f9b821503b03abdb75becf4e913d32249420444 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 10 Apr 2020 12:20:53 +0200 Subject: [PATCH 040/902] fix gatk java options --- gatk.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 366b32dd..e6b86eed 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -171,7 +171,7 @@ task BaseRecalibrator { command { set -e mkdir -p "$(dirname ~{recalibrationReportPath})" - gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ BaseRecalibrator \ -R ~{referenceFasta} \ -I ~{inputBam} \ @@ -423,7 +423,7 @@ task CombineGVCFs { command { set -e mkdir -p "$(dirname ~{outputPath})" - gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ CombineGVCFs \ -R ~{referenceFasta} \ -O ~{outputPath} \ @@ -706,7 +706,7 @@ task GatherBqsrReports { command { set -e mkdir -p "$(dirname ~{outputReportPath})" - gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ GatherBQSRReports \ -I ~{sep=' -I ' inputBQSRreports} \ -O ~{outputReportPath} @@ -808,7 +808,7 @@ task GenotypeGVCFs { command { set -e mkdir -p "$(dirname ~{outputPath})" - gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + gatk --java-options '-Xmx~{javaXmx} 
-XX:ParallelGCThreads=1' \ GenotypeGVCFs \ -R ~{referenceFasta} \ -O ~{outputPath} \ @@ -935,7 +935,7 @@ task HaplotypeCaller { command { set -e mkdir -p "$(dirname ~{outputPath})" - gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ HaplotypeCaller \ -R ~{referenceFasta} \ -O ~{outputPath} \ @@ -1448,7 +1448,7 @@ task SplitNCigarReads { command { set -e mkdir -p "$(dirname ~{outputBam})" - gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ SplitNCigarReads \ -I ~{inputBam} \ -R ~{referenceFasta} \ From ee708b8ea82f9b6d4522dab54f980013f1b9d6a4 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 14 Apr 2020 10:34:31 +0200 Subject: [PATCH 041/902] Isoseq3 requires more memory by default. --- CHANGELOG.md | 1 + isoseq3.wdl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3dfe8b8d..00113ca3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- ++ Isoseq3: Required more memory for common datasets. + Isoseq3: Remove cp commands and other bash magic, file naming is now solved by pipeline. + Lima: Replace mv command with cp. + Add WDL task for smoove (lumpy) sv-caller. diff --git a/isoseq3.wdl b/isoseq3.wdl index 44005a40..007aa002 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -31,7 +31,7 @@ task Refine { String outputNamePrefix Int cores = 2 - String memory = "1G" + String memory = "2G" String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" } From c10cc771af14d9160980e2ee2c43a341a958197a Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 14 Apr 2020 10:38:49 +0200 Subject: [PATCH 042/902] Update CHANGELOG. 
--- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 00113ca3..df1032a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- -+ Isoseq3: Required more memory for common datasets. ++ Isoseq3: Requires more memory by default, is now 2G. + Isoseq3: Remove cp commands and other bash magic, file naming is now solved by pipeline. + Lima: Replace mv command with cp. + Add WDL task for smoove (lumpy) sv-caller. From 98494a219b12aeef2975c471ddcf241bbef0d457 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 15 Apr 2020 14:23:13 +0200 Subject: [PATCH 043/902] Remove dirname command from output folder creation step. --- CHANGELOG.md | 1 + isoseq3.wdl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index df1032a4..70a8349a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- ++ Isoseq3: Remove dirname command from output folder creation step. + Isoseq3: Requires more memory by default, is now 2G. + Isoseq3: Remove cp commands and other bash magic, file naming is now solved by pipeline. + Lima: Replace mv command with cp. diff --git a/isoseq3.wdl b/isoseq3.wdl index 007aa002..474709a5 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -37,7 +37,7 @@ task Refine { command <<< set -e - mkdir -p "$(dirname ~{outputDir})" + mkdir -p "~{outputDir}" isoseq3 refine \ --min-polya-length ~{minPolyAlength} \ ~{true="--require-polya" false="" requirePolyA} \ From 92c7e92334726396ce1ac9127c3513c86cd33cd0 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 15 Apr 2020 14:30:23 +0200 Subject: [PATCH 044/902] Update brackets. 
--- isoseq3.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/isoseq3.wdl b/isoseq3.wdl index 474709a5..10d87bbc 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -35,7 +35,7 @@ task Refine { String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" } - command <<< + command { set -e mkdir -p "~{outputDir}" isoseq3 refine \ @@ -47,7 +47,7 @@ task Refine { ~{inputBamFile} \ ~{primerFile} \ "~{outputDir}/~{outputNamePrefix}.bam" - >>> + } output { File outputFLNCfile = outputDir + "/" + outputNamePrefix + ".bam" From 117bde45f272ede019276e2c720c84d2c7d8d7a4 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 20 Apr 2020 15:56:04 +0200 Subject: [PATCH 045/902] Add new tasks to samtools and picard. --- CHANGELOG.md | 3 ++ picard.wdl | 43 +++++++++++++++++++++++++++++ samtools.wdl | 78 ++++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 113 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70a8349a..3beefefd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- ++ Generalize sort task to now also sort by position, instead of just read name. ++ Add CreateSequenceDictionary task to picard. ++ Add faidx task to samtools. + Isoseq3: Remove dirname command from output folder creation step. + Isoseq3: Requires more memory by default, is now 2G. + Isoseq3: Remove cp commands and other bash magic, file naming is now solved by pipeline. 
diff --git a/picard.wdl b/picard.wdl index 7df96aa9..2a0121dc 100644 --- a/picard.wdl +++ b/picard.wdl @@ -313,6 +313,49 @@ task CollectTargetedPcrMetrics { } } +task CreateSequenceDictionary { + input { + File inputFile + String outputDir + String basenameInputFile = basename(inputFile) + + String memory = "2G" + String javaXmx = "2G" + String dockerImage = "quay.io/biocontainers/picard:2.22.3--0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputDir})" + picard -Xmx~{javaXmx} \ + CreateSequenceDictionary \ + REFERENCE=~{inputFile} \ + OUTPUT="~{outputDir}/~{basenameInputFile}.dict" + } + + output { + File outputDict = outputDir + "/" + basenameInputFile + ".dict" + } + + runtime { + memory: memory + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input fasta file.", category: "required"} + outputDir: {description: "Output directory path.", category: "required"} + basenameInputFile: {description: "The basename of the input file.", category: "required"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputDict: {description: "Dictionary of the input fasta file."} + } +} + # Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs task GatherBamFiles { input { diff --git a/samtools.wdl b/samtools.wdl index a4a893a1..9198119a 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -57,6 +57,46 @@ task BgzipAndIndex { } } +task Faidx { + input { + File inputFile + String outputDir + String basenameInputFile = basename(inputFile) + + String memory = "2G" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + } + + command <<< + set -e + mkdir -p "$(dirname ~{outputDir})" + ln -s ~{inputFile} "~{outputDir}/~{basenameInputFile}" + samtools faidx \ + "~{outputDir}/~{basenameInputFile}" + >>> + + output { + File outputIndex = outputDir + "/" + basenameInputFile + ".fai" + } + + runtime { + memory: memory + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input fasta file.", category: "required"} + outputDir: {description: "Output directory path.", category: "required"} + basenameInputFile: {description: "The basename of the input file.", category: "required"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputIndex: {description: "Index of the input fasta file."} + } +} + task Index { input { File bamFile @@ -136,34 +176,50 @@ task Merge { } } -task SortByName { +task Sort { input { - File bamFile - String outputBamPath = "namesorted.bam" + File inputBam + String outputPrefix + Boolean sortByName = false + String outputFormat = "BAM" - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + Int cores = 1 + String memory = "2G" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } command { set -e - mkdir -p "$(dirname ~{outputBamPath})" - samtools sort -n ~{bamFile} -o ~{outputBamPath} + mkdir -p "$(dirname ~{outputPrefix})" + samtools sort \ + ~{true="-n" false="" sortByName} \ + "--output-fmt " ~{outputFormat} \ + --threads ~{cores} \ + -o "~{outputPrefix}.sorted.bam" \ + ~{inputBam} } output { - File outputBam = outputBamPath + File outputSortedBam = outputPrefix + ".sorted.bam" } runtime { + cpu: cores + memory: memory docker: dockerImage } parameter_meta { # inputs - bamFile: {description: "The BAM file to get sorted.", category: "required"} - outputBamPath: {description: "The location the sorted BAM file should be written to.", category: "common"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + inputFile: {description: "The input SAM file.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} + cores: {description: "The number of cores to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputSortedBAM: {description: "Sorted BAM file."} } } From 7627c314c98f1d179b5031053d575f13404cc8fc Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 20 Apr 2020 16:02:32 +0200 Subject: [PATCH 046/902] Fix travis error. --- samtools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 9198119a..20f7ef0a 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -211,7 +211,7 @@ task Sort { parameter_meta { # inputs - inputFile: {description: "The input SAM file.", category: "required"} + inputBam: {description: "The input SAM file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} cores: {description: "The number of cores to be used.", category: "advanced"} @@ -219,7 +219,7 @@ task Sort { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputSortedBAM: {description: "Sorted BAM file."} + outputSortedBam: {description: "Sorted BAM file."} } } From e3890adf453835a2fabca33973bdfe976beb0127 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 21 Apr 2020 09:03:45 +0200 Subject: [PATCH 047/902] Address GitHub comments. --- samtools.wdl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 20f7ef0a..82e82a05 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -70,7 +70,7 @@ task Faidx { command <<< set -e mkdir -p "$(dirname ~{outputDir})" - ln -s ~{inputFile} "~{outputDir}/~{basenameInputFile}" + ln ~{inputFile} "~{outputDir}/~{basenameInputFile}" samtools faidx \ "~{outputDir}/~{basenameInputFile}" >>> @@ -181,20 +181,21 @@ task Sort { File inputBam String outputPrefix Boolean sortByName = false - String outputFormat = "BAM" - Int cores = 1 String memory = "2G" String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + + Int? 
threads } command { set -e mkdir -p "$(dirname ~{outputPrefix})" samtools sort \ + "-l 1" ~{true="-n" false="" sortByName} \ - "--output-fmt " ~{outputFormat} \ - --threads ~{cores} \ + "--output-fmt BAM" \ + ~{"--threads " + threads} \ -o "~{outputPrefix}.sorted.bam" \ ~{inputBam} } @@ -204,7 +205,7 @@ task Sort { } runtime { - cpu: cores + cpu: 1 + select_first([threads, 0]) memory: memory docker: dockerImage } @@ -214,9 +215,9 @@ task Sort { inputBam: {description: "The input SAM file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} - cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of threads that need to be added to the task.", category: "advanced"} # outputs outputSortedBam: {description: "Sorted BAM file."} From 90c0270d1a02c56e28267306138dc0eec4449c81 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 21 Apr 2020 09:21:55 +0200 Subject: [PATCH 048/902] Update documentation. --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 82e82a05..6c523947 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -217,7 +217,7 @@ task Sort { sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - threads: {description: "The number of threads that need to be added to the task.", category: "advanced"} + threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} # outputs outputSortedBam: {description: "Sorted BAM file."} From f423e53e31ba53ec6db7ba5aad244b51ea28e6db Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 21 Apr 2020 09:31:38 +0200 Subject: [PATCH 049/902] Add GC parameter. --- picard.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/picard.wdl b/picard.wdl index 2a0121dc..b78e6039 100644 --- a/picard.wdl +++ b/picard.wdl @@ -328,6 +328,7 @@ task CreateSequenceDictionary { set -e mkdir -p "$(dirname ~{outputDir})" picard -Xmx~{javaXmx} \ + -XX:ParallelGCThreads=1 \ CreateSequenceDictionary \ REFERENCE=~{inputFile} \ OUTPUT="~{outputDir}/~{basenameInputFile}.dict" From 441890ee663cfe623c13f0f5b290a5cc6d6524da Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 21 Apr 2020 10:20:49 +0200 Subject: [PATCH 050/902] Address comments. 
--- picard.wdl | 10 ++++------ samtools.wdl | 18 +++++++++--------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/picard.wdl b/picard.wdl index b78e6039..5a759247 100644 --- a/picard.wdl +++ b/picard.wdl @@ -317,25 +317,24 @@ task CreateSequenceDictionary { input { File inputFile String outputDir - String basenameInputFile = basename(inputFile) - String memory = "2G" + String memory = "3G" String javaXmx = "2G" String dockerImage = "quay.io/biocontainers/picard:2.22.3--0" } command { set -e - mkdir -p "$(dirname ~{outputDir})" + mkdir -p "~{outputDir}" picard -Xmx~{javaXmx} \ -XX:ParallelGCThreads=1 \ CreateSequenceDictionary \ REFERENCE=~{inputFile} \ - OUTPUT="~{outputDir}/~{basenameInputFile}.dict" + OUTPUT="~{outputDir}/$(basename ~{inputFile}).dict" } output { - File outputDict = outputDir + "/" + basenameInputFile + ".dict" + File outputDict = outputDir + "/" + basename(InputFile) + ".dict" } runtime { @@ -347,7 +346,6 @@ task CreateSequenceDictionary { # inputs inputFile: {description: "The input fasta file.", category: "required"} outputDir: {description: "Output directory path.", category: "required"} - basenameInputFile: {description: "The basename of the input file.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/samtools.wdl b/samtools.wdl index 6c523947..15ea9a20 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -67,13 +67,13 @@ task Faidx { String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } - command <<< + command { set -e - mkdir -p "$(dirname ~{outputDir})" + mkdir -p "~{outputDir}" ln ~{inputFile} "~{outputDir}/~{basenameInputFile}" samtools faidx \ "~{outputDir}/~{basenameInputFile}" - >>> + } output { File outputIndex = outputDir + "/" + basenameInputFile + ".fai" @@ -179,8 +179,9 @@ task Merge { task Sort { input { File inputBam - String outputPrefix + String outputPath Boolean sortByName = false + Int compressionLevel = 1 String memory = "2G" String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" @@ -190,18 +191,17 @@ task Sort { command { set -e - mkdir -p "$(dirname ~{outputPrefix})" + mkdir -p "~{outputPath}" samtools sort \ - "-l 1" + "-l " ~{compressionLevel} \ ~{true="-n" false="" sortByName} \ - "--output-fmt BAM" \ ~{"--threads " + threads} \ - -o "~{outputPrefix}.sorted.bam" \ + "-o " ~{outputPath} \ ~{inputBam} } output { - File outputSortedBam = outputPrefix + ".sorted.bam" + File outputSortedBam = outputPath } runtime { From a80da1d97ee587ee563a77175dfee0b559cdae80 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 21 Apr 2020 10:44:54 +0200 Subject: [PATCH 051/902] Reinstate symlink. 
--- picard.wdl | 2 +- samtools.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/picard.wdl b/picard.wdl index 5a759247..5393cd3a 100644 --- a/picard.wdl +++ b/picard.wdl @@ -334,7 +334,7 @@ task CreateSequenceDictionary { } output { - File outputDict = outputDir + "/" + basename(InputFile) + ".dict" + File outputDict = outputDir + "/" + basename(inputFile) + ".dict" } runtime { diff --git a/samtools.wdl b/samtools.wdl index 15ea9a20..24ae7f94 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -70,7 +70,7 @@ task Faidx { command { set -e mkdir -p "~{outputDir}" - ln ~{inputFile} "~{outputDir}/~{basenameInputFile}" + ln -s ~{inputFile} "~{outputDir}/~{basenameInputFile}" samtools faidx \ "~{outputDir}/~{basenameInputFile}" } From c03c5562da06382ba5447b7993866d6ba47cd4b3 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 21 Apr 2020 11:05:55 +0200 Subject: [PATCH 052/902] Fix travis error. --- samtools.wdl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 24ae7f94..ad7799cb 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -61,7 +61,6 @@ task Faidx { input { File inputFile String outputDir - String basenameInputFile = basename(inputFile) String memory = "2G" String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" @@ -70,13 +69,13 @@ task Faidx { command { set -e mkdir -p "~{outputDir}" - ln -s ~{inputFile} "~{outputDir}/~{basenameInputFile}" + ln -s ~{inputFile} "~{outputDir}/$(basename ~{inputFile})" samtools faidx \ - "~{outputDir}/~{basenameInputFile}" + "~{outputDir}/$(basename ~{inputFile})" } output { - File outputIndex = outputDir + "/" + basenameInputFile + ".fai" + File outputIndex = outputDir + "/" + basename(inputFile) + ".fai" } runtime { @@ -88,7 +87,6 @@ task Faidx { # inputs inputFile: {description: "The input fasta file.", category: "required"} outputDir: {description: "Output directory path.", category: "required"} - basenameInputFile: {description: 
"The basename of the input file.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -191,7 +189,7 @@ task Sort { command { set -e - mkdir -p "~{outputPath}" + mkdir -p "$(dirname ~{outputPath})" samtools sort \ "-l " ~{compressionLevel} \ ~{true="-n" false="" sortByName} \ @@ -213,8 +211,9 @@ task Sort { parameter_meta { # inputs inputBam: {description: "The input SAM file.", category: "required"} - outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "required"} sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} From 78cb350a881a67400c7d5b3f62fa337331912e18 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 21 Apr 2020 14:24:08 +0200 Subject: [PATCH 053/902] Update CHANGELOG.md. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3beefefd..4a95484e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- ++ Samtools SortByName is now called Sort. 
+ Generalize sort task to now also sort by position, instead of just read name. + Add CreateSequenceDictionary task to picard. + Add faidx task to samtools. From d9265c317f404c26f0bfd0a2950fd716e9204e56 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 28 Apr 2020 13:36:45 +0200 Subject: [PATCH 054/902] Fix quotations in samtools sort. --- CHANGELOG.md | 1 + samtools.wdl | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a95484e..c12fcecc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- ++ Samtools: Fix quotations in sort command. + Samtools SortByName is now called Sort. + Generalize sort task to now also sort by position, instead of just read name. + Add CreateSequenceDictionary task to picard. diff --git a/samtools.wdl b/samtools.wdl index ad7799cb..5521c6aa 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -191,10 +191,10 @@ task Sort { set -e mkdir -p "$(dirname ~{outputPath})" samtools sort \ - "-l " ~{compressionLevel} \ + -l ~{compressionLevel} \ ~{true="-n" false="" sortByName} \ ~{"--threads " + threads} \ - "-o " ~{outputPath} \ + -o ~{outputPath} \ ~{inputBam} } From 169de2d4c1c30245079bcae2e13caac2df1a9f40 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 28 Apr 2020 16:44:22 +0200 Subject: [PATCH 055/902] Fix CHANGELOG. --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c12fcecc..1d330414 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,8 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 3.1.0 + +version 3.2.0 --------------------------- + Samtools: Fix quotations in sort command. + Samtools SortByName is now called Sort. 
@@ -20,6 +21,9 @@ version 3.1.0 + Isoseq3: Remove cp commands and other bash magic, file naming is now solved by pipeline. + Lima: Replace mv command with cp. + Add WDL task for smoove (lumpy) sv-caller. + +version 3.1.0 +--------------------------- + Default threads for BWA in bwa.Kit task: 4. Samtools sort in the same task: 1. Output BAM compression level to 1. + Lima: Add missing output to parameter_meta. From 8aafaabd2bbb4acc175a1aa873ae49c313624d46 Mon Sep 17 00:00:00 2001 From: Jasper Date: Thu, 30 Apr 2020 09:27:54 +0200 Subject: [PATCH 056/902] Update CHANGELOG.md Co-Authored-By: DavyCats --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d330414..e4dea7c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 3.2.0 +version 3.2.0-develop --------------------------- + Samtools: Fix quotations in sort command. + Samtools SortByName is now called Sort. From 287fc97023de885514d545965a78f77c732d0261 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 1 May 2020 15:48:38 +0200 Subject: [PATCH 057/902] more time_minutes --- gatk.wdl | 113 ++++++++++++++++++++++++++++++++++++++------------ umi-tools.wdl | 4 +- 2 files changed, 89 insertions(+), 28 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index e6b86eed..31d895fd 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -34,6 +34,7 @@ task AnnotateIntervals { String memory = "10G" String javaXmx = "2G" + Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -57,6 +58,7 @@ task AnnotateIntervals { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -73,6 +75,7 @@ task AnnotateIntervals { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -223,6 +226,7 @@ task CalculateContamination { String memory = "24G" String javaXmx = "12G" + Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" } @@ -243,6 +247,7 @@ task CalculateContamination { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -252,6 +257,7 @@ task CalculateContamination { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -262,8 +268,9 @@ task CallCopyRatioSegments { String outputPrefix File copyRatioSegments - String memory = "21G" - String javaXmx = "6G" + String memory = "3G" + String javaXmx = "2G" + Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -283,6 +290,7 @@ task CallCopyRatioSegments { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -292,6 +300,7 @@ task CallCopyRatioSegments { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -307,8 +316,10 @@ task CollectAllelicCounts { File referenceFasta File referenceFastaDict File referenceFastaFai - String memory = "90G" - String javaXmx = "30G" + + String memory = "12G" + String javaXmx = "10G" + Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -329,6 +340,7 @@ task CollectAllelicCounts { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -344,6 +356,7 @@ task CollectAllelicCounts { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -360,8 +373,9 @@ task CollectReadCounts { File referenceFastaFai String intervalMergingRule = "OVERLAPPING_ONLY" - String memory = "35G" - String javaXmx = "7G" + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 5) String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -384,6 +398,7 @@ task CollectReadCounts { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -399,6 +414,7 @@ task CollectReadCounts { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -474,8 +490,9 @@ task CombineVariants { Array[File]+ variantIndexes String outputPath - String memory = "24G" + String memory = "16G" String javaXmx = "12G" + Int timeMinutes = 180 String dockerImage = "broadinstitute/gatk3:3.8-1" } @@ -510,6 +527,7 @@ task CombineVariants { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -527,6 +545,7 @@ task CombineVariants { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -538,8 +557,9 @@ task CreateReadCountPanelOfNormals { Array[File]+ readCountsFiles File? annotatedIntervals - String memory = "21G" - String javaXmx = "7G" + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = 5 String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer causes a spark related error for some reason... } @@ -559,6 +579,7 @@ task CreateReadCountPanelOfNormals { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -570,6 +591,7 @@ task CreateReadCountPanelOfNormals { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -582,8 +604,9 @@ task DenoiseReadCounts { File readCounts String outputPrefix - String memory = "39G" - String javaXmx = "13G" + String memory = "6G" + String javaXmx = "4G" + Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -606,6 +629,7 @@ task DenoiseReadCounts { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -618,6 +642,7 @@ task DenoiseReadCounts { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -637,8 +662,9 @@ task FilterMutectCalls { Int uniqueAltReadCount = 4 File mutect2Stats - String memory = "24G" + String memory = "16G" String javaXmx = "12G" + Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" } @@ -667,6 +693,7 @@ task FilterMutectCalls { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -686,6 +713,7 @@ task FilterMutectCalls { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -743,8 +771,9 @@ task GenomicsDBImport { String genomicsDBWorkspacePath = "genomics_db" String genomicsDBTarFile = "genomics_db.tar.gz" String? tmpDir - String memory = "12G" + String memory = "6G" String javaXmx = "4G" + Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -766,6 +795,7 @@ task GenomicsDBImport { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -780,6 +810,7 @@ task GenomicsDBImport { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -865,8 +896,9 @@ task GetPileupSummaries { File sitesForContaminationIndex String outputPrefix - String memory = "24G" + String memory = "16G" String javaXmx = "12G" + Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" } @@ -886,6 +918,7 @@ task GetPileupSummaries { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -901,6 +934,7 @@ task GetPileupSummaries { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -996,8 +1030,9 @@ task LearnReadOrientationModel { input { Array[File]+ f1r2TarGz - String memory = "24G" + String memory = "16G" String javaXmx = "12G" + Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" } @@ -1015,6 +1050,7 @@ task LearnReadOrientationModel { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -1023,6 +1059,7 @@ task LearnReadOrientationModel { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -1032,8 +1069,9 @@ task MergeStats { input { Array[File]+ stats - String memory = "28G" + String memory = "16G" String javaXmx = "14G" + Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -1051,6 +1089,7 @@ task MergeStats { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -1059,6 +1098,7 @@ task MergeStats { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -1076,8 +1116,9 @@ task ModelSegments { else 30 Int maximumNumberOfSmoothingIterations = 10 - String memory = "64G" + String memory = "12G" String javaXmx = "10G" + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -1111,6 +1152,7 @@ task ModelSegments { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -1126,6 +1168,7 @@ task ModelSegments { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program.
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -1149,8 +1192,9 @@ task MuTect2 { Array[File]+ intervals String outputStats = outputVcf + ".stats" - String memory = "16G" + String memory = "6G" String javaXmx = "4G" + Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -1179,6 +1223,7 @@ task MuTect2 { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -1201,6 +1246,7 @@ task MuTect2 { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -1215,8 +1261,9 @@ task PlotDenoisedCopyRatios { File denoisedCopyRatios Int? minimumContigLength - String memory = "32G" - String javaXmx = "7G" + String memory = "6G" + String javaXmx = "4G" + Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer doesn't seem to contain R. } @@ -1244,6 +1291,7 @@ task PlotDenoisedCopyRatios { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -1257,6 +1305,7 @@ task PlotDenoisedCopyRatios { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -1272,8 +1321,9 @@ task PlotModeledSegments { File allelicCounts Int? minimumContigLength - String memory = "21G" - String javaXmx = "7G" + String memory = "6G" + String javaXmx = "4G" + Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer doesn't seem to contain R. } @@ -1297,6 +1347,7 @@ task PlotModeledSegments { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -1311,6 +1362,7 @@ task PlotModeledSegments { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -1327,8 +1379,9 @@ task PreprocessIntervals { Int padding = if defined(intervals) then 250 else 0 String intervalMergingRule = "OVERLAPPING_ONLY" - String memory = "10G" - String javaXmx = "2G" + String memory = "6G" + String javaXmx = "5G" + Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 6) String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -1352,6 +1405,7 @@ task PreprocessIntervals { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -1367,6 +1421,7 @@ task PreprocessIntervals { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -1382,8 +1437,9 @@ task SelectVariants { String outputPath = "output.vcf.gz" String? selectTypeToInclude Array[File] intervals = [] - String memory = "16G" + String memory = "6G" String javaXmx = "4G" + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -1406,6 +1462,7 @@ task SelectVariants { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -1424,6 +1481,7 @@ task SelectVariants { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program.
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -1498,8 +1556,9 @@ task VariantFiltration { Array[String]+ filterArguments Array[File] intervals = [] - String memory = "16G" + String memory = "6G" String javaXmx = "4G" + Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -1522,6 +1581,7 @@ task VariantFiltration { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -1540,6 +1600,7 @@ task VariantFiltration { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/umi-tools.wdl b/umi-tools.wdl index 608924f3..bd09853a 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -75,8 +75,8 @@ task Dedup { String? 
statsPrefix Boolean paired = true - String memory = "5G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 18) + String memory = "20G" + Int timeMinutes = 600 + ceil(size(inputBam, "G") * 60) # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" From 84ba1ef7e6676fa4a57a78a1976be3b0a1dff05e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 6 May 2020 14:57:26 +0200 Subject: [PATCH 058/902] time_minutes! --- gatk.wdl | 4 ++-- manta.wdl | 6 ++++++ picard.wdl | 5 ++++- samtools.wdl | 8 +++++++- somaticseq.wdl | 15 +++++++++++++++ strelka.wdl | 6 ++++++ vardict.wdl | 5 ++++- 7 files changed, 44 insertions(+), 5 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 31d895fd..7964b519 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -664,7 +664,7 @@ task FilterMutectCalls { String memory = "16G" String javaXmx = "12G" - Int timeMinutes = 180 + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" } @@ -1071,7 +1071,7 @@ task MergeStats { String memory = "16G" String javaXmx = "14G" - Int timeMinutes = 120 + Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } diff --git a/manta.wdl b/manta.wdl index 5006a01e..5382d2a5 100644 --- a/manta.wdl +++ b/manta.wdl @@ -33,6 +33,7 @@ task Germline { Int cores = 1 Int memoryGb = 4 + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/manta:1.4.0--py27_1" } @@ -60,6 +61,7 @@ task Germline { cpu: cores memory: "~{memoryGb}G" docker: dockerImage + time_minutes: timeMinutes } parameter_meta { @@ -74,6 +76,7 @@ task Germline { exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} cores: {description: "The the number of cores required to run a program", category: "required"} memoryGb: {description: "The memory required to run the manta", category: "required"} + 
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -93,6 +96,7 @@ task Somatic { Int cores = 1 Int memoryGb = 4 + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/manta:1.4.0--py27_1" } @@ -130,6 +134,7 @@ task Somatic { cpu: cores memory: "~{memoryGb}G" docker: dockerImage + time_minutes: timeMinutes } parameter_meta { @@ -145,6 +150,7 @@ task Somatic { exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} cores: {description: "The number of cores to use.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/picard.wdl b/picard.wdl index d19e3ac4..494ccf98 100644 --- a/picard.wdl +++ b/picard.wdl @@ -619,8 +619,9 @@ task SortVcf { String outputVcfPath File? dict - String memory = "24G" + String memory = "10G" String javaXmx = "8G" + Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -642,6 +643,7 @@ task SortVcf { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -654,6 +656,7 @@ task SortVcf { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/samtools.wdl b/samtools.wdl index 5ffebc9c..bdf811a0 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -26,6 +26,7 @@ task BgzipAndIndex { String outputDir String type = "vcf" + Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -44,7 +45,8 @@ task BgzipAndIndex { } runtime { - docker: dockerImage + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -52,6 +54,7 @@ task BgzipAndIndex { inputFile: {description: "The file to be compressed and indexed.", category: "required"} outputDir: {description: "The directory in which the output will be placed.", category: "required"} type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -395,6 +398,7 @@ task View { Int threads = 1 String memory = "1G" + Int timeMinutes = 1 + ceil(size(inFile, "G") * 5) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } String outputIndexPath = basename(outputFileName) + ".bai" @@ -424,6 +428,7 @@ task View { runtime { cpu: threads memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -440,6 +445,7 @@ task View { threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/somaticseq.wdl b/somaticseq.wdl index 49e5c36d..7b9a4403 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -47,6 +47,7 @@ task ParallelPaired { File? strelkaIndel Int threads = 1 + Int timeMinutes = 60 String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -89,6 +90,7 @@ task ParallelPaired { runtime { cpu: threads + time_minutes: timeMinutes docker: dockerImage } @@ -118,6 +120,7 @@ task ParallelPaired { strelkaIndel: {description: "An indel VCF as produced by somaticsniper.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -150,6 +153,7 @@ task ParallelPairedTrain { File? 
strelkaIndel Int threads = 1 + Int timeMinutes = 240 String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -191,6 +195,7 @@ task ParallelPairedTrain { runtime { cpu: threads + time_minutes: timeMinutes docker: dockerImage } @@ -220,6 +225,7 @@ task ParallelPairedTrain { strelkaIndel: {description: "An indel VCF as produced by somaticsniper.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -244,6 +250,7 @@ task ParallelSingle { File? strelkaVCF Int threads = 1 + Int timeMinutes = 60 String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -279,6 +286,7 @@ task ParallelSingle { runtime { cpu: threads + time_minutes: timeMinutes docker: dockerImage } @@ -300,6 +308,7 @@ task ParallelSingle { strelkaVCF: {description: "A VCF as produced by strelka.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -324,6 +333,7 @@ task ParallelSingleTrain { File? 
strelkaVCF Int threads = 1 + Int timeMinutes = 240 String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -358,6 +368,7 @@ task ParallelSingleTrain { runtime { cpu: threads + time_minutes: timeMinutes docker: dockerImage } @@ -379,6 +390,7 @@ task ParallelSingleTrain { strelkaVCF: {description: "A VCF as produced by strelka.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -389,6 +401,7 @@ task ModifyStrelka { File strelkaVCF String outputVCFName = basename(strelkaVCF, ".gz") String dockerImage = "lethalfang/somaticseq:3.1.0" + Int timeMinutes = 20 } command { @@ -407,12 +420,14 @@ task ModifyStrelka { } runtime { + time_minutes: timeMinutes docker: dockerImage } parameter_meta { strelkaVCF: {description: "A vcf file as produced by strelka.", category: "required"} outputVCFName: {description: "The location the output VCF file should be written to.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/strelka.wdl b/strelka.wdl index 826cbd8e..50c38b55 100644 --- a/strelka.wdl +++ b/strelka.wdl @@ -36,6 +36,7 @@ task Germline { Int cores = 1 Int memoryGb = 4 + Int timeMinutes = 90 String dockerImage = "quay.io/biocontainers/strelka:2.9.7--0" } @@ -62,6 +63,7 @@ task Germline { runtime { docker: dockerImage cpu: cores + time_minutes: timeMinutes memory: "~{memoryGb}G" } @@ -78,6 +80,7 @@ task Germline { cores: {description: "The number of cores to use.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -100,6 +103,7 @@ task Somatic { Int cores = 1 Int memoryGb = 4 + Int timeMinutes = 90 String dockerImage = "quay.io/biocontainers/strelka:2.9.7--0" File? doNotDefineThis #FIXME @@ -131,6 +135,7 @@ task Somatic { runtime { docker: dockerImage cpu: cores + time_minutes: timeMinutes memory: "~{memoryGb}G" } @@ -150,6 +155,7 @@ task Somatic { cores: {description: "The number of cores to use.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/vardict.wdl b/vardict.wdl index 7bfd118e..ffd05547 100644 --- a/vardict.wdl +++ b/vardict.wdl @@ -48,8 +48,9 @@ task VarDict { Float minimumAlleleFrequency = 0.02 Int threads = 1 - String memory = "40G" + String memory = "20G" String javaXmx = "16G" + Int timeMinutes = 300 String dockerImage = "quay.io/biocontainers/vardict-java:1.5.8--1" } @@ -87,6 +88,7 @@ task VarDict { runtime { cpu: threads + 2 memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -116,6 +118,7 @@ task VarDict { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From 515df50db5b47e108b9d1e0a3c13a1ad269f4104 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 6 May 2020 16:14:40 +0200 Subject: [PATCH 059/902] minor adjustments --- bedtools.wdl | 15 +++++++-------- biopet/bamstats.wdl | 2 +- biopet/biopet.wdl | 21 +++++++++++++-------- biopet/sampleconfig.wdl | 6 +++--- biopet/seqstat.wdl | 2 +- bowtie.wdl | 5 ----- clever.wdl | 3 +++ collect-columns.wdl | 30 ++++++++++-------------------- common.wdl | 4 +++- fastqc.wdl | 9 +++++---- gatk.wdl | 32 ++++++++++++++++---------------- 11 files changed, 62 insertions(+), 67 deletions(-) diff --git a/bedtools.wdl b/bedtools.wdl index 4f16b7c0..a64cef1a 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -66,6 +66,7 @@ task Merge { input { File inputBed String outputBed = "merged.bed" + Int timeMinutes = 1 + ceil(size(inputBed, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -78,18 +79,16 @@ task Merge { } runtime { + time_minutes: timeMinutes docker: dockerImage } parameter_meta { - inputBed: {description: "The bed to merge", - category: "required"} - outputBed: {description: "The path to write the output to", - category: "advanced"} - dockerImage: { - description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + inputBed: {description: "The bed to merge.", category: "required"} + outputBed: {description: "The path to write the output to.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } diff --git a/biopet/bamstats.wdl b/biopet/bamstats.wdl index 7def9aec..af01bb2e 100644 --- a/biopet/bamstats.wdl +++ b/biopet/bamstats.wdl @@ -34,7 +34,7 @@ task Generate { String outputDir Reference? reference - String memory = "16G" + String memory = "10G" String javaXmx = "8G" } diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index 9004f917..b90c5f4c 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -31,7 +31,7 @@ task BaseCounter { String outputDir String prefix - String memory = "14G" + String memory = "5G" String javaXmx = "4G" } @@ -104,9 +104,10 @@ task ExtractAdaptersFastqc { Float? adapterCutoff Boolean? outputAsFasta - String memory = "40G" # This is ridiculous, but needed due to vmem monitoring on SGE. + String memory = "10G" String javaXmx = "8G" String dockerImage = "quay.io/biocontainers/biopet-extractadaptersfastqc:0.2--1" + Int timeMinutes = 5 } command { @@ -133,6 +134,7 @@ task ExtractAdaptersFastqc { runtime { memory: memory docker: dockerImage + time_minutes: timeMinutes } } @@ -143,7 +145,7 @@ task FastqSplitter { Array[String]+ outputPaths File? toolJar - String memory = "12G" + String memory = "5G" String javaXmx = "4G" String dockerImage = "quay.io/biocontainers/biopet-fastqsplitter:0.1--2" } @@ -175,7 +177,7 @@ task FastqSync { String out2path File? toolJar - String memory = "10G" + String memory = "5G" String javaXmx = "4G" } @@ -216,6 +218,7 @@ task ReorderGlobbedScatters { # The 3.7-slim container is 143 mb on the filesystem. 3.7 is 927 mb. # The slim container is sufficient for this small task. 
String dockerImage = "python:3.7-slim" + Int timeMinutes = 5 } command <<< @@ -243,12 +246,14 @@ task ReorderGlobbedScatters { runtime { docker: dockerImage + time_minutes: timeMinutes # 4 gigs of memory to be able to build the docker image in singularity memory: "4G" } parameter_meta { scatters: {description: "The files which should be ordered.", category: "required"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -337,7 +342,7 @@ task ValidateAnnotation { File? gtfFile Reference reference - String memory = "9G" + String memory = "4G" String javaXmx = "3G" String dockerImage = "quay.io/biocontainers/biopet-validateannotation:0.1--0" } @@ -363,7 +368,7 @@ task ValidateFastq { input { File read1 File? read2 - String memory = "9G" + String memory = "4G" String javaXmx = "3G" String dockerImage = "quay.io/biocontainers/biopet-validatefastq:0.1.1--1" } @@ -388,7 +393,7 @@ task ValidateVcf { input { IndexedVcfFile vcf Reference reference - String memory = "9G" + String memory = "4G" String javaXmx = "3G" String dockerImage = "quay.io/biocontainers/biopet-validatevcf:0.1--0" } @@ -432,7 +437,7 @@ task VcfStats { Array[String]+? sparkConfigValues String dockerImage = "quay.io/biocontainers/biopet-vcfstats:1.2--0" - String memory = "12G" + String memory = "5G" String javaXmx = "4G" } diff --git a/biopet/sampleconfig.wdl b/biopet/sampleconfig.wdl index 0fbd466a..50f26311 100644 --- a/biopet/sampleconfig.wdl +++ b/biopet/sampleconfig.wdl @@ -34,7 +34,7 @@ task SampleConfig { String? jsonOutputPath String?
tsvOutputPath - String memory = "8G" + String memory = "18G" String javaXmx = "16G" } @@ -74,7 +74,7 @@ task SampleConfigCromwellArrays { Array[File]+ inputFiles String outputPath - String memory = "8G" + String memory = "5G" String javaXmx = "4G" } @@ -110,7 +110,7 @@ task CaseControl { String outputPath String controlTag = "control" - String memory = "8G" + String memory = "5G" String javaXmx = "4G" } diff --git a/biopet/seqstat.wdl b/biopet/seqstat.wdl index 6694a759..e3a55ec3 100644 --- a/biopet/seqstat.wdl +++ b/biopet/seqstat.wdl @@ -32,7 +32,7 @@ task Generate { String library String readgroup - String memory = "10G" + String memory = "5G" String javaXmx = "4G" } diff --git a/bowtie.wdl b/bowtie.wdl index 87427e7d..500afea6 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -105,8 +105,3 @@ task Bowtie { category: "advanced"} } } - -struct BowtieIndex { - File fasta - Array[File] indexFiles -} \ No newline at end of file diff --git a/clever.wdl b/clever.wdl index 2da9f4d2..3a6515f7 100644 --- a/clever.wdl +++ b/clever.wdl @@ -37,6 +37,7 @@ task Mateclever { Int threads = 10 String memory = "15G" + Int timeMinutes = 600 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } @@ -63,6 +64,7 @@ task Mateclever { runtime { cpu: threads memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -78,6 +80,7 @@ task Mateclever { outputPath: {description: "The location the output VCF file should be written.", category: "common"} threads: {description: "The the number of threads required to run a program", category: "advanced"} memory: {description: "The memory required to run the programs", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/collect-columns.wdl b/collect-columns.wdl index ed2a4577..e4e3a948 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -65,26 +65,16 @@ task CollectColumns { } parameter_meta { - inputTables: {description: "The tables from which columns should be taken.", - category: "required"} - outputPath: {description: "The path to which the output should be written.", - category: "required"} - featureColumn: {description: "Equivalent to the -f option of collect-columns.", - category: "advanced"} - valueColumn: {description: "Equivalent to the -c option of collect-columns.", - category: "advanced"} - separator: {description: "Equivalent to the -s option of collect-columns.", - category: "advanced"} - sampleNames: {description: "Equivalent to the -n option of collect-columns.", - category: "advanced"} - header: {description: "Equivalent to the -H flag of collect-columns.", - category: "advanced"} - additionalAttributes: {description: "Equivalent to the -a option of collect-columns.", - category: "advanced"} - referenceGtf: {description: "Equivalent to the -g option of collect-columns.", - category: "advanced"} - featureAttribute: {description: "Equivalent to the -F option of collect-columns.", - category: "advanced"} + inputTables: {description: "The tables from which columns should be taken.", category: "required"} + outputPath: {description: "The path to which the output should be written.", category: "required"} + featureColumn: {description: "Equivalent to the -f option of collect-columns.", category: "advanced"} + valueColumn: {description: "Equivalent to the -c option of collect-columns.", category: "advanced"} + separator: {description: "Equivalent to the -s option of collect-columns.", category: "advanced"} + sampleNames: {description: "Equivalent to the -n option of collect-columns.", category: "advanced"} + header: {description: 
"Equivalent to the -H flag of collect-columns.", category: "advanced"} + additionalAttributes: {description: "Equivalent to the -a option of collect-columns.", category: "advanced"} + referenceGtf: {description: "Equivalent to the -g option of collect-columns.", category: "advanced"} + featureAttribute: {description: "Equivalent to the -F option of collect-columns.", category: "advanced"} memoryGb: {description: "The maximum amount of memory the job will need in GB", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", diff --git a/common.wdl b/common.wdl index be60f8cf..88848df2 100644 --- a/common.wdl +++ b/common.wdl @@ -179,10 +179,10 @@ task StringArrayMd5 { } task TextToFile { - input { String text String outputFile = "out.txt" + Int timeMinutes = 1 String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -197,11 +197,13 @@ task TextToFile { parameter_meta { text: {description: "The text to print", category: "required"} outputFile: {description: "The name of the output file", category: "common"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } runtime { memory: "1G" + time_minutes: timeMinutes docker: dockerImage } } diff --git a/fastqc.wdl b/fastqc.wdl index 606c1bd4..e24b6ce4 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -120,6 +120,7 @@ task Fastqc { task GetConfiguration { input { + Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/fastqc:0.11.7--4" } @@ -142,13 +143,13 @@ task GetConfiguration { runtime { memory: "2G" # Needs more than 1 to pull the docker image + time_minutes: timeMinutes docker: dockerImage } parameter_meta { - dockerImage: { - description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } diff --git a/gatk.wdl b/gatk.wdl index 7964b519..9fb80344 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -32,7 +32,7 @@ task AnnotateIntervals { File? segmentalDuplicationTrack Int featureQueryLookahead = 1000000 - String memory = "10G" + String memory = "3G" String javaXmx = "2G" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" @@ -224,7 +224,7 @@ task CalculateContamination { File tumorPileups File?
normalPileups - String memory = "24G" + String memory = "13G" String javaXmx = "12G" Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" @@ -490,7 +490,7 @@ task CombineVariants { Array[File]+ variantIndexes String outputPath - String memory = "16G" + String memory = "13G" String javaXmx = "12G" Int timeMinutes = 180 String dockerImage = "broadinstitute/gatk3:3.8-1" @@ -662,7 +662,7 @@ task FilterMutectCalls { Int uniqueAltReadCount = 4 File mutect2Stats - String memory = "16G" + String memory = "14G" String javaXmx = "12G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" @@ -806,7 +806,7 @@ task GenomicsDBImport { genomicsDBWorkspacePath: {description: "Where the genomicsDB files should be stored", category: "advanced"} genomicsDBTarFile: {description: "Where the .tar file containing the genomicsDB should be stored", category: "advanced"} tmpDir: {description: "Alternate temporary directory in case there is not enough space. Must be mounted when using containers", - category: "advanced"} + category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} @@ -896,7 +896,7 @@ task GetPileupSummaries { File sitesForContaminationIndex String outputPrefix - String memory = "16G" + String memory = "14G" String javaXmx = "12G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" @@ -1030,7 +1030,7 @@ task LearnReadOrientationModel { input { Array[File]+ f1r2TarGz - String memory = "16G" + String memory = "14G" String javaXmx = "12G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" @@ -1192,7 +1192,7 @@ task MuTect2 { Array[File]+ intervals String outputStats = outputVcf + ".stats" - String memory = "6G" + String memory = "5G" String javaXmx = "4G" Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" @@ -1261,8 +1261,8 @@ task PlotDenoisedCopyRatios { File denoisedCopyRatios Int? minimumContigLength - String memory = "6G" - String javaXmx = "4G" + String memory = "4G" + String javaXmx = "3G" Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer doesn't seem to contain R. } @@ -1321,8 +1321,8 @@ task PlotModeledSegments { File allelicCounts Int? minimumContigLength - String memory = "6G" - String javaXmx = "4G" + String memory = "4G" + String javaXmx = "3G" Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer doesn't seem to contain R. } @@ -1379,8 +1379,8 @@ task PreprocessIntervals { Int padding = if defined(intervals) then 250 else 0 String intervalMergingRule = "OVERLAPPING_ONLY" - String memory = "6G" - String javaXmx = "5G" + String memory = "4G" + String javaXmx = "3G" Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 6) String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -1437,7 +1437,7 @@ task SelectVariants { String outputPath = "output.vcf.gz" String? 
selectTypeToInclude Array[File] intervals = [] - String memory = "6G" + String memory = "5G" String javaXmx = "4G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" @@ -1556,7 +1556,7 @@ task VariantFiltration { Array[String]+ filterArguments Array[File] intervals = [] - String memory = "6G" + String memory = "5G" String javaXmx = "4G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" From c33f00ed95215f86f8b074fa8d068ac52395919c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 6 May 2020 16:31:51 +0200 Subject: [PATCH 060/902] various changes --- gatk.wdl | 42 +++++++++++++++++----------------- picard.wdl | 63 ++++++++++++++++++++++++--------------------------- rtg.wdl | 14 ++++++++---- samtools.wdl | 15 ++++++++++++ umi-tools.wdl | 8 ++++--- vardict.wdl | 4 ++-- 6 files changed, 82 insertions(+), 64 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 9fb80344..8e90f88a 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -41,7 +41,7 @@ task AnnotateIntervals { command { set -e mkdir -p "$(dirname ~{annotatedIntervalsPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ AnnotateIntervals \ -R ~{referenceFasta} \ -L ~{intervals} \ @@ -102,7 +102,7 @@ task ApplyBQSR { command { set -e mkdir -p "$(dirname ~{outputBamPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ ApplyBQSR \ --create-output-bam-md5 \ --add-output-sam-program-record \ @@ -232,7 +232,7 @@ task CalculateContamination { command { set -e - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ CalculateContamination \ -I ~{tumorPileups} \ ~{"-matched " + normalPileups} \ @@ -277,7 +277,7 @@ task CallCopyRatioSegments { command { set -e mkdir -p "$(dirname ~{outputPrefix})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ 
CallCopyRatioSegments \ -I ~{copyRatioSegments} \ -O ~{outputPrefix}.called.seg @@ -326,7 +326,7 @@ task CollectAllelicCounts { command { set -e mkdir -p "$(dirname ~{allelicCountsPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ CollectAllelicCounts \ -L ~{commonVariantSites} \ -I ~{inputBam} \ @@ -382,7 +382,7 @@ task CollectReadCounts { command { set -e mkdir -p "$(dirname ~{countsPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ CollectReadCounts \ -L ~{intervals} \ -I ~{inputBam} \ @@ -511,7 +511,7 @@ task CombineVariants { printf -- "-V:%s %s " "${ids[i]}" "${vars[i]}" done ') - java -Xmx~{javaXmx} -jar /usr/GenomeAnalysisTK.jar \ + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 -jar /usr/GenomeAnalysisTK.jar \ -T CombineVariants \ -R ~{referenceFasta} \ --genotypemergeoption ~{genotypeMergeOption} \ @@ -566,7 +566,7 @@ task CreateReadCountPanelOfNormals { command { set -e mkdir -p "$(dirname ~{PONpath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ CreateReadCountPanelOfNormals \ -I ~{sep=" -I " readCountsFiles} \ ~{"--annotated-intervals " + annotatedIntervals} \ @@ -613,7 +613,7 @@ task DenoiseReadCounts { command { set -e mkdir -p "$(dirname ~{outputPrefix})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ DenoiseReadCounts \ -I ~{readCounts} \ ~{"--count-panel-of-normals " + PON} \ @@ -671,7 +671,7 @@ task FilterMutectCalls { command { set -e mkdir -p "$(dirname ~{outputVcf})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ FilterMutectCalls \ -R ~{referenceFasta} \ -V ~{unfilteredVcf} \ @@ -780,7 +780,7 @@ task GenomicsDBImport { command { set -e mkdir -p "$(dirname ~{genomicsDBWorkspacePath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options 
'-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ GenomicsDBImport \ -V ~{sep=" -V " gvcfFiles} \ --genomicsdb-workspace-path ~{genomicsDBWorkspacePath} \ @@ -904,7 +904,7 @@ task GetPileupSummaries { command { set -e - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ GetPileupSummaries \ -I ~{sampleBam} \ -V ~{variantsForContamination} \ @@ -1038,7 +1038,7 @@ task LearnReadOrientationModel { command { set -e - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ LearnReadOrientationModel \ -I ~{sep=" -I " f1r2TarGz} \ -O "artifact-priors.tar.gz" @@ -1077,7 +1077,7 @@ task MergeStats { command { set -e - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ MergeMutectStats \ -stats ~{sep=" -stats " stats} \ -O "merged.stats" @@ -1125,7 +1125,7 @@ task ModelSegments { command { set -e mkdir -p ~{outputDir} - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ ModelSegments \ --denoised-copy-ratios ~{denoisedCopyRatios} \ --allelic-counts ~{allelicCounts} \ @@ -1201,7 +1201,7 @@ task MuTect2 { command { set -e mkdir -p "$(dirname ~{outputVcf})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ Mutect2 \ -R ~{referenceFasta} \ -I ~{sep=" -I " inputBams} \ @@ -1270,7 +1270,7 @@ task PlotDenoisedCopyRatios { command { set -e mkdir -p ~{outputDir} - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ PlotDenoisedCopyRatios \ --standardized-copy-ratios ~{standardizedCopyRatios} \ --denoised-copy-ratios ~{denoisedCopyRatios} \ @@ -1330,7 +1330,7 @@ task PlotModeledSegments { command { set -e mkdir -p ~{outputDir} - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ PlotModeledSegments \ --denoised-copy-ratios ~{denoisedCopyRatios} 
\ --allelic-counts ~{allelicCounts} \ @@ -1388,7 +1388,7 @@ task PreprocessIntervals { command { set -e mkdir -p "$(dirname ~{outputIntervalList})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ PreprocessIntervals \ -R ~{referenceFasta} \ --sequence-dictionary ~{referenceFastaDict} \ @@ -1446,7 +1446,7 @@ task SelectVariants { command { set -e mkdir -p "$(dirname ~{outputPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ SelectVariants \ -R ~{referenceFasta} \ -V ~{inputVcf} \ @@ -1565,7 +1565,7 @@ task VariantFiltration { command { set -e mkdir -p "$(dirname ~{outputPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ VariantFiltration \ -I ~{inputVcf} \ -R ~{referenceFasta} \ diff --git a/picard.wdl b/picard.wdl index 494ccf98..c9f9b835 100644 --- a/picard.wdl +++ b/picard.wdl @@ -26,8 +26,8 @@ task BedToIntervalList { File dict String outputPath = "regions.interval_list" - String memory = "5G" - String javaXmx = "4G" + String memory = "4G" + String javaXmx = "3G" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -56,8 +56,7 @@ task BedToIntervalList { # inputs bedFile: {description: "A bed file.", category: "required"} dict: {description: "A sequence dict file.", category: "required"} - outputPath: {description: "The location the output interval list should be written to.", - category: "advanced"} + outputPath: {description: "The location the output interval list should be written to.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} @@ -162,28 +161,23 @@ task CollectMultipleMetrics { parameter_meta { # inputs - inputBam: {description: "The input BAM file for which metrics will be collected.", - category: "required"} + inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - basename: {description: "The basename/prefix of the output files (may include directories).", - category: "required"} + basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} collectAlignmentSummaryMetrics: {description: "Equivalent to the `PROGRAM=CollectAlignmentSummaryMetrics` argument.", category: "advanced"} collectInsertSizeMetrics: {description: "Equivalent to the `PROGRAM=CollectInsertSizeMetrics` argument.", category: "advanced"} qualityScoreDistribution: {description: "Equivalent to the `PROGRAM=QualityScoreDistribution` argument.", category: "advanced"} - meanQualityByCycle: {description: "Equivalent to the `PROGRAM=MeanQualityByCycle` argument.", - category: "advanced"} + meanQualityByCycle: {description: "Equivalent to the `PROGRAM=MeanQualityByCycle` argument.", category: "advanced"} collectBaseDistributionByCycle: {description: "Equivalent to the `PROGRAM=CollectBaseDistributionByCycle` argument.", category: "advanced"} - collectGcBiasMetrics: {description: "Equivalent to the 
`PROGRAM=CollectGcBiasMetrics` argument.", - category: "advanced"} + collectGcBiasMetrics: {description: "Equivalent to the `PROGRAM=CollectGcBiasMetrics` argument.", category: "advanced"} collectSequencingArtifactMetrics: {description: "Equivalent to the `PROGRAM=CollectSequencingArtifactMetrics` argument.", category: "advanced"} collectQualityYieldMetrics: {description: "Equivalent to the `PROGRAM=CollectQualityYieldMetrics` argument.", @@ -237,12 +231,10 @@ task CollectRnaSeqMetrics { parameter_meta { # inputs - inputBam: {description: "The input BAM file for which metrics will be collected.", - category: "required"} + inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} refRefflat: {description: "A refflat file containing gene annotations.", catehory: "required"} - basename: {description: "The basename/prefix of the output files (may include directories).", - category: "required"} + basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} strandSpecificity: {description: "Equivalent to the `STRAND_SPECIFICITY` option of picard's CollectRnaSeqMetrics.", category: "common"} @@ -266,8 +258,8 @@ task CollectTargetedPcrMetrics { Array[File]+ targetIntervals String basename - String memory = "5G" - String javaXmx = "4G" + String memory = "4G" + String javaXmx = "3G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -300,11 +292,9 @@ task CollectTargetedPcrMetrics { parameter_meta { # inputs - inputBam: {description: "The input BAM file for which metrics will be collected.", - category: "required"} + inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} - referenceFasta: 
{description: "The reference fasta file which was also used for mapping.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} @@ -312,8 +302,7 @@ task CollectTargetedPcrMetrics { category: "required"} targetIntervals: {description: "An interval list describing the coordinates of the targets sequenced.", category: "required"} - basename: {description: "The basename/prefix of the output files (may include directories).", - category: "required"} + basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", @@ -331,8 +320,8 @@ task GatherBamFiles { Array[File]+ inputBamsIndex String outputBamPath - String memory = "5G" - String javaXmx = "4G" + String memory = "4G" + String javaXmx = "3G" Int timeMinutes = 1 + ceil(size(inputBams, "G") * 0.5) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -381,8 +370,9 @@ task GatherVcfs { Array[File]+ inputVcfIndexes String outputVcfPath = "out.vcf.gz" - String memory = "12G" + String memory = "5G" String javaXmx = "4G" + Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -402,6 +392,7 @@ task GatherVcfs { runtime { docker: dockerImage memory: memory + time_minutes: timeMinutes } parameter_meta { @@ -413,6 +404,7 @@ task GatherVcfs { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -428,7 +420,7 @@ task MarkDuplicates { String memory = "10G" String javaXmx = "8G" - Int timeMinutes = 1 + ceil(size(inputBams, "G")* 8) + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" # The program default for READ_NAME_REGEX is appropriate in nearly every case. @@ -547,7 +539,7 @@ task SamToFastq { File inputBamIndex Boolean paired = true - String memory = "48G" + String memory = "18G" String javaXmx = "16G" # High memory default to avoid crashes. String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" File? 
NONE @@ -584,8 +576,8 @@ task ScatterIntervalList { File interval_list Int scatter_count - String memory = "12G" - String javaXmx = "4G" + String memory = "4G" + String javaXmx = "3G" String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -667,8 +659,9 @@ task RenameSample { File inputVcf String outputPath = "./picard/renamed.vcf" String newSampleName - String memory = "24G" + String memory = "10G" String javaXmx = "8G" + Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) String dockerImage = "quay.io/biocontainers/picard:2.19.0--0" } @@ -688,6 +681,7 @@ task RenameSample { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -698,6 +692,7 @@ task RenameSample { newSampleName: {description: "A string to replace the old sample name.", category: "required"} memory: {description: "The memory required to run the programs", category: "advanced"} javaXmx: {description: "The max. memory allocated for JAVA", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/rtg.wdl b/rtg.wdl index 8fd53ca4..03a3f5dc 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -27,7 +27,8 @@ task Format { Array[File]+ inputFiles String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" String rtgMem = "8G" - String memory = "16G" + String memory = "10G" + Int timeMinutes = 1 + ceil(size(inputFiles) * 2) } command { @@ -45,15 +46,17 @@ task Format { runtime { docker: dockerImage memory: memory + time_minutes: timeMinutes } parameter_meta { - format: {description: "Format of input. Allowed values are [fasta, fastq, fastq-interleaved, sam-se, sam-pe] (Default is fasta)", + format: {description: "Format of input. 
Allowed values are [fasta, fastq, fastq-interleaved, sam-se, sam-pe].", category: "advanced"} outputPath: {description: "Where the output should be placed.", category: "advanced"} inputFiles: {description: "input sequence files. May be specified 1 or more times.", category: "required"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} rtgMem: {description: "The amount of memory rtg will allocate to the JVM", category: "advanced"} } @@ -77,7 +80,8 @@ task VcfEval { String outputMode = "split" Int threads = 1 # tool default is number of cores in the system 😱 String rtgMem = "8G" - String memory = "16G" + String memory = "10G" + Int timeMinutes = 1 + ceil(size([baseline, calls], "G") * 5) String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" } @@ -131,6 +135,7 @@ task VcfEval { docker: dockerImage cpu: threads memory: memory + time_minutes: timeMinutes } parameter_meta { @@ -157,6 +162,7 @@ task VcfEval { threads: {description: "Number of threads. Default is 1", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} rtgMem: {description: "The amount of memory rtg will allocate to the JVM", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} } diff --git a/samtools.wdl b/samtools.wdl index bdf811a0..b1c74857 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -115,6 +115,7 @@ task Merge { String outputBamPath = "merged.bam" Boolean force = true + Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } String indexPath = sub(outputBamPath, "\.bam$",".bai") @@ -133,6 +134,7 @@ task Merge { runtime { docker: dockerImage + time_minutes: timeMinutes } parameter_meta { @@ -140,6 +142,7 @@ task Merge { bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -150,6 +153,7 @@ task SortByName { File bamFile String outputBamPath = "namesorted.bam" + Int timeMinutes = 1 + ceil(size(bamFile, "G") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } @@ -165,12 +169,14 @@ task SortByName { runtime { docker: dockerImage + time_minutes: timeMinutes } parameter_meta { # inputs bamFile: {description: "The BAM file to get sorted.", category: "required"} outputBamPath: {description: "The location the sorted BAM file should be written to.", category: "common"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -181,6 +187,7 @@ task Markdup { File inputBam String outputBamPath + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } @@ -196,12 +203,14 @@ task Markdup { runtime { docker: dockerImage + time_minutes: timeMinutes } parameter_meta { # inputs inputBam: {description: "The BAM file to be processed.", category: "required"} outputBamPath: {description: "The location of the output BAM file.", category: "required"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -299,6 +308,7 @@ task Fastq { Int threads = 1 String memory = "1G" + Int timeMinutes = 1 + ceil(size(inputBam) * 2) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } @@ -327,6 +337,7 @@ task Fastq { cpu: threads memory: memory docker: dockerImage + time_minutes: timeMinutes } parameter_meta { @@ -342,6 +353,7 @@ task Fastq { outputQuality: {description: "Equivalent to samtools fastq's `-O` flag.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -352,6 +364,7 @@ task Tabix { File inputFile String outputFilePath = "indexed.vcf.gz" String type = "vcf" + Int timeMinutes = 1 + ceil(size(inputFile, "G") * 2) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } # FIXME: It is better to do the indexing on VCF creation. Not in a separate task. With file localization this gets hairy fast. @@ -371,6 +384,7 @@ task Tabix { } runtime { + time_minutes: timeMinutes docker: dockerImage } @@ -380,6 +394,7 @@ task Tabix { outputFilePath: {description: "The location where the file should be written to. The index will appear alongside this link to the file.", category: "common"} type: {description: "The type of file (eg. vcf or bed) to be indexed.", category: "common"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/umi-tools.wdl b/umi-tools.wdl index bd09853a..c5f3b145 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -29,7 +29,7 @@ task Extract { Boolean threePrime = false String read1Output = "umi_extracted_R1.fastq.gz" String? read2Output = "umi_extracted_R2.fastq.gz" - + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" } @@ -51,6 +51,7 @@ task Extract { runtime { docker: dockerImage + time_minutes: timeMinutes } parameter_meta { @@ -61,6 +62,7 @@ task Extract { threePrime: {description: "Whether or not the UMI's are at the reads' 3' end. If false the UMIs are extracted from the 5' end.", category: "advanced"} read1Output: {description: "The location to write the first/single-end output fastq file to.", category: "advanced"} read2Output: {description: "The location to write the second-end output fastq file to.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -75,8 +77,8 @@ task Dedup { String? 
statsPrefix Boolean paired = true - String memory = "20G" - Int timeMinutes = 600 + ceil(size(inputBam, "G") * 60) + String memory = "25G" + Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" diff --git a/vardict.wdl b/vardict.wdl index ffd05547..92beb32e 100644 --- a/vardict.wdl +++ b/vardict.wdl @@ -48,7 +48,7 @@ task VarDict { Float minimumAlleleFrequency = 0.02 Int threads = 1 - String memory = "20G" + String memory = "18G" String javaXmx = "16G" Int timeMinutes = 300 String dockerImage = "quay.io/biocontainers/vardict-java:1.5.8--1" @@ -56,7 +56,7 @@ task VarDict { command { set -e -o pipefail - export JAVA_OPTS="-Xmx~{javaXmx}" + export JAVA_OPTS="-Xmx~{javaXmx} -XX:ParallelGCThreads=1" vardict-java \ ~{"-th " + threads} \ -G ~{referenceFasta} \ From 1a90d56fb7b6ceeaea3bf7e42a1b3f4f549d0da2 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 7 May 2020 10:34:04 +0200 Subject: [PATCH 061/902] fix samtools sort --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 1c2f5d78..e123e635 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -195,7 +195,7 @@ task Sort { String memory = "2G" String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" - Int timeMinutes = 1 + ceil(size(bamFile, "G") * 2) + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) Int? 
threads } From ac71e415eb25857073bce330b7b8b43ec38ddd01 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 7 May 2020 11:25:31 +0200 Subject: [PATCH 062/902] fix task order samtools and remove duplicate task --- samtools.wdl | 346 +++++++++++++++++++++++---------------------------- 1 file changed, 156 insertions(+), 190 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index e123e635..5648eb1c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -98,297 +98,301 @@ task Faidx { } } -task Index { +task Fastq { input { - File bamFile - String? outputBamPath - String memory = "2G" - Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) + File inputBam + String outputRead1 + String? outputRead2 + String? outputRead0 + Int? includeFilter + Int? excludeFilter + Int? excludeSpecificFilter + Boolean appendReadNumber = false + Boolean outputQuality = false + Int? compressionLevel + + Int threads = 1 + String memory = "1G" + Int timeMinutes = 1 + ceil(size(inputBam) * 2) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } - # Select_first is needed, otherwise womtool validate fails. - String outputPath = select_first([outputBamPath, basename(bamFile)]) - String bamIndexPath = sub(outputPath, "\.bam$", ".bai") - command { - bash -c ' - set -e - # Make sure outputBamPath does not exist. - if [ ! -f ~{outputPath} ] - then - mkdir -p "$(dirname ~{outputPath})" - ln ~{bamFile} ~{outputPath} - fi - samtools index ~{outputPath} ~{bamIndexPath} - ' + samtools fastq \ + ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ + ~{"-2 " + outputRead2} \ + ~{"-0 " + outputRead0} \ + ~{"-f " + includeFilter} \ + ~{"-F " + excludeFilter} \ + ~{"-G " + excludeSpecificFilter} \ + ~{true="-N" false="-n" appendReadNumber} \ + ~{true="-O" false="" outputQuality} \ + ~{"-c " + compressionLevel} \ + ~{"--threads " + threads} \ + ~{inputBam} } output { - File indexedBam = outputPath - File index = bamIndexPath + File read1 = outputRead1 + File? read2 = outputRead2 + File? 
read0 = outputRead0 } runtime { + cpu: threads memory: memory - time_minutes: timeMinutes docker: dockerImage + time_minutes: timeMinutes } parameter_meta { # inputs - bamFile: {description: "The BAM file for which an index should be made.", category: "required"} - outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", - category: "common"} - memory: {description: "The amount of memory needed for the job.", category: "advanced"} + inputBam: {description: "The bam file to process.", category: "required"} + outputRead1: {description: "The location the reads (first reads for pairs, in case of paired-end sequencing) should be written to.", category: "required"} + outputRead2: {description: "The location the second reads from pairs should be written to.", category: "common"} + outputRead0: {description: "The location the unpaired reads should be written to (in case of paired-end sequenicng).", category: "advanced"} + includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`", category: "advanced"} + excludeFilter: {description: "Exclude reads with ONE OR MORE of these flags. Corresponds to `-F`", category: "advanced"} + excludeSpecificFilter: {description: "Exclude reads with ALL of these flags. Corresponds to `-G`", category: "advanced"} + appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. Corresponds to `-n/N`", category: "advanced"} + outputQuality: {description: "Equivalent to samtools fastq's `-O` flag.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -task Merge { +task FilterShortReadsBam { input { - Array[File]+ bamFiles - String outputBamPath = "merged.bam" - Boolean force = true - - Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) + File bamFile + String outputPathBam + String memory = "1G" + Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } - String indexPath = sub(outputBamPath, "\.bam$",".bai") + + String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") command { set -e - mkdir -p "$(dirname ~{outputBamPath})" - samtools merge ~{true="-f" false="" force} ~{outputBamPath} ~{sep=' ' bamFiles} - samtools index ~{outputBamPath} ~{indexPath} + mkdir -p "$(dirname ~{outputPathBam})" + samtools view -h ~{bamFile} | \ + awk 'length($10) > 30 || $1 ~/^@/' | \ + samtools view -bS -> ~{outputPathBam} + samtools index ~{outputPathBam} ~{outputPathBamIndex} } output { - File outputBam = outputBamPath - File outputBamIndex = indexPath + File filteredBam = outputPathBam + File filteredBamIndex = outputPathBamIndex } runtime { - docker: dockerImage + memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - # inputs - bamFiles: {description: "The BAM files to merge.", category: "required"} - outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} - force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} + bamFile: {description: "The bam file to process.", category: "required"} + outputPathBam: {description: "The filtered bam file.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -task Sort { +task Flagstat { input { File inputBam String outputPath - Boolean sortByName = false - Int compressionLevel = 1 - String memory = "2G" - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) - Int? threads + String memory = "1G" + Int timeMinutes = 1 + ceil(size(inputBam, "G")) + String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } command { set -e mkdir -p "$(dirname ~{outputPath})" - samtools sort \ - -l ~{compressionLevel} \ - ~{true="-n" false="" sortByName} \ - ~{"--threads " + threads} \ - -o ~{outputPath} \ - ~{inputBam} + samtools flagstat ~{inputBam} > ~{outputPath} } output { - File outputSortedBam = outputPath + File flagstat = outputPath } runtime { - cpu: 1 + select_first([threads, 0]) memory: memory - docker: dockerImage time_minutes: timeMinutes + docker: dockerImage } parameter_meta { # inputs - inputBam: {description: "The input SAM file.", category: "required"} - outputPath: {description: "Output directory path + output file.", category: "required"} - sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} - compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} + inputBam: {description: "The BAM file for which statistics should be retrieved.", category: "required"} + outputPath: {description: "The location the ouput should be written to.", category: "required"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - # outputs - outputSortedBam: {description: "Sorted BAM file."} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } -task Markdup { +task Index { input { - File inputBam - String outputBamPath - - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + File bamFile + String? outputBamPath + String memory = "2G" + Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } + # Select_first is needed, otherwise womtool validate fails. + String outputPath = select_first([outputBamPath, basename(bamFile)]) + String bamIndexPath = sub(outputPath, "\.bam$", ".bai") + command { + bash -c ' set -e - mkdir -p "$(dirname ~{outputBamPath})" - samtools markdup ~{inputBam} ~{outputBamPath} + # Make sure outputBamPath does not exist. + if [ ! 
-f ~{outputPath} ] + then + mkdir -p "$(dirname ~{outputPath})" + ln ~{bamFile} ~{outputPath} + fi + samtools index ~{outputPath} ~{bamIndexPath} + ' } output { - File outputBam = outputBamPath + File indexedBam = outputPath + File index = bamIndexPath } runtime { - docker: dockerImage + memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { # inputs - inputBam: {description: "The BAM file to be processed.", category: "required"} - outputBamPath: {description: "The location of the output BAM file.", category: "required"} + bamFile: {description: "The BAM file for which an index should be made.", category: "required"} + outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", + category: "common"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -task FilterShortReadsBam { +task Markdup { input { - File bamFile - String outputPathBam - String memory = "1G" - Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) + File inputBam + String outputBamPath + + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } - String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") - command { set -e - mkdir -p "$(dirname ~{outputPathBam})" - samtools view -h ~{bamFile} | \ - awk 'length($10) > 30 || $1 ~/^@/' | \ - samtools view -bS -> ~{outputPathBam} - samtools index ~{outputPathBam} ~{outputPathBamIndex} + mkdir -p "$(dirname ~{outputBamPath})" + samtools markdup ~{inputBam} ~{outputBamPath} } output { - File filteredBam = outputPathBam - File filteredBamIndex = outputPathBamIndex + File outputBam = outputBamPath } runtime { - memory: memory - time_minutes: timeMinutes docker: dockerImage + time_minutes: timeMinutes } parameter_meta { - bamFile: {description: "The bam file to process.", category: "required"} - outputPathBam: {description: "The filtered bam file.", category: "common"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} + # inputs + inputBam: {description: "The BAM file to be processed.", category: "required"} + outputBamPath: {description: "The location of the output BAM file.", category: "required"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } -task Flagstat { +task Merge { input { - File inputBam - String outputPath + Array[File]+ bamFiles + String outputBamPath = "merged.bam" + Boolean force = true - String memory = "1G" - Int timeMinutes = 1 + ceil(size(inputBam, "G")) + Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } + String indexPath = sub(outputBamPath, "\.bam$",".bai") command { set -e - mkdir -p "$(dirname ~{outputPath})" - samtools flagstat ~{inputBam} > ~{outputPath} + mkdir -p "$(dirname ~{outputBamPath})" + samtools merge ~{true="-f" false="" force} ~{outputBamPath} ~{sep=' ' bamFiles} + samtools index ~{outputBamPath} ~{indexPath} } output { - File flagstat = outputPath + File outputBam = outputBamPath + File outputBamIndex = indexPath } runtime { - memory: memory - time_minutes: timeMinutes docker: dockerImage + time_minutes: timeMinutes } parameter_meta { # inputs - inputBam: {description: "The BAM file for which statistics should be retrieved.", category: "required"} - outputPath: {description: "The location the ouput should be written to.", category: "required"} - memory: {description: "The amount of memory needed for the job.", category: "advanced"} + bamFiles: {description: "The BAM files to merge.", category: "required"} + outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} + force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -task Fastq { +task Sort { input { File inputBam - String outputRead1 - String? outputRead2 - String? 
outputRead0 - Int? includeFilter - Int? excludeFilter - Int? excludeSpecificFilter - Boolean appendReadNumber = false - Boolean outputQuality = false - Int? compressionLevel + String outputPath + Boolean sortByName = false + Int compressionLevel = 1 - Int threads = 1 - String memory = "1G" - Int timeMinutes = 1 + ceil(size(inputBam) * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + String memory = "2G" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + Int? threads } command { - samtools fastq \ - ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ - ~{"-2 " + outputRead2} \ - ~{"-0 " + outputRead0} \ - ~{"-f " + includeFilter} \ - ~{"-F " + excludeFilter} \ - ~{"-G " + excludeSpecificFilter} \ - ~{true="-N" false="-n" appendReadNumber} \ - ~{true="-O" false="" outputQuality} \ - ~{"-c " + compressionLevel} \ + set -e + mkdir -p "$(dirname ~{outputPath})" + samtools sort \ + -l ~{compressionLevel} \ + ~{true="-n" false="" sortByName} \ ~{"--threads " + threads} \ + -o ~{outputPath} \ ~{inputBam} } output { - File read1 = outputRead1 - File? read2 = outputRead2 - File? 
read0 = outputRead0 + File outputSortedBam = outputPath } runtime { - cpu: threads + cpu: 1 + select_first([threads, 0]) memory: memory docker: dockerImage time_minutes: timeMinutes @@ -396,20 +400,16 @@ task Fastq { parameter_meta { # inputs - inputBam: {description: "The bam file to process.", category: "required"} - outputRead1: {description: "The location the reads (first reads for pairs, in case of paired-end sequencing) should be written to.", category: "required"} - outputRead2: {description: "The location the second reads from pairs should be written to.", category: "common"} - outputRead0: {description: "The location the unpaired reads should be written to (in case of paired-end sequenicng).", category: "advanced"} - includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`", category: "advanced"} - excludeFilter: {description: "Exclude reads with ONE OR MORE of these flags. Corresponds to `-F`", category: "advanced"} - excludeSpecificFilter: {description: "Exclude reads with ALL of these flags. Corresponds to `-G`", category: "advanced"} - appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. 
Corresponds to `-n/N`", category: "advanced"} - outputQuality: {description: "Equivalent to samtools fastq's `-O` flag.", category: "advanced"} - threads: {description: "The number of threads to use.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} + inputBam: {description: "The input SAM file.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "required"} + sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + # outputs + outputSortedBam: {description: "Sorted BAM file."} } } @@ -518,38 +518,4 @@ task View { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} - -task FilterShortReadsBam { - input { - File bamFile - String outputPathBam - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" - } - - String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") - - command { - set -e - mkdir -p "$(dirname ~{outputPathBam})" - samtools view -h ~{bamFile} | \ - awk 'length($10) > 30 || $1 ~/^@/' | \ - samtools view -bS -> ~{outputPathBam} - samtools index ~{outputPathBam} ~{outputPathBamIndex} - } - - output { - File filteredBam = outputPathBam - File filteredBamIndex = outputPathBamIndex - } - - runtime { - docker: dockerImage - } - - parameter_meta { - bamFile: {description: "The bam file to process.", category: "required"} - outputPathBam: {description: "The filtered bam file.", category: "common"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - } -} +} \ No newline at end of file From 859b722f2c671bd03c21a1dc563f8adde0199c57 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 May 2020 14:45:27 +0200 Subject: [PATCH 063/902] add options specific for RNA haplotype calling --- gatk.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index b730cbee..33a43520 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -910,6 +910,8 @@ task HaplotypeCaller { String? 
outputMode Boolean gvcf = false String emitRefConfidence = if gvcf then "GVCF" else "NONE" + Boolean dontUseSoftClippedBases = false + Float standardMinConfidenceThresholdForCalling String memory = "12G" String javaXmx = "4G" @@ -931,7 +933,9 @@ task HaplotypeCaller { ~{"--pedigree " + pedigree} \ ~{"--contamination-fraction-per-sample-file " + contamination} \ ~{"--output-mode " + outputMode} \ - --emit-ref-confidence ~{emitRefConfidence} + --emit-ref-confidence ~{emitRefConfidence} \ + ~{true="--dont-use-soft-clipped-bases" false="" dontUseSoftClippedBases} \ + ~{"--standard-min-confidence-threshold-for-calling " + standardMinConfidenceThresholdForCalling} } output { From 5ca28720b68d844baeca013a19a5e14a45b25a85 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 May 2020 16:14:19 +0200 Subject: [PATCH 064/902] make minconfidence treshold optional --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 33a43520..6c28ab68 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -911,7 +911,7 @@ task HaplotypeCaller { Boolean gvcf = false String emitRefConfidence = if gvcf then "GVCF" else "NONE" Boolean dontUseSoftClippedBases = false - Float standardMinConfidenceThresholdForCalling + Float? 
standardMinConfidenceThresholdForCalling String memory = "12G" String javaXmx = "4G" From 6b8a7551655010f6680c93806ea35cc4521b2a8c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 11 May 2020 11:09:00 +0200 Subject: [PATCH 065/902] time_minutes --- centrifuge.wdl | 12 ++++++++++++ minimap2.wdl | 6 ++++++ survivor.wdl | 3 +++ talon.wdl | 24 ++++++++++++++++++++++++ transcriptclean.wdl | 9 +++++++++ vt.wdl | 3 +++ 6 files changed, 57 insertions(+) diff --git a/centrifuge.wdl b/centrifuge.wdl index 1fbc7be1..f2b26043 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -37,6 +37,7 @@ task Build { Int threads = 5 String memory = "20G" + Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -64,6 +65,7 @@ task Build { runtime { cpu: threads memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -82,6 +84,7 @@ task Build { sizeTable: {description: "List of taxonomic IDs and lengths of the sequences belonging to the same taxonomic IDs.", category: "common"} threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -184,6 +187,7 @@ task Inspect { Int? 
across String memory = "4G" + Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -210,6 +214,7 @@ task Inspect { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -220,6 +225,7 @@ task Inspect { outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} across: {description: "When printing FASTA output, output a newline character every bases.", category: "common"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -307,6 +313,7 @@ task Kreport { Int? minimumLength String memory = "4G" + Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -335,6 +342,7 @@ task Kreport { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -349,6 +357,7 @@ task Kreport { minimumScore: {description: "Require a minimum score for reads to be counted.", category: "advanced"} minimumLength: {description: "Require a minimum alignment length to the read.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -362,6 +371,7 @@ task KTimportTaxonomy { String outputPrefix String memory = "4G" + Int timeMinutes = 1 String dockerImage = "biocontainers/krona:v2.7.1_cv1" } @@ -379,6 +389,7 @@ task KTimportTaxonomy { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -387,6 +398,7 @@ task KTimportTaxonomy { inputFile: {description: "File with Centrifuge classification results.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/minimap2.wdl b/minimap2.wdl index fd28d4a9..04b02bf2 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -32,6 +32,7 @@ task Indexing { Int cores = 1 String memory = "4G" + Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/minimap2:2.17--h84994c4_0" } @@ -55,6 +56,7 @@ task Indexing { runtime { cpu: cores memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -68,6 +70,7 @@ task Indexing { splitIndex: {description: "Split index for every ~NUM input bases.", category: "advanced"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # output @@ -96,6 +99,7 @@ task Mapping { Int cores = 4 String memory = "30G" + Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) String dockerImage = "quay.io/biocontainers/minimap2:2.17--h84994c4_0" } @@ -128,6 +132,7 @@ task Mapping { runtime { cpu: cores memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -149,6 +154,7 @@ task Mapping { queryFile: {description: "Input fasta file.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # output diff --git a/survivor.wdl b/survivor.wdl index ded11d75..e5ac7b5b 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -33,6 +33,7 @@ task Merge { Int minSize = 30 String outputPath = "./survivor/merged.vcf" String memory = "24G" + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" } @@ -57,6 +58,7 @@ task Merge { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -71,6 +73,7 @@ task Merge { minSize: {description: "The mimimum size of SV to be merged", category: "advanced"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The memory required to run the programs", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/talon.wdl b/talon.wdl index 6ddb841e..c6402fe4 100644 --- a/talon.wdl +++ b/talon.wdl @@ -31,6 +31,7 @@ task CreateAbundanceFileFromDatabase { File? datasetsFile String memory = "4G" + Int timeMinutes = 1 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -52,6 +53,7 @@ task CreateAbundanceFileFromDatabase { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -64,6 +66,7 @@ task CreateAbundanceFileFromDatabase { whitelistFile: {description: "Whitelist file of transcripts to include in the output.", category: "advanced"} datasetsFile: {description: "A file indicating which datasets should be included.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -84,6 +87,7 @@ task CreateGtfFromDatabase { File? datasetFile String memory = "4G" + Int timeMinutes = 1 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -106,6 +110,7 @@ task CreateGtfFromDatabase { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -119,6 +124,7 @@ task CreateGtfFromDatabase { whitelistFile: {description: "Whitelist file of transcripts to include in the output.", category: "advanced"} datasetFile: {description: "A file indicating which datasets should be included.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -135,6 +141,7 @@ task FilterTalonTranscripts { File? pairingsFile String memory = "4G" + Int timeMinutes = 1 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -154,6 +161,7 @@ task FilterTalonTranscripts { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -164,6 +172,7 @@ task FilterTalonTranscripts { outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} pairingsFile: {description: "A file indicating which datasets should be considered together.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -180,6 +189,7 @@ task GetReadAnnotations { File? datasetFile String memory = "4G" + Int timeMinutes = 1 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -199,6 +209,7 @@ task GetReadAnnotations { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -209,6 +220,7 @@ task GetReadAnnotations { outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} datasetFile: {description: "A file indicating which datasets should be included.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -228,6 +240,7 @@ task InitializeTalonDatabase { String outputPrefix String memory = "10G" + Int timeMinutes = 1 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -251,6 +264,7 @@ task InitializeTalonDatabase { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -265,6 +279,7 @@ task InitializeTalonDatabase { cutoff3p: {description: "Maximum allowable distance (bp) at the 3' end during annotation.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -277,6 +292,7 @@ task ReformatGtf { File GTFfile String memory = "4G" + Int timeMinutes = 1 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -292,6 +308,7 @@ task ReformatGtf { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -299,6 +316,7 @@ task ReformatGtf { # inputs GTFfile: {description: "GTF annotation containing genes, transcripts, and edges.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -315,6 +333,7 @@ task SummarizeDatasets { File? 
datasetGroupsCSV String memory = "4G" + Int timeMinutes = 1 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -334,6 +353,7 @@ task SummarizeDatasets { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -344,6 +364,7 @@ task SummarizeDatasets { outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} datasetGroupsCSV: {description: "File of comma-delimited dataset groups to process together.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -364,6 +385,7 @@ task Talon { Int cores = 4 String memory = "25G" + Int timeMinutes = 2880 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -399,6 +421,7 @@ task Talon { runtime { cpu: cores memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -414,6 +437,7 @@ task Talon { outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/transcriptclean.wdl b/transcriptclean.wdl index 68bcbf24..8c62190f 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -28,6 +28,7 @@ task GetSJsFromGtf { Int minIntronSize = 21 String memory = "8G" + Int timeMinutes = 1 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } @@ -47,6 +48,7 @@ task GetSJsFromGtf { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -57,6 +59,7 @@ task GetSJsFromGtf { minIntronSize: {description: "Minimum size of intron to consider a junction.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -70,6 +73,7 @@ task GetTranscriptCleanStats { String outputPrefix String memory = "4G" + Int timeMinutes = 1 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } @@ -87,6 +91,7 @@ task GetTranscriptCleanStats { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -95,6 +100,7 @@ task GetTranscriptCleanStats { transcriptCleanSAMfile: {description: "Output SAM file from TranscriptClean", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -124,6 +130,7 @@ task TranscriptClean { Int cores = 1 String memory = "25G" + Int timeMinutes = 2880 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } @@ -159,6 +166,7 @@ task TranscriptClean { runtime { cpu: cores memory: memory + time_minute: timeMinutes docker: dockerImage } @@ -181,6 +189,7 @@ task TranscriptClean { variantFile: {description: "VCF formatted file of variants.", category: "common"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/vt.wdl b/vt.wdl index 54599db0..d4c134b9 100644 --- a/vt.wdl +++ b/vt.wdl @@ -29,6 +29,7 @@ task Normalize { String outputPath = "./vt/normalized_decomposed.vcf" String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" String memory = "4G" + Int timeMinutes = 30 } command { @@ -43,6 +44,7 @@ task Normalize { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -54,6 +56,7 @@ task Normalize { referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} memory: {description: "The memory required to run the programs", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From 73867fd5b045fdd1068f3b968a5e5927ee5a86ef Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 11 May 2020 11:27:09 +0200 Subject: [PATCH 066/902] Record change in changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e4dea7c7..153fa69c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ that users understand how the changes affect the new version. version 3.2.0-develop --------------------------- ++ GATK.HaplotypeCaller: Add `--dont-use-soft-clipped-bases` and + `--standard-min-confidence-threshold-for-calling` options. These are + required for RNA seq variant calling according to GATK best practices. + Samtools: Fix quotations in sort command. + Samtools SortByName is now called Sort. + Generalize sort task to now also sort by position, instead of just read name. From 6693dcfa829bb120c7d9c40450b8f34a615b300f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 11 May 2020 11:27:21 +0200 Subject: [PATCH 067/902] Add extra parameter_meta options for haplotypecaller --- gatk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index 6c28ab68..586c25d0 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -966,6 +966,8 @@ task HaplotypeCaller { category: "advanced"} emitRefConfidence: {description: "Whether to include reference calls. Three modes: 'NONE', 'BP_RESOLUTION' and 'GVCF'", category: "advanced"} + dontUseSoftClippedBases: {description: "Do not use soft-clipped bases. 
Should be 'true' for RNA variant calling.", category: "common"} + standardMinConfidenceThresholdForCalling: {description: "Confidence threshold used for calling variants.", category: "advanced"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} pedigree: {description: "Pedigree file for determining the population \"founders\"", category: "common"} From 3b8ca6a86272af3ae2603ede47305e5416785093 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 13 May 2020 13:21:46 +0200 Subject: [PATCH 068/902] time_minutes --- ccs.wdl | 3 +++ isoseq3.wdl | 3 +++ lima.wdl | 3 +++ talon.wdl | 14 +++++++------- transcriptclean.wdl | 4 ++-- 5 files changed, 18 insertions(+), 9 deletions(-) diff --git a/ccs.wdl b/ccs.wdl index 3a8f8879..d428053f 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -33,6 +33,7 @@ task CCS { Int cores = 2 String memory = "2G" + Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/pbccs:4.2.0--0" } @@ -63,6 +64,7 @@ task CCS { runtime { cpu: cores memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -78,6 +80,7 @@ task CCS { outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/isoseq3.wdl b/isoseq3.wdl index 10d87bbc..d241027c 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -32,6 +32,7 @@ task Refine { Int cores = 2 String memory = "2G" + Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" } @@ -61,6 +62,7 @@ task Refine { runtime { cpu: cores memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -75,6 +77,7 @@ task Refine { outputNamePrefix: {description: "Basename of the output files.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/lima.wdl b/lima.wdl index ba8a5407..77bcf320 100644 --- a/lima.wdl +++ b/lima.wdl @@ -50,6 +50,7 @@ task Lima { Int cores = 2 String memory = "2G" + Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/lima:1.11.0--0" } @@ -110,6 +111,7 @@ task Lima { runtime { cpu: cores memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -142,6 +144,7 @@ task Lima { outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/talon.wdl b/talon.wdl index c6402fe4..b2ae3a62 100644 --- a/talon.wdl +++ b/talon.wdl @@ -31,7 +31,7 @@ task CreateAbundanceFileFromDatabase { File? datasetsFile String memory = "4G" - Int timeMinutes = 1 + Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -87,7 +87,7 @@ task CreateGtfFromDatabase { File? datasetFile String memory = "4G" - Int timeMinutes = 1 + Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -141,7 +141,7 @@ task FilterTalonTranscripts { File? pairingsFile String memory = "4G" - Int timeMinutes = 1 + Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -189,7 +189,7 @@ task GetReadAnnotations { File? datasetFile String memory = "4G" - Int timeMinutes = 1 + Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -240,7 +240,7 @@ task InitializeTalonDatabase { String outputPrefix String memory = "10G" - Int timeMinutes = 1 + Int timeMinutes = 60 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -292,7 +292,7 @@ task ReformatGtf { File GTFfile String memory = "4G" - Int timeMinutes = 1 + Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -333,7 +333,7 @@ task SummarizeDatasets { File? 
datasetGroupsCSV String memory = "4G" - Int timeMinutes = 1 + Int timeMinutes = 50 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } diff --git a/transcriptclean.wdl b/transcriptclean.wdl index 8c62190f..15da1f58 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -28,7 +28,7 @@ task GetSJsFromGtf { Int minIntronSize = 21 String memory = "8G" - Int timeMinutes = 1 + Int timeMinutes = 30 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } @@ -73,7 +73,7 @@ task GetTranscriptCleanStats { String outputPrefix String memory = "4G" - Int timeMinutes = 1 + Int timeMinutes = 30 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } From e39ea045beac06432a338f885972946938f479a4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 14 May 2020 10:27:35 +0200 Subject: [PATCH 069/902] adjust time_minuteso --- ccs.wdl | 2 +- isoseq3.wdl | 2 +- lima.wdl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ccs.wdl b/ccs.wdl index d428053f..1762ac75 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -33,7 +33,7 @@ task CCS { Int cores = 2 String memory = "2G" - Int timeMinutes = 120 + Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/pbccs:4.2.0--0" } diff --git a/isoseq3.wdl b/isoseq3.wdl index d241027c..9e0dfdb2 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -32,7 +32,7 @@ task Refine { Int cores = 2 String memory = "2G" - Int timeMinutes = 180 + Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" } diff --git a/lima.wdl b/lima.wdl index 77bcf320..ddd37da4 100644 --- a/lima.wdl +++ b/lima.wdl @@ -50,7 +50,7 @@ task Lima { Int cores = 2 String memory = "2G" - Int timeMinutes = 1440 + Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/lima:1.11.0--0" } From a4319c3e3a89af6f38310f1560b3c6e6cc6932f4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 14 May 2020 10:37:42 +0200 Subject: [PATCH 070/902] update changelog --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff 
--git a/CHANGELOG.md b/CHANGELOG.md index 153fa69c..a167222c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,14 @@ that users understand how the changes affect the new version. version 3.2.0-develop --------------------------- ++ Adjusted the memory settings of many tools, especially java tools. + The should now more accurately represent actual memory usage (as + opposed to virtual memory). ++ Added `-XX:ParallelGCThreads=1` to the java options of java tasks. ++ Added `timeMinutes` input to many tasks, this indicates a maximum + number of minutes that the job will run. The associated runtime + attribute is `time_minutes` which can be used to inform + a scheduler (eg. slurm) of the run time of the job. + GATK.HaplotypeCaller: Add `--dont-use-soft-clipped-bases` and `--standard-min-confidence-threshold-for-calling` options. These are required for RNA seq variant calling according to GATK best practices. From fa1901c451dc3465e94d1b3b36be26a7260203b7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 14 May 2020 16:47:17 +0200 Subject: [PATCH 071/902] adress comments --- CHANGELOG.md | 4 +++- bedtools.wdl | 9 +++++--- biopet/bamstats.wdl | 2 +- biopet/biopet.wdl | 51 +---------------------------------------- biopet/sampleconfig.wdl | 2 +- biowdl.wdl | 2 +- bowtie.wdl | 2 +- bwa.wdl | 2 +- common.wdl | 2 +- gatk.wdl | 24 +++++++++---------- hisat2.wdl | 2 +- picard.wdl | 12 +++++----- rtg.wdl | 4 ++-- star.wdl | 2 +- stringtie.wdl | 2 +- 15 files changed, 39 insertions(+), 83 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 05e79ac3..540fbbf0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,10 @@ that users understand how the changes affect the new version. version 3.2.0-develop --------------------------- ++ The struct `BowtieIndex` was removed, as it has become obsolete. ++ The task `ReorderGlobbedScatters` was removed, as it has become obsolete. + Adjusted the memory settings of many tools, especially java tools. 
- The should now more accurately represent actual memory usage (as + They should now more accurately represent actual memory usage (as opposed to virtual memory). + Added `-XX:ParallelGCThreads=1` to the java options of java tasks. + Added `timeMinutes` input to many tasks, this indicates a maximum diff --git a/bedtools.wdl b/bedtools.wdl index a64cef1a..c228d6c6 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -25,7 +25,7 @@ task Complement { File faidx File inputBed String outputBed = basename(inputBed, "\.bed") + ".complement.bed" - String memory = "2G" + String memory = "~{512 + ceil(size([inputBed, faidx], "M"))}M" Int timeMinutes = 1 + ceil(size([inputBed, faidx], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -66,6 +66,7 @@ task Merge { input { File inputBed String outputBed = "merged.bed" + String memory = "~{512 + ceil(size(inputBed, "M"))}M" Int timeMinutes = 1 + ceil(size(inputBed, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -79,6 +80,7 @@ task Merge { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -86,6 +88,7 @@ task Merge { parameter_meta { inputBed: {description: "The bed to merge.", category: "required"} outputBed: {description: "The path to write the output to.", category: "advanced"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -97,7 +100,7 @@ task MergeBedFiles { input { Array[File]+ bedFiles String outputBed = "merged.bed" - String memory = "2G" + String memory = "~{512 + ceil(size(bedFiles, "M"))}M" Int timeMinutes = 1 + ceil(size(bedFiles, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -174,7 +177,7 @@ task Intersect { # Giving a faidx file will set the sorted option. File? faidx String outputBed = "intersect.bed" - String memory = "2G" + String memory = "~{512 + ceil(size([regionsA, regionsB], "M"))}M" Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } diff --git a/biopet/bamstats.wdl b/biopet/bamstats.wdl index af01bb2e..d71355d3 100644 --- a/biopet/bamstats.wdl +++ b/biopet/bamstats.wdl @@ -34,7 +34,7 @@ task Generate { String outputDir Reference? reference - String memory = "10G" + String memory = "9G" String javaXmx = "8G" } diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index b90c5f4c..d56ed574 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -104,7 +104,7 @@ task ExtractAdaptersFastqc { Float? adapterCutoff Boolean? outputAsFasta - String memory = "10G" + String memory = "9G" String javaXmx = "8G" String dockerImage = "quay.io/biocontainers/biopet-extractadaptersfastqc:0.2--1" Int timeMinutes = 5 @@ -210,55 +210,6 @@ task FastqSync { } } -task ReorderGlobbedScatters { - input { - Array[File]+ scatters - - # Should not be changed from the main pipeline. As it should not influence results. - # The 3.7-slim container is 143 mb on the filesystem. 3.7 is 927 mb. - # The slim container is sufficient for this small task. - String dockerImage = "python:3.7-slim" - Int timeMinutes = 5 - } - - command <<< - set -e - # Copy all the scatter files to the CWD so the output matches paths in - # the cwd. - for file in ~{sep=" " scatters} - do cp $file . 
- done - python << CODE - from os.path import basename - scatters = ['~{sep="','" scatters}'] - splitext = [basename(x).split(".") for x in scatters] - splitnum = [x.split("-") + [y] for x,y in splitext] - ordered = sorted(splitnum, key=lambda x: int(x[1])) - merged = ["{}-{}.{}".format(x[0],x[1],x[2]) for x in ordered] - for x in merged: - print(x) - CODE - >>> - - output { - Array[File] reorderedScatters = read_lines(stdout()) - } - - runtime { - docker: dockerImage - time_minutes = timeMinutes - # 4 gigs of memory to be able to build the docker image in singularity - memory: "4G" - } - - parameter_meta { - scatters: {description: "The files which should be ordered.", category: "required"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} - task ScatterRegions { input { File referenceFasta diff --git a/biopet/sampleconfig.wdl b/biopet/sampleconfig.wdl index 50f26311..2b36952b 100644 --- a/biopet/sampleconfig.wdl +++ b/biopet/sampleconfig.wdl @@ -34,7 +34,7 @@ task SampleConfig { String? jsonOutputPath String? 
tsvOutputPath - String memory = "18G" + String memory = "17G" String javaXmx = "16G" } diff --git a/biowdl.wdl b/biowdl.wdl index 7661a592..838755d9 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -52,7 +52,7 @@ task InputConverter { } runtime { - memory: "2G" + memory: "128M" time_minutes: timeMinutes docker: dockerImage } diff --git a/bowtie.wdl b/bowtie.wdl index 500afea6..b3f3ceae 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -38,7 +38,7 @@ task Bowtie { Int threads = 1 Int timeMinutes = 1 + ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads) - String memory = "10G" + String memory = "~{5 + ceil(size(indexFiles, "G"))}G" String picardXmx = "4G" # Image contains bowtie=1.2.2 and picard=2.9.2 String dockerImage = "quay.io/biocontainers/mulled-v2-bfe71839265127576d3cd749c056e7b168308d56:1d8bec77b352cdcf3e9ff3d20af238b33ed96eae-0" diff --git a/bwa.wdl b/bwa.wdl index a39eb3e9..01dae9b4 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -29,7 +29,7 @@ task Mem { String? readgroup Int threads = 4 - String memory = "20G" + String memory = "~{5 + ceil(size(indexFiles, "G"))}G" String picardXmx = "4G" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 200 / threads) # A mulled container is needed to have both picard and bwa in one container. diff --git a/common.wdl b/common.wdl index 88848df2..f8325523 100644 --- a/common.wdl +++ b/common.wdl @@ -214,7 +214,7 @@ task YamlToJson { String outputJson = basename(yaml, "\.ya?ml$") + ".json" Int timeMinutes = 1 - String memory = "1G" + String memory = "128M" # biowdl-input-converter has python and pyyaml. 
String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" } diff --git a/gatk.wdl b/gatk.wdl index ff30b2ba..edafc4d4 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -317,7 +317,7 @@ task CollectAllelicCounts { File referenceFastaDict File referenceFastaFai - String memory = "12G" + String memory = "11G" String javaXmx = "10G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" @@ -373,8 +373,8 @@ task CollectReadCounts { File referenceFastaFai String intervalMergingRule = "OVERLAPPING_ONLY" - String memory = "5G" - String javaXmx = "4G" + String memory = "8G" + String javaXmx = "7G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 5) String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -557,8 +557,8 @@ task CreateReadCountPanelOfNormals { Array[File]+ readCountsFiles File? annotatedIntervals - String memory = "5G" - String javaXmx = "4G" + String memory = "8G" + String javaXmx = "7G" Int timeMinutes = 5 String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer causes a spark related error for some reason... } @@ -604,7 +604,7 @@ task DenoiseReadCounts { File readCounts String outputPrefix - String memory = "6G" + String memory = "5G" String javaXmx = "4G" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" @@ -662,7 +662,7 @@ task FilterMutectCalls { Int uniqueAltReadCount = 4 File mutect2Stats - String memory = "14G" + String memory = "13G" String javaXmx = "12G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" @@ -771,7 +771,7 @@ task GenomicsDBImport { String genomicsDBWorkspacePath = "genomics_db" String genomicsDBTarFile = "genomics_db.tar.gz" String? 
tmpDir - String memory = "6G" + String memory = "5G" String javaXmx = "4G" Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" @@ -896,7 +896,7 @@ task GetPileupSummaries { File sitesForContaminationIndex String outputPrefix - String memory = "14G" + String memory = "13G" String javaXmx = "12G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" @@ -1036,7 +1036,7 @@ task LearnReadOrientationModel { input { Array[File]+ f1r2TarGz - String memory = "14G" + String memory = "13G" String javaXmx = "12G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" @@ -1075,7 +1075,7 @@ task MergeStats { input { Array[File]+ stats - String memory = "16G" + String memory = "15G" String javaXmx = "14G" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" @@ -1122,7 +1122,7 @@ task ModelSegments { else 30 Int maximumNumberOfSmoothingIterations = 10 - String memory = "12G" + String memory = "11G" String javaXmx = "10G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" diff --git a/hisat2.wdl b/hisat2.wdl index 3ea18ee8..7d638f1f 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -33,7 +33,7 @@ task Hisat2 { Boolean downstreamTranscriptomeAssembly = true Int threads = 4 - String memory = "48G" + String memory = "~{threads + 5 + ceil(size(indexFiles, "G"))}G" Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / threads) # quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1 # is a combination of hisat2 and samtools diff --git a/picard.wdl b/picard.wdl index a63c1ba2..9d401631 100644 --- a/picard.wdl +++ b/picard.wdl @@ -84,7 +84,7 @@ task CollectMultipleMetrics { Boolean collectSequencingArtifactMetrics = true Boolean collectQualityYieldMetrics = true - String memory = "10G" + String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = 
"quay.io/biocontainers/picard:2.20.5--0" @@ -200,7 +200,7 @@ task CollectRnaSeqMetrics { String basename String strandSpecificity = "NONE" - String memory = "10G" + String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" @@ -460,7 +460,7 @@ task MarkDuplicates { String outputBamPath String metricsPath - String memory = "10G" + String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" @@ -581,7 +581,7 @@ task SamToFastq { File inputBamIndex Boolean paired = true - String memory = "18G" + String memory = "17G" String javaXmx = "16G" # High memory default to avoid crashes. String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" File? NONE @@ -653,7 +653,7 @@ task SortVcf { String outputVcfPath File? dict - String memory = "10G" + String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" @@ -701,7 +701,7 @@ task RenameSample { File inputVcf String outputPath = "./picard/renamed.vcf" String newSampleName - String memory = "10G" + String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) String dockerImage = "quay.io/biocontainers/picard:2.19.0--0" diff --git a/rtg.wdl b/rtg.wdl index 03a3f5dc..104a5ef9 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -27,7 +27,7 @@ task Format { Array[File]+ inputFiles String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" String rtgMem = "8G" - String memory = "10G" + String memory = "9G" Int timeMinutes = 1 + ceil(size(inputFiles) * 2) } @@ -80,7 +80,7 @@ task VcfEval { String outputMode = "split" Int threads = 1 # tool default is number of cores in the system 😱 String rtgMem = "8G" - String memory = "10G" + String memory = "9G" Int timeMinutes = 1 + ceil(size([baseline, calls], "G") * 5) String 
dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" } diff --git a/star.wdl b/star.wdl index 7824c764..11fde466 100644 --- a/star.wdl +++ b/star.wdl @@ -101,7 +101,7 @@ task Star { Int? limitBAMsortRAM Int runThreadN = 4 - String memory = "48G" + String memory = "~{5 + ceil(size(indexFiles, "G"))}G" Int timeMinutes = 1 + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } diff --git a/stringtie.wdl b/stringtie.wdl index f1d994b3..5ed62dea 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -32,7 +32,7 @@ task Stringtie { String? geneAbundanceFile Int threads = 1 - String memory = "10G" + String memory = "2G" Int timeMinutes = 1 + ceil(size(bam, "G") * 60 / threads) String dockerImage = "quay.io/biocontainers/stringtie:1.3.4--py35_0" } From c535d259e32ffb2ed409585c8d9bb1d2a61a0008 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 15 May 2020 10:23:58 +0200 Subject: [PATCH 072/902] fix variable used for time_minutes estimation in bwa --- bwa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index 01dae9b4..3dd7883b 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -29,7 +29,7 @@ task Mem { String? readgroup Int threads = 4 - String memory = "~{5 + ceil(size(indexFiles, "G"))}G" + String memory = "~{5 + ceil(size(bwaIndex.indexFiles, "G"))}G" String picardXmx = "4G" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 200 / threads) # A mulled container is needed to have both picard and bwa in one container. 
From c3dbda9b0af69681b0ab21b44fd4c23ec06f9745 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 18 May 2020 09:12:12 +0200 Subject: [PATCH 073/902] MultiQC should work directly with report files --- multiqc.wdl | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/multiqc.wdl b/multiqc.wdl index db1dd21e..85ce58df 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -23,8 +23,8 @@ version 1.0 task MultiQC { input { # Use a string here so cromwell does not relocate an entire analysis directory - String analysisDirectory - Array[File] dependencies = [] # This must be used in order to run multiqc after these tasks. + Array[File] reports + String reportDir = "reports" Boolean force = false Boolean dirs = false Int? dirsDepth @@ -62,6 +62,23 @@ task MultiQC { } command { + # Below code requires python 3.6 or higher. + # This makes sure all report files are in a report directory that MultiQC can investigate. + python3 < Date: Mon, 18 May 2020 09:21:12 +0200 Subject: [PATCH 074/902] fix parameter_meta' --- multiqc.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/multiqc.wdl b/multiqc.wdl index 85ce58df..dec91f7c 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -129,8 +129,7 @@ task MultiQC { } parameter_meta { - analysisDirectory: {description: "The directory to run MultiQC on.", category: "required"} - dependencies: {description: "This must be used in order to run multiqc after these tasks.", category: "internal_use_only"} + reports: {description: "Reports which multiqc should run on.", category: "required"} force: {description: "Equivalent to MultiQC's `--force` flag.", category: "advanced"} dirs: {description: "Equivalent to MultiQC's `--dirs` flag.", category: "advanced"} dirsDepth: {description: "Equivalent to MultiQC's `--dirs-depth` option.", category: "advanced"} From 44a372eda41c9b37e3e28d2aa4d67bf5340827b4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 18 May 2020 10:12:03 +0200 Subject: [PATCH 
075/902] add reportfile output to star --- star.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/star.wdl b/star.wdl index 4c407331..5d0d6b6c 100644 --- a/star.wdl +++ b/star.wdl @@ -127,6 +127,7 @@ task Star { output { File bamFile = outFileNamePrefix + "Aligned." + samOutputNames[outSAMtype] + File logFinalOut = outFileNamePrefix + "Log.final.out" } runtime { From e1806e2ea3946b6be28df6a39a234dc2a5a691c1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 18 May 2020 11:03:40 +0200 Subject: [PATCH 076/902] add summary file to hisat2 task --- hisat2.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hisat2.wdl b/hisat2.wdl index bc6be2e8..aafa3331 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -31,6 +31,7 @@ task Hisat2 { String readgroup String platform = "illumina" Boolean downstreamTranscriptomeAssembly = true + String summaryFilePath = basename(outputBam, ".bam") + ".summary.txt" Int threads = 1 String memory = "48G" @@ -55,6 +56,7 @@ task Hisat2 { --rg 'LB:~{library}' \ --rg 'PL:~{platform}' \ ~{true="--dta" false="" downstreamTranscriptomeAssembly} \ + --summary-file ~{summaryFilePath} \ | samtools sort > ~{outputBam} samtools index ~{outputBam} ~{bamIndexPath} } @@ -62,6 +64,7 @@ task Hisat2 { output { File bamFile = outputBam File bamIndex = bamIndexPath + File summaryFile = summaryFilePath } runtime { From cf652ee34d5c39e5b841dcc048c457dad332624a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 18 May 2020 11:15:49 +0200 Subject: [PATCH 077/902] hisat2.wdl --- hisat2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hisat2.wdl b/hisat2.wdl index aafa3331..b65b2da9 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -56,7 +56,7 @@ task Hisat2 { --rg 'LB:~{library}' \ --rg 'PL:~{platform}' \ ~{true="--dta" false="" downstreamTranscriptomeAssembly} \ - --summary-file ~{summaryFilePath} \ + --new-summary ~{summaryFilePath} \ | samtools sort > ~{outputBam} samtools index ~{outputBam} ~{bamIndexPath} } From 
17cdbd13a8ea3288eb0e334cbb81a694f64b659e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 18 May 2020 13:30:15 +0200 Subject: [PATCH 078/902] make pdf files optional --- picard.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/picard.wdl b/picard.wdl index 5393cd3a..e9275504 100644 --- a/picard.wdl +++ b/picard.wdl @@ -114,7 +114,7 @@ task CollectMultipleMetrics { File baitBiasDetail = basename + ".bait_bias_detail_metrics" File baitBiasSummary = basename + ".bait_bias_summary_metrics" File baseDistributionByCycle = basename + ".base_distribution_by_cycle_metrics" - File baseDistributionByCyclePdf = basename + ".base_distribution_by_cycle.pdf" + File? baseDistributionByCyclePdf = basename + ".base_distribution_by_cycle.pdf" File errorSummary = basename + ".error_summary_metrics" File gcBiasDetail = basename + ".gc_bias.detail_metrics" File gcBiasPdf = basename + ".gc_bias.pdf" @@ -124,9 +124,9 @@ task CollectMultipleMetrics { File preAdapterDetail = basename + ".pre_adapter_detail_metrics" File preAdapterSummary = basename + ".pre_adapter_summary_metrics" File qualityByCycle = basename + ".quality_by_cycle_metrics" - File qualityByCyclePdf = basename + ".quality_by_cycle.pdf" + File? qualityByCyclePdf = basename + ".quality_by_cycle.pdf" File qualityDistribution = basename + ".quality_distribution_metrics" - File qualityDistributionPdf = basename + ".quality_distribution.pdf" + File? qualityDistributionPdf = basename + ".quality_distribution.pdf" File qualityYield = basename + ".quality_yield_metrics" # Using a glob is easier. But will lead to very ugly output directories. 
Array[File] allStats = select_all([ From e055553df027fb2047570e68c6aaf38f9a93faf3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 18 May 2020 15:06:13 +0200 Subject: [PATCH 079/902] fix missing parameter_meta --- picard.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/picard.wdl b/picard.wdl index 9d401631..057b1919 100644 --- a/picard.wdl +++ b/picard.wdl @@ -60,6 +60,7 @@ task BedToIntervalList { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From f5f38a7b67e35ff291de2de8739ae7d941871c3a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 18 May 2020 15:12:09 +0200 Subject: [PATCH 080/902] add allFiles output to gffcompare --- gffcompare.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/gffcompare.wdl b/gffcompare.wdl index ca2b1669..bf4be325 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -107,6 +107,7 @@ task GffCompare { File? 
missedIntrons = if debugMode then totalPrefix + ".missed_introns.gtf" else noneFile + Array[File] allFiles = select_all([annotated, loci, stats, tracking, redundant, missedIntrons]) } runtime { From efd99dda531a5f0e1381d9217a2c5853c16e967b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 18 May 2020 15:31:08 +0200 Subject: [PATCH 081/902] repair hisat2 --- hisat2.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hisat2.wdl b/hisat2.wdl index b65b2da9..b662dcb7 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -56,7 +56,8 @@ task Hisat2 { --rg 'LB:~{library}' \ --rg 'PL:~{platform}' \ ~{true="--dta" false="" downstreamTranscriptomeAssembly} \ - --new-summary ~{summaryFilePath} \ + --new-summary \ + --summary-file ~{summaryFilePath} \ | samtools sort > ~{outputBam} samtools index ~{outputBam} ~{bamIndexPath} } From dbe93a9e8c275996cb3f2a846ce56b607926eb88 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 19 May 2020 10:06:56 +0200 Subject: [PATCH 082/902] Make a number of outputs optional in Picard. --- CHANGELOG.md | 3 +++ picard.wdl | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 540fbbf0..010d81aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ that users understand how the changes affect the new version. version 3.2.0-develop --------------------------- ++ Picard: Make a number of outputs in `CollectMultipleMetrics` optional. BamMetrics + will fail if `CollectAlignmentSummaryMetrics` & `MeanQualityByCycle` in this + task are not optional. + The struct `BowtieIndex` was removed, as it has become obsolete. + The task `ReorderGlobbedScatters` was removed, as it has become obsolete. + Adjusted the memory settings of many tools, especially java tools. 
diff --git a/picard.wdl b/picard.wdl index 057b1919..c17029e2 100644 --- a/picard.wdl +++ b/picard.wdl @@ -113,7 +113,7 @@ task CollectMultipleMetrics { } output { - File alignmentSummary = basename + ".alignment_summary_metrics" + File? alignmentSummary = basename + ".alignment_summary_metrics" File baitBiasDetail = basename + ".bait_bias_detail_metrics" File baitBiasSummary = basename + ".bait_bias_summary_metrics" File baseDistributionByCycle = basename + ".base_distribution_by_cycle_metrics" @@ -126,10 +126,10 @@ task CollectMultipleMetrics { File? insertSize = basename + ".insert_size_metrics" File preAdapterDetail = basename + ".pre_adapter_detail_metrics" File preAdapterSummary = basename + ".pre_adapter_summary_metrics" - File qualityByCycle = basename + ".quality_by_cycle_metrics" - File qualityByCyclePdf = basename + ".quality_by_cycle.pdf" + File? qualityByCycle = basename + ".quality_by_cycle_metrics" + File? qualityByCyclePdf = basename + ".quality_by_cycle.pdf" File qualityDistribution = basename + ".quality_distribution_metrics" - File qualityDistributionPdf = basename + ".quality_distribution.pdf" + File? qualityDistributionPdf = basename + ".quality_distribution.pdf" File qualityYield = basename + ".quality_yield_metrics" # Using a glob is easier. But will lead to very ugly output directories. Array[File] allStats = select_all([ From 42c6d473d8b67c0f0d3387b6a62438780268ad2d Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 19 May 2020 10:16:23 +0200 Subject: [PATCH 083/902] Update CHANGELOG text. --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 010d81aa..d4719dcd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,8 +12,8 @@ that users understand how the changes affect the new version. version 3.2.0-develop --------------------------- + Picard: Make a number of outputs in `CollectMultipleMetrics` optional. 
BamMetrics - will fail if `CollectAlignmentSummaryMetrics` & `MeanQualityByCycle` in this - task are not optional. + will fail if `CollectAlignmentSummaryMetrics` & `MeanQualityByCycle` are set + to false and their outputs are not optional. + The struct `BowtieIndex` was removed, as it has become obsolete. + The task `ReorderGlobbedScatters` was removed, as it has become obsolete. + Adjusted the memory settings of many tools, especially java tools. From 28cb8e8d2584c3ee95be609daf0ac50d2ac547e2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 19 May 2020 10:23:55 +0200 Subject: [PATCH 084/902] Set all outputs to optional. --- CHANGELOG.md | 5 ++--- picard.wdl | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d4719dcd..65a37944 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,9 +11,8 @@ that users understand how the changes affect the new version. version 3.2.0-develop --------------------------- -+ Picard: Make a number of outputs in `CollectMultipleMetrics` optional. BamMetrics - will fail if `CollectAlignmentSummaryMetrics` & `MeanQualityByCycle` are set - to false and their outputs are not optional. ++ Picard: Make all outputs in `CollectMultipleMetrics`. This will make sure the + task will not fail if one of the metrics is set to false. + The struct `BowtieIndex` was removed, as it has become obsolete. + The task `ReorderGlobbedScatters` was removed, as it has become obsolete. + Adjusted the memory settings of many tools, especially java tools. diff --git a/picard.wdl b/picard.wdl index c17029e2..136081be 100644 --- a/picard.wdl +++ b/picard.wdl @@ -114,25 +114,25 @@ task CollectMultipleMetrics { output { File? 
alignmentSummary = basename + ".alignment_summary_metrics" - File baitBiasDetail = basename + ".bait_bias_detail_metrics" - File baitBiasSummary = basename + ".bait_bias_summary_metrics" - File baseDistributionByCycle = basename + ".base_distribution_by_cycle_metrics" - File baseDistributionByCyclePdf = basename + ".base_distribution_by_cycle.pdf" - File errorSummary = basename + ".error_summary_metrics" - File gcBiasDetail = basename + ".gc_bias.detail_metrics" - File gcBiasPdf = basename + ".gc_bias.pdf" - File gcBiasSummary = basename + ".gc_bias.summary_metrics" + File? baitBiasDetail = basename + ".bait_bias_detail_metrics" + File? baitBiasSummary = basename + ".bait_bias_summary_metrics" + File? baseDistributionByCycle = basename + ".base_distribution_by_cycle_metrics" + File? baseDistributionByCyclePdf = basename + ".base_distribution_by_cycle.pdf" + File? errorSummary = basename + ".error_summary_metrics" + File? gcBiasDetail = basename + ".gc_bias.detail_metrics" + File? gcBiasPdf = basename + ".gc_bias.pdf" + File? gcBiasSummary = basename + ".gc_bias.summary_metrics" File? insertSizeHistogramPdf = basename + ".insert_size_histogram.pdf" File? insertSize = basename + ".insert_size_metrics" - File preAdapterDetail = basename + ".pre_adapter_detail_metrics" - File preAdapterSummary = basename + ".pre_adapter_summary_metrics" + File? preAdapterDetail = basename + ".pre_adapter_detail_metrics" + File? preAdapterSummary = basename + ".pre_adapter_summary_metrics" File? qualityByCycle = basename + ".quality_by_cycle_metrics" File? qualityByCyclePdf = basename + ".quality_by_cycle.pdf" - File qualityDistribution = basename + ".quality_distribution_metrics" + File? qualityDistribution = basename + ".quality_distribution_metrics" File? qualityDistributionPdf = basename + ".quality_distribution.pdf" - File qualityYield = basename + ".quality_yield_metrics" + File? qualityYield = basename + ".quality_yield_metrics" # Using a glob is easier. 
But will lead to very ugly output directories. - Array[File] allStats = select_all([ + Array[File]? allStats = select_all([ alignmentSummary, baitBiasDetail, baitBiasSummary, From ab21de84c00609d3549ee8dc2278d929ff2c160b Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 19 May 2020 10:24:54 +0200 Subject: [PATCH 085/902] Update CHANGELOG. --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65a37944..dfa50280 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 3.2.0-develop --------------------------- -+ Picard: Make all outputs in `CollectMultipleMetrics`. This will make sure the ++ Picard: Make all outputs in `CollectMultipleMetrics` optional. This will make sure the task will not fail if one of the metrics is set to false. + The struct `BowtieIndex` was removed, as it has become obsolete. + The task `ReorderGlobbedScatters` was removed, as it has become obsolete. From 48aef36c6ac5a0db0e88bca6ba9702b942c30136 Mon Sep 17 00:00:00 2001 From: Jasper Date: Tue, 19 May 2020 10:35:58 +0200 Subject: [PATCH 086/902] Update picard.wdl Co-authored-by: DavyCats --- picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index 136081be..3103ad9b 100644 --- a/picard.wdl +++ b/picard.wdl @@ -132,7 +132,7 @@ task CollectMultipleMetrics { File? qualityDistributionPdf = basename + ".quality_distribution.pdf" File? qualityYield = basename + ".quality_yield_metrics" # Using a glob is easier. But will lead to very ugly output directories. - Array[File]? 
allStats = select_all([ + Array[File] allStats = select_all([ alignmentSummary, baitBiasDetail, baitBiasSummary, From f8198099cfa46fdc971764cb6a5bf30496291303 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 19 May 2020 13:16:44 +0200 Subject: [PATCH 087/902] fix missing paramter_meta --- biopet/biopet.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index d56ed574..cc8e1bc6 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -280,6 +280,7 @@ task ScatterRegions { bamIndex: {description: "The index for the bamfile given through bamFile.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From 408f06c6ed8cf2780f28a28cdc802b02b432f0d9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 19 May 2020 15:40:55 +0200 Subject: [PATCH 088/902] Update changelog with changes and to a newer version --- CHANGELOG.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dfa50280..ccd1cedc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,13 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 3.2.0-develop +version 4.0.0-develop --------------------------- ++ Added a log output for STAR. ++ Added report output to Hisat2. ++ Change MultiQC inputs. It now accepts an array of reports files. It does not + need access to a folder with the reports anymore. MultiQC can now be used + as a normal WDL task without hacks. 
+ Picard: Make all outputs in `CollectMultipleMetrics` optional. This will make sure the task will not fail if one of the metrics is set to false. + The struct `BowtieIndex` was removed, as it has become obsolete. From d3832132227b8a2197ac0d0b602281223f44c82d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 19 May 2020 16:04:43 +0200 Subject: [PATCH 089/902] cleanup multiqc. Add comments on how it works --- multiqc.wdl | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/multiqc.wdl b/multiqc.wdl index dec91f7c..b50122e0 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -24,7 +24,6 @@ task MultiQC { input { # Use a string here so cromwell does not relocate an entire analysis directory Array[File] reports - String reportDir = "reports" Boolean force = false Boolean dirs = false Int? dirsDepth @@ -37,13 +36,11 @@ task MultiQC { String? tag String? ignore String? ignoreSamples - Boolean ignoreSymlinks = false File? sampleNames File? fileList Array[String]+? exclude Array[String]+? module Boolean dataDir = false - Boolean noDataDir = false String? dataFormat Boolean zipDataDir = false Boolean export = false @@ -54,16 +51,27 @@ task MultiQC { Boolean megaQCUpload = false # This must be actively enabled in my opinion. The tools default is to upload. File? config # A directory String? clConfig - Array[Boolean] finished = [] # An array of booleans that can be used to let multiqc wait on stuff. - + String memory = "4G" String dockerImage = "quay.io/biocontainers/multiqc:1.7--py_1" } + # This is where the reports end up. It does not need to be changed by the + # user. It is full of symbolic links, so it is not of any use to the user + # anyway. + String reportDir = "reports" + + # Below code requires python 3.6 or higher. + # This makes sure all report files are in a report directory that + # MultiQC can investigate. + # This creates files in report_dir / hashed_parent / file basename. 
+ # By hashing the parent path we make sure there are no file colissions as + # files from the same directory end up in the same directory, while files + # from other directories get their own directory. Cromwell also uses this + # strategy. Using python's builtin hash is unique enough for these purposes. + command { - # Below code requires python 3.6 or higher. - # This makes sure all report files are in a report directory that MultiQC can investigate. python3 < Date: Tue, 19 May 2020 16:09:19 +0200 Subject: [PATCH 090/902] Parameter meta for multiqc updated. Make sure data dir output is always zipped by default. --- multiqc.wdl | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/multiqc.wdl b/multiqc.wdl index 62ca5421..6a967b3f 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -42,7 +42,7 @@ task MultiQC { Array[String]+? module Boolean dataDir = false String? dataFormat - Boolean zipDataDir = false + Boolean zipDataDir = true Boolean export = false Boolean flat = false Boolean interactive = true @@ -128,7 +128,7 @@ task MultiQC { output { File multiqcReport = outDir + "/" + reportFilename + "_report.html" - File multiqcDataDir = outDir + "/" +reportFilename + "_data" + File? 
multiqcDataDirZip = outDir + "/" +reportFilename + "_data.zip" } runtime { @@ -151,13 +151,11 @@ task MultiQC { tag: {description: "Equivalent to MultiQC's `--tag` option.", category: "advanced"} ignore: {description: "Equivalent to MultiQC's `--ignore` option.", category: "advanced"} ignoreSamples: {description: "Equivalent to MultiQC's `--ignore-samples` option.", category: "advanced"} - ignoreSymlinks: {description: "Equivalent to MultiQC's `--ignore-symlinks` flag.", category: "advanced"} sampleNames: {description: "Equivalent to MultiQC's `--sample-names` option.", category: "advanced"} fileList: {description: "Equivalent to MultiQC's `--file-list` option.", category: "advanced"} exclude: {description: "Equivalent to MultiQC's `--exclude` option.", category: "advanced"} module: {description: "Equivalent to MultiQC's `--module` option.", category: "advanced"} - dataDir: {description: "Equivalent to MultiQC's `--data-dir` flag.", category: "advanced"} - noDataDir: {description: "Equivalent to MultiQC's `--no-data-dir` flag.", category: "advanced"} + dataDir: {description: "Whether to output a data dir. 
Sets `--data-dir` or `--no-data-dir` flag.", category: "advanced"} dataFormat: {description: "Equivalent to MultiQC's `--data-format` option.", category: "advanced"} zipDataDir: {description: "Equivalent to MultiQC's `--zip-data-dir` flag.", category: "advanced"} export: {description: "Equivalent to MultiQC's `--export` flag.", category: "advanced"} @@ -168,8 +166,6 @@ task MultiQC { megaQCUpload: {description: "Opposite to MultiQC's `--no-megaqc-upload` flag.", category: "advanced"} config: {description: "Equivalent to MultiQC's `--config` option.", category: "advanced"} clConfig: {description: "Equivalent to MultiQC's `--cl-config` option.", category: "advanced"} - finished: {description: "An array of booleans that can be used to let multiqc wait on stuff.", category: "internal_use_only"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", From 8aa9d7ead5471e009a04bedd6d00a6b04dc5e3cd Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 19 May 2020 16:11:19 +0200 Subject: [PATCH 091/902] Add summaryFilePath parameter_meta --- hisat2.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/hisat2.wdl b/hisat2.wdl index 85bd3b35..5937f86d 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -86,6 +86,7 @@ task Hisat2 { readgroup: {description: "The readgroup id.", category: "required"} platform: {description: "The platform used for sequencing.", category: "advanced"} downstreamTranscriptomeAssembly: {description: "Equivalent to hisat2's `--dta` flag.", category: "advanced"} + summaryFilePath: {description: "Where the summary file should be written.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From a879f1c7be866a4124544fece45aa8163f782a2a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 20 May 2020 14:58:48 +0200 Subject: [PATCH 092/902] add gffcompare output --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ccd1cedc..390faf25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ version 4.0.0-develop --------------------------- + Added a log output for STAR. + Added report output to Hisat2. ++ Added output with all reports to gffcompare. + Change MultiQC inputs. It now accepts an array of reports files. It does not need access to a folder with the reports anymore. MultiQC can now be used as a normal WDL task without hacks. 
From 0211661c3d7dc4cc95213ed3c2a5640cfcc38ae2 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 20 May 2020 15:19:24 +0200 Subject: [PATCH 093/902] add missing parameter_meta --- chunked-scatter.wdl | 3 ++- star.wdl | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 1b81687a..111d8fa4 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -59,7 +59,8 @@ task ChunkedScatter { chunkSize: {description: "Equivalent to chunked-scatter's `-c` option.", category: "advanced"} overlap: {description: "Equivalent to chunked-scatter's `-o` option.", category: "advanced"} minimumBasesPerFile: {description: "Equivalent to chunked-scatter's `-m` option.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} diff --git a/star.wdl b/star.wdl index 4942f35e..8e6a511e 100644 --- a/star.wdl +++ b/star.wdl @@ -81,6 +81,7 @@ task GenomeGenerate { threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From 63202c731561890fc23fc6a451dd69cdae879108 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 28 May 2020 10:35:52 +0200 Subject: [PATCH 094/902] add bcftools stats --- bcftools.wdl | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 53165c6b..8281deb7 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -55,3 +55,91 @@ task Bcf2Vcf { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } + +task Stats { + input { + File inputVcf + File? compareVcf + String? afBins + String? afTag + Boolean firstAlleleOnly = false + String? collapse + String? depth + String? exclude + File? exons + String? applyFilters + File? fastaRef + File? fastaRefIndex + String? include + Boolean splitByID = false + String? regions + File? regionsFile + Array[String] samples = [] + File? samplesFile + String? targets + File? targetsFile + String? userTsTv + Boolean verbose = false + + Int threads = 0 + Int timeMinutes = 1 + 2* ceil(size(select_all([inputVcf, compareVcf]), "G")) # TODO: Estimate, 2 minutes per GB, refine later. + String memory = "2G" # TODO: Safe estimate, refine later. 
+ } + + command { + bcftools \ + ~{"--af-bins " + afBins} \ + ~{"--af-tag " + afTag} \ + ~{true="--1st-allele-only" false="" firstAlleleOnly} \ + ~{"--collapse " + collapse} \ + ~{"--depth " + depth} \ + ~{"--exclude " + exclude} \ + ~{"--exons " + exons} \ + ~{"--apply-filters " + applyFilters} \ + ~{"--fasta-ref " + fastaRef} \ + ~{"--include " + include} \ + ~{true="--split-by-ID" false="" splitByID} \ + ~{"--regions " + regions} \ + ~{"--regions-file " + regionsFile} \ + ~{true="--samples" false="" length(samples) > 0} ~{sep="," samples} \ + ~{"--samples-file " + samplesFile} \ + ~{"--targets " + targets} \ + ~{"--targets-file " + targetsFile} \ + ~{"--user-tstv " + userTsTv} \ + --threads ~{threads} \ + ~{true="--verbose" false="" verbose} \ + ~{inputVcf} ~{compareVcf} + } + + runtime { + cpu: threads + 1 + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + inputVcf: {description: "The vcf to be analysed.", category: "required"} + compareVcf: {description: "When inputVcf and compareVCF are given, the program generates separate stats for intersection and the complements. By default only sites are compared, samples must be given to include also sample columns.", category: "common"} + afBins: {description: "Allele frequency bins, a list (0.1,0.5,1) or a file (0.1\n0.5\n1).", category: "advanced"} + afTag: {description: "Allele frequency tag to use, by default estimated from AN,AC or GT.", category: "advanded"} + firstAlleleOnly: {description: "Include only 1st allele at multiallelic sites." category: "advanced"} + collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} + depth: {description: "Depth distribution: min,max,bin size [0,500,1].", category: "advanced"} + exclude: {description: "Exclude sites for which the expression is true (see man page for details)." 
category: "advanced"} + exons: {description: "Tab-delimited file with exons for indel frameshifts (chr,from,to; 1-based, inclusive, bgzip compressed).", category: "advanced"} + applyFilters: {description: "Require at least one of the listed FILTER strings (e.g. \"PASS,.\").", category: "advanced"} + fastaRef: {description: "Faidx indexed reference sequence file to determine INDEL context.", category: "advanced"} + fastaRefIndex: {description: "Index file (.fai) for fastaRef. Must be supplied if fastaRef is supplied.", category: "advanced"} + include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} + splitByID: {description: "Collect stats for sites with ID separately (known vs novel).", category: "advanced"} + regions: {description: "Restrict to comma-separated list of regions.", category: "advanced"} + regionsFile: {description: "Restrict to regions listed in a file.", category: "advanced"} + samples: {description: "List of samples for sample stats, \"-\" to include all samples.", category: "advanced"} + samplesFile: {description: "File of samples to include.", category: "advanced"} + targets: {description: "Similar to regions but streams rather than index-jumps.", category: "advanced"} + targetsFile: {description: "Similar to regionsFile but streams rather than index-jumps.", category: "advanced"} + userTsTv: {description: ". 
Collect Ts/Tv stats for any tag using the given binning [0:1:100].", category: "advanced"} + threads: {description: "Number of extra decompression threads [0].", category: "advanced"} + verbose: {description: "Produce verbose per-site and per-sample output.", category: "advanced"} + } +} \ No newline at end of file From 12420e3882d4dd23876c181b312e775ce320a583 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 28 May 2020 11:30:38 +0200 Subject: [PATCH 095/902] add dockerimage --- bcftools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 8281deb7..2341585d 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -84,6 +84,7 @@ task Stats { Int threads = 0 Int timeMinutes = 1 + 2* ceil(size(select_all([inputVcf, compareVcf]), "G")) # TODO: Estimate, 2 minutes per GB, refine later. String memory = "2G" # TODO: Safe estimate, refine later. + String dockerImage = "quay.io/biocontainers/bcftools:1.9--ha228f0b_3" } command { @@ -115,6 +116,7 @@ task Stats { cpu: threads + 1 time_minutes: timeMinutes memory: memory + docker: dockerImage } parameter_meta { From 25bb7b38eca7f1069786cce8a00ba512b2dbe014 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 28 May 2020 13:42:50 +0200 Subject: [PATCH 096/902] start on variant eval --- gatk.wdl | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index edafc4d4..c1cb9480 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1551,6 +1551,78 @@ task SplitNCigarReads { } } +task VariantEval { + input { + Array[File] inputVcfs + Array[File] inputVcfsIndex + Array[File] comparisonVcfs = [] + Array[File] comparisonVcfsIndex = [] + File? referenceFasta + File? referenceFastaDict + File? referenceFastaFai + File? dbsnpVCF + File? 
dbsnpVCFIndex + Array[File] intervals = [] + String outputPath = "eval.table" + Boolean doNotUseAllStandardModules = false + Boolean doNotUseAllStandardStratifications = false + Array[String] evalModules = [] + Array[String] stratificationModules = [] + Array[String] samples = [] + Boolean mergeEvals = false + + String memory = "5G" + String javaXmx = "4G" + # TODO: Refine estimate. For now 4 minutes per GB of input. + Int timeMinutes = ceil(size(flatten([inputVcfs, comparisonVcfs]), "G") * 4) + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ + VariantFiltration \ + -O ~{outputPath} \ + ~{true="--eval" false="" length(inputVcfs) > 0} ~{sep=" --eval " inputVcfs} \ + ~{true="--comparison" false="" length(comparisonVcfs) > 0} ~{sep=" --comparison " comparisonVcfs} \ + ~{"-R " + referenceFasta} \ + ~{"--dbsnp " + dbsnpVCF } \ + ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} \ + ~{true="--sample" false="" length(samples) > 0} ~{sep=' --sample ' samples} \ + ~{true="--do-not-use-all-standard-modules" false="" doNotUseAllStandardModules} \ + ~{true="--do-not-use-all-standard-stratifications" false="" doNotUseAllStandardStratifications} \ + ~{true="-EV" false="" length(evalModules) > 0} ~{sep=" -EV " evalModules} \ + ~{true="-ST" false="" length(stratificationModules) > 0} ~{sep=" -ST " stratificationModules} \ + ~{true="--merge-evals" false="" mergeEvals} + + } + + output { + File table = outputPath + } + + runtime { + cpu: 1 + docker: dockerImage + memory: memory + time_minutes: timeMinutes + } + parameter_meta { + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "common"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "common"} + referenceFastaFai: {description: "The index for the reference 
fasta file.", category: "common"} + dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} + dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} + outputPath: {description: "The location the output table should be written.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} task VariantFiltration { input { File inputVcf From c58cf2183ef7d821f1454ee8093b7a60d2141b1f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 28 May 2020 15:17:16 +0200 Subject: [PATCH 097/902] add variant eval parameter_meta --- gatk.wdl | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index c1cb9480..8e67edc8 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1553,8 +1553,8 @@ task SplitNCigarReads { task VariantEval { input { - Array[File] inputVcfs - Array[File] inputVcfsIndex + Array[File] evalVcfs + Array[File] evalVcfsIndex Array[File] comparisonVcfs = [] Array[File] comparisonVcfsIndex = [] File? referenceFasta @@ -1574,7 +1574,7 @@ task VariantEval { String memory = "5G" String javaXmx = "4G" # TODO: Refine estimate. For now 4 minutes per GB of input. 
- Int timeMinutes = ceil(size(flatten([inputVcfs, comparisonVcfs]), "G") * 4) + Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs]), "G") * 4) String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -1584,7 +1584,7 @@ task VariantEval { gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ VariantFiltration \ -O ~{outputPath} \ - ~{true="--eval" false="" length(inputVcfs) > 0} ~{sep=" --eval " inputVcfs} \ + ~{true="--eval" false="" length(evalVcfs) > 0} ~{sep=" --eval " evalVcfs} \ ~{true="--comparison" false="" length(comparisonVcfs) > 0} ~{sep=" --comparison " comparisonVcfs} \ ~{"-R " + referenceFasta} \ ~{"--dbsnp " + dbsnpVCF } \ @@ -1609,12 +1609,22 @@ task VariantEval { time_minutes: timeMinutes } parameter_meta { + evalVcfs: {description: "Variant sets to evaluate." category: "required"} + evalVcfsIndex: {description: "Indexes for the variant sets.", category: "required"} + comparisonVcfs: {description: "Compare set vcfs.", category: "advanced"} + comparisonVcfsIndex: {description: "Indexes for the compare sets.", category: "advanced"} + evalModules: {description: "One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless doNotUseAllStandardModules=true)", category: "common"} + stratificationModules: {description: "One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless doNotUseAllStandardStratifications=true)", category: "common"} + samples: {description: "Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context." , category: "advanced"} # Advanced because this description is impossible to understand... 
+ mergeEvals: {description: "If provided, all evalVcf tracks will be merged into a single eval track", category: "common"} + doNotUseAllStandardModules: {description: "Do not use the standard modules by default (instead, only those that are specified with the evalModules option).", category: "common"} + doNotUseAllStandardStratifications: {description: "Do not use the standard stratification modules by default (instead, only those that are specified with the stratificationModules option).", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "common"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "common"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "common"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} - outputPath: {description: "The location the output table should be written.", category: "common"} + outputPath: {description: "The location the output table should be written.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} From b3a80c84a6d785aa433adf6b58cd3f694b8cd5a4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 2 Jun 2020 12:29:32 +0200 Subject: [PATCH 098/902] Take into account index size for star alignment time requirement --- CHANGELOG.md | 1 + star.wdl | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 390faf25..6fb9a3d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. 
version 4.0.0-develop --------------------------- ++ Take into account index size for STAR alignment time requirement. + Added a log output for STAR. + Added report output to Hisat2. + Added output with all reports to gffcompare. diff --git a/star.wdl b/star.wdl index 8e6a511e..c262dc54 100644 --- a/star.wdl +++ b/star.wdl @@ -103,7 +103,8 @@ task Star { Int runThreadN = 4 String memory = "~{5 + ceil(size(indexFiles, "G"))}G" - Int timeMinutes = 1 + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN) + # 1 minute initialization + time reading in index + time aligning data. + Int timeMinutes = 1 + ceil(size(indexFiles, "G") / 2) + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } From 3605b18e3b54ddccae4ffc8553af92047338983d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 4 Jun 2020 13:35:34 +0200 Subject: [PATCH 099/902] higher margin for rnaseqmetrics --- CHANGELOG.md | 1 + picard.wdl | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fb9a3d7..bde1b766 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Take into account reference fasta size for Picard metrics. + Take into account index size for STAR alignment time requirement. + Added a log output for STAR. + Added report output to Hisat2. diff --git a/picard.wdl b/picard.wdl index 3103ad9b..a94abd52 100644 --- a/picard.wdl +++ b/picard.wdl @@ -87,7 +87,8 @@ task CollectMultipleMetrics { String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + # Additional * 2 because picard multiple metrics reads the reference fasta twice. 
+ Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -203,7 +204,7 @@ task CollectRnaSeqMetrics { String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } From 898e02eefbd7a612dba9a2e868535d1bcc36c62a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 4 Jun 2020 13:36:41 +0200 Subject: [PATCH 100/902] Add comment --- picard.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/picard.wdl b/picard.wdl index a94abd52..0ee5da36 100644 --- a/picard.wdl +++ b/picard.wdl @@ -204,6 +204,7 @@ task CollectRnaSeqMetrics { String memory = "9G" String javaXmx = "8G" + # With 6 minutes per G there were several timeouts. Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } From 3261563d3d5f873a062dc190f6e1c18846916f76 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 4 Jun 2020 13:41:26 +0200 Subject: [PATCH 101/902] Tune memory and time requirements for RNA seq --- star.wdl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/star.wdl b/star.wdl index c262dc54..4b204eb4 100644 --- a/star.wdl +++ b/star.wdl @@ -102,9 +102,10 @@ task Star { Int? limitBAMsortRAM Int runThreadN = 4 - String memory = "~{5 + ceil(size(indexFiles, "G"))}G" - # 1 minute initialization + time reading in index + time aligning data. - Int timeMinutes = 1 + ceil(size(indexFiles, "G") / 2) + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN) + # Use a margin of 30% index size. Real memory usage is ~30 GiB for a 27 GiB index. + String memory = "~{1 + ceil(size(indexFiles, "G") * 1.3)}G" + # 1 minute initialization + time reading in index (1 minute per G) + time aligning data. 
+ Int timeMinutes = 1 + ceil(size(indexFiles, "G")) + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } From 6d73709777868f082836de0afea1737c287f9871 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 4 Jun 2020 14:12:43 +0200 Subject: [PATCH 102/902] update star resource requirement --- star.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/star.wdl b/star.wdl index 4b204eb4..b1c662f2 100644 --- a/star.wdl +++ b/star.wdl @@ -103,7 +103,8 @@ task Star { Int runThreadN = 4 # Use a margin of 30% index size. Real memory usage is ~30 GiB for a 27 GiB index. - String memory = "~{1 + ceil(size(indexFiles, "G") * 1.3)}G" + Int memoryGb = 1 + ceil(size(indexFiles, "G") * 1.3) + String memory = "~{memoryGb}G" # 1 minute initialization + time reading in index (1 minute per G) + time aligning data. Int timeMinutes = 1 + ceil(size(indexFiles, "G")) + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" From bf6145791588577ee502749a7662f0683e67aab9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 5 Jun 2020 07:56:50 +0200 Subject: [PATCH 103/902] try to fix memory --- star.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/star.wdl b/star.wdl index b1c662f2..f1549134 100644 --- a/star.wdl +++ b/star.wdl @@ -104,7 +104,9 @@ task Star { Int runThreadN = 4 # Use a margin of 30% index size. Real memory usage is ~30 GiB for a 27 GiB index. Int memoryGb = 1 + ceil(size(indexFiles, "G") * 1.3) - String memory = "~{memoryGb}G" + # For some reason doing above calculation inside a string does not work. + # So we solve it with an optional memory string and using select_first. + String? memory # 1 minute initialization + time reading in index (1 minute per G) + time aligning data. 
Int timeMinutes = 1 + ceil(size(indexFiles, "G")) + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" @@ -137,7 +139,7 @@ task Star { runtime { cpu: runThreadN - memory: memory + memory: select_first([memory, "~{memoryGb}G"]) time_minutes: timeMinutes docker: dockerImage } From a8c2b90aba14dab6c2712b69b05217fe1642605a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 5 Jun 2020 08:56:28 +0200 Subject: [PATCH 104/902] Skip the Perl wrapper and talk to fastq jar directly --- fastqc.wdl | 49 ++++++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/fastqc.wdl b/fastqc.wdl index e24b6ce4..512ca2e7 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -38,8 +38,13 @@ task Fastqc { String? dir Int threads = 1 - # Fastqc uses 250MB per thread in its wrapper. - String memory = "~{250 + 250 * threads}M" + # Set javaXmx a little high. Equal to fastqc default with 7 threads. + # This is because some fastq files need more memory. 2G per core + # is a nice cluster default, so we use all the rest of the memory for + # fastqc so we should have as little OOM crashes as possible even with + # weird edge case fastq's. + String javaXmx="1750M" + String memory = "2G" Int timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" Array[File]? NoneArray @@ -53,26 +58,32 @@ task Fastqc { # Just as fastqc does it. String reportDir = outdirPath + "/" + sub(name, "\.[^\.]*$", "_fastqc") - command { + # We reimplement the perl wrapper here. This is the advantage that it gives + # us more control over the amount of memory used. 
+ command <<< set -e mkdir -p ~{outdirPath} - fastqc \ - ~{"--outdir " + outdirPath} \ - ~{true="--casava" false="" casava} \ - ~{true="--nano" false="" nano} \ - ~{true="--nofilter" false="" noFilter} \ - ~{true="--extract" false="" extract} \ - ~{true="--nogroup" false="" nogroup} \ - ~{"--min_length " + minLength } \ - ~{"--format " + format} \ - ~{"--threads " + threads} \ - ~{"--contaminants " + contaminants} \ - ~{"--adapters " + adapters} \ - ~{"--limits " + limits} \ - ~{"--kmers " + kmers} \ - ~{"--dir " + dir} \ + FASTQC_DIR="/usr/local/opt/fastqc-0.11.9" + export CLASSPATH="$FASTQC_DIR:$FASTQC_DIR/sam-1.103.jar:$FASTQC_DIR/jbzip2-0.9.jar:$FASTQC_DIR/cisd-jhdf5.jar" + java -Djava.awt.headless=true -XX:ParallelGCThreads=1 \ + -Xms200M -Xmx~{javaXmx} \ + ~{"-Dfastqc.output_dir=" + outdirPath} \ + ~{true="-Dfastqc.casava=true" false="" casava} \ + ~{true="-Dfastqc.nano=true" false="" nano} \ + ~{true="-Dfastqc.nofilter=true" false="" noFilter} \ + ~{true="-Dfastqc.unzip=true" false="" extract} \ + ~{true="-Dfastqc.nogroup=true" false="" nogroup} \ + ~{"-Dfastqc.min_length=" + minLength} \ + ~{"-Dfastqc.sequence_format=" + format} \ + ~{"-Dfastqc.threads=" + threads} \ + ~{"-Dfastqc.contaminant_file=" + contaminants} \ + ~{"-Dfastqc.adapter_file=" + adapters} \ + ~{"-Dfastqc.limits_file=" + limits} \ + ~{"-Dfastqc.kmer_size=" + kmers} \ + ~{"-Djava.io.tmpdir=" + dir} \ + uk.ac.babraham.FastQC.FastQCApplication ~{seqFile} - } + >>> output { File? rawReport = if extract then reportDir + "/fastqc_data.txt" else NoneFile From 56437e740ce9132ac4b45b9409bb21fd68da8b64 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 5 Jun 2020 09:28:31 +0200 Subject: [PATCH 105/902] Add missing \ --- fastqc.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fastqc.wdl b/fastqc.wdl index 512ca2e7..81af7d59 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -58,7 +58,7 @@ task Fastqc { # Just as fastqc does it. 
String reportDir = outdirPath + "/" + sub(name, "\.[^\.]*$", "_fastqc") - # We reimplement the perl wrapper here. This is the advantage that it gives + # We reimplement the perl wrapper here. This has the advantage that it gives # us more control over the amount of memory used. command <<< set -e @@ -81,7 +81,7 @@ task Fastqc { ~{"-Dfastqc.limits_file=" + limits} \ ~{"-Dfastqc.kmer_size=" + kmers} \ ~{"-Djava.io.tmpdir=" + dir} \ - uk.ac.babraham.FastQC.FastQCApplication + uk.ac.babraham.FastQC.FastQCApplication \ ~{seqFile} >>> From 1db21ed6d827ace5889f37bd12d9e3fa4bea869f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 5 Jun 2020 09:31:01 +0200 Subject: [PATCH 106/902] Add fastqc change to changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bde1b766..c38544b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ The FastQC task now talks to the Java directly instead of using the included + Perl wrapper for FastQC. This has the advantage that memory and threads can + be set independently. A rather high maximum heap size of 1750MB (Xmx1750M) + was set, as OOM errors occurred frequently on some fastqs. + Take into account reference fasta size for Picard metrics. + Take into account index size for STAR alignment time requirement. + Added a log output for STAR. From 6e4334c2f933d70e9acf2310f0d30f4799302e64 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 5 Jun 2020 12:01:34 +0200 Subject: [PATCH 107/902] fix typo --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 8e67edc8..88904be5 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1609,7 +1609,7 @@ task VariantEval { time_minutes: timeMinutes } parameter_meta { - evalVcfs: {description: "Variant sets to evaluate." 
category: "required"} + evalVcfs: {description: "Variant sets to evaluate.", category: "required"} evalVcfsIndex: {description: "Indexes for the variant sets.", category: "required"} comparisonVcfs: {description: "Compare set vcfs.", category: "advanced"} comparisonVcfsIndex: {description: "Indexes for the compare sets.", category: "advanced"} From 3eccb5b72d896fedff0974579b3eb1b6ab291035 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 5 Jun 2020 13:38:16 +0200 Subject: [PATCH 108/902] fix typo and use newer version of GATK --- gatk.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 88904be5..c00af93a 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1575,15 +1575,15 @@ task VariantEval { String javaXmx = "4G" # TODO: Refine estimate. For now 4 minutes per GB of input. Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs]), "G") * 4) - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.7.0--py38_0" } command { set -e mkdir -p "$(dirname ~{outputPath})" gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ - VariantFiltration \ - -O ~{outputPath} \ + VariantEval \ + --output ~{outputPath} \ ~{true="--eval" false="" length(evalVcfs) > 0} ~{sep=" --eval " evalVcfs} \ ~{true="--comparison" false="" length(comparisonVcfs) > 0} ~{sep=" --comparison " comparisonVcfs} \ ~{"-R " + referenceFasta} \ From 2eca5b51d53103f496abe4738540eb116634b44a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 5 Jun 2020 16:15:03 +0200 Subject: [PATCH 109/902] fix stats task --- bcftools.wdl | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 2341585d..017ba21d 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -59,7 +59,10 @@ task Bcf2Vcf { task Stats { input { File inputVcf + File inputVcfIndex File? compareVcf + File? 
compareVcfIndex + String outputPath = basename(inputVcf) + ".stats" String? afBins String? afTag Boolean firstAlleleOnly = false @@ -88,7 +91,9 @@ task Stats { } command { - bcftools \ + set -e + mkdir -p $(dirname ~{outputPath}) + bcftools stats \ ~{"--af-bins " + afBins} \ ~{"--af-tag " + afTag} \ ~{true="--1st-allele-only" false="" firstAlleleOnly} \ @@ -109,7 +114,11 @@ task Stats { ~{"--user-tstv " + userTsTv} \ --threads ~{threads} \ ~{true="--verbose" false="" verbose} \ - ~{inputVcf} ~{compareVcf} + ~{inputVcf} ~{compareVcf} > ~{outputPath} + } + + output { + File stats = outputPath } runtime { @@ -124,10 +133,10 @@ task Stats { compareVcf: {description: "When inputVcf and compareVCF are given, the program generates separate stats for intersection and the complements. By default only sites are compared, samples must be given to include also sample columns.", category: "common"} afBins: {description: "Allele frequency bins, a list (0.1,0.5,1) or a file (0.1\n0.5\n1).", category: "advanced"} afTag: {description: "Allele frequency tag to use, by default estimated from AN,AC or GT.", category: "advanded"} - firstAlleleOnly: {description: "Include only 1st allele at multiallelic sites." category: "advanced"} + firstAlleleOnly: {description: "Include only 1st allele at multiallelic sites.", category: "advanced"} collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} depth: {description: "Depth distribution: min,max,bin size [0,500,1].", category: "advanced"} - exclude: {description: "Exclude sites for which the expression is true (see man page for details)." 
category: "advanced"} + exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} exons: {description: "Tab-delimited file with exons for indel frameshifts (chr,from,to; 1-based, inclusive, bgzip compressed).", category: "advanced"} applyFilters: {description: "Require at least one of the listed FILTER strings (e.g. \"PASS,.\").", category: "advanced"} fastaRef: {description: "Faidx indexed reference sequence file to determine INDEL context.", category: "advanced"} From 00e1180c565bf4e6da9aec1e9e62514d4be8dff6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 5 Jun 2020 16:27:54 +0200 Subject: [PATCH 110/902] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 390faf25..13beb086 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Added bcftools stats task. ++ Added GATK VariantEval task. + Added a log output for STAR. + Added report output to Hisat2. + Added output with all reports to gffcompare. From f2c1d5ba6110225d76a1edb101eaec503410c2e0 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 8 Jun 2020 10:14:01 +0200 Subject: [PATCH 111/902] complete parameter_meta for bcf tools --- bcftools.wdl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 017ba21d..bd79c2c6 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -129,8 +129,10 @@ task Stats { } parameter_meta { - inputVcf: {description: "The vcf to be analysed.", category: "required"} + inputVcf: {description: "The VCF to be analysed.", category: "required"} + inputVcfIndex: {description: "The index for the input VCF.", category: "required"} compareVcf: {description: "When inputVcf and compareVCF are given, the program generates separate stats for intersection and the complements. 
By default only sites are compared, samples must be given to include also sample columns.", category: "common"} + compareVcfIndex: {description: "Index for the compareVcf.", category: "common"} afBins: {description: "Allele frequency bins, a list (0.1,0.5,1) or a file (0.1\n0.5\n1).", category: "advanced"} afTag: {description: "Allele frequency tag to use, by default estimated from AN,AC or GT.", category: "advanded"} firstAlleleOnly: {description: "Include only 1st allele at multiallelic sites.", category: "advanced"} @@ -152,5 +154,9 @@ task Stats { userTsTv: {description: ". Collect Ts/Tv stats for any tag using the given binning [0:1:100].", category: "advanced"} threads: {description: "Number of extra decompression threads [0].", category: "advanced"} verbose: {description: "Produce verbose per-site and per-sample output.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} } } \ No newline at end of file From a967bf6bf912292c3e93afc7be8fa685bb97d96d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 8 Jun 2020 10:15:33 +0200 Subject: [PATCH 112/902] Remove empty line --- gatk.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index c00af93a..09de0488 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1595,7 +1595,6 @@ task VariantEval { ~{true="-EV" false="" length(evalModules) > 0} ~{sep=" -EV " evalModules} \ ~{true="-ST" false="" length(stratificationModules) > 0} ~{sep=" -ST " stratificationModules} \ ~{true="--merge-evals" false="" mergeEvals} - } output { From 
4f31b5fb1b6af42941545ee54b862ff74e6d7373 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 9 Jun 2020 15:40:40 +0200 Subject: [PATCH 113/902] allow for very large scattersizes --- biopet/biopet.wdl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index cc8e1bc6..89319409 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -214,6 +214,7 @@ task ScatterRegions { input { File referenceFasta File referenceFastaDict + Int scatterSizeMillions = 1000 Int? scatterSize File? regions Boolean notSplitContigs = false @@ -230,6 +231,7 @@ task ScatterRegions { # linking. This path must be in the containers filesystem, otherwise the # linking does not work. String outputDirPath = "scatters" + String finalSize = if defined(scatterSize) then "~{scatterSize}" else "~{scatterSizeMillions}000000" command <<< set -e -o pipefail @@ -237,7 +239,7 @@ task ScatterRegions { biopet-scatterregions -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -R ~{referenceFasta} \ -o ~{outputDirPath} \ - ~{"-s " + scatterSize} \ + ~{"-s " + finalSize} \ ~{"-L " + regions} \ ~{"--bamFile " + bamFile} \ ~{true="--notSplitContigs" false="" notSplitContigs} @@ -271,7 +273,8 @@ task ScatterRegions { referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} - scatterSize: {description: "Equivalent to biopet scatterregions' `-s` option.", category: "common"} + scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} + scatterSize: {description: "Overrides scatterSizeMillions with a smaller value if set.", category: "advanced"} regions: {description: "The regions to be scattered.", category: "advanced"} notSplitContigs: {description: "Equivalent to biopet scatterregions' `--notSplitContigs` flag.", category: "advanced"} From 
d15fedbf79cb0950552fa14d56d94ef537477e34 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 10 Jun 2020 09:31:19 +0200 Subject: [PATCH 114/902] Fix spelling error in parameter_meta of common.wdl --- common.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common.wdl b/common.wdl index f8325523..e96cc1c8 100644 --- a/common.wdl +++ b/common.wdl @@ -243,7 +243,7 @@ task YamlToJson { parameter_meta { yaml: {description: "The YAML file to convert.", category: "required"} outputJson: {description: "The location the output JSON file should be written to.", category: "advanced"} - memory: {description: "The maximum aount of memroy the job will need.", category: "advanced"} + memory: {description: "The maximum amount of memory the job will need.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 8aa272366f8a575fdca2dd9bd93c92db3257ebed Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 10 Jun 2020 15:45:32 +0200 Subject: [PATCH 115/902] Fix error in star --- star.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/star.wdl b/star.wdl index f1549134..7812e3df 100644 --- a/star.wdl +++ b/star.wdl @@ -35,7 +35,7 @@ task GenomeGenerate { command { set -e - mkdir -p "$(dirname ~{genomeDir})" + mkdir -p ~{genomeDir} STAR \ --runMode genomeGenerate \ --runThreadN ~{threads} \ @@ -50,7 +50,7 @@ task GenomeGenerate { File chrNameLength = "~{genomeDir}/chrNameLength.txt" File chrName = "~{genomeDir}/chrName.txt" File chrStart = "~{genomeDir}/chrStart.txt" - File genome = "~{genomeDir}/genome.txt" + File genome = "~{genomeDir}/Genome" File genomeParameters = "~{genomeDir}/genomeParameters.txt" File sa = "~{genomeDir}/SA" File saIndex = "~{genomeDir}/SAindex" From 
e323a23dc13433ce4bb8bd71d94f840e0627babb Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 11 Jun 2020 16:35:59 +0200 Subject: [PATCH 116/902] Update minimumIdentity to correct type and default. --- CHANGELOG.md | 2 ++ talon.wdl | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 13beb086..29351743 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ TALON: Update `minimumIdentity` to correct type (float, was integer) & set + new default according to developers (0.8, was 0). + Added bcftools stats task. + Added GATK VariantEval task. + Added a log output for STAR. diff --git a/talon.wdl b/talon.wdl index b2ae3a62..98e0c13a 100644 --- a/talon.wdl +++ b/talon.wdl @@ -380,7 +380,7 @@ task Talon { File databaseFile String genomeBuild Float minimumCoverage = 0.9 - Int minimumIdentity = 0 + Float minimumIdentity = 0.8 String outputPrefix Int cores = 4 From 9faafc1d81422e0a0a452921ab262241bcb607f4 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 12 Jun 2020 10:04:56 +0200 Subject: [PATCH 117/902] Trigger travis test. --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 29351743..2d4267c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- -+ TALON: Update `minimumIdentity` to correct type (float, was integer) & set - new default according to developers (0.8, was 0). ++ TALON: Update `minimumIdentity` to correct type (float, was integer) + & set new default according to developers (0.8, was 0). + Added bcftools stats task. + Added GATK VariantEval task. + Added a log output for STAR. 
From 92c8725fc597743e7aa4b65ddec79f3e0bd872b3 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 12 Jun 2020 10:38:14 +0200 Subject: [PATCH 118/902] Add new STAR options. --- CHANGELOG.md | 2 ++ star.wdl | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d4267c1..61f47609 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ STAR: Add options regarding alignment score and read length for tweaking when + processing rRNA depleted samples. + TALON: Update `minimumIdentity` to correct type (float, was integer) & set new default according to developers (0.8, was 0). + Added bcftools stats task. diff --git a/star.wdl b/star.wdl index 8e6a511e..6d80e9d3 100644 --- a/star.wdl +++ b/star.wdl @@ -95,6 +95,10 @@ task Star { String outFileNamePrefix String outSAMtype = "BAM SortedByCoordinate" String readFilesCommand = "zcat" + Int outFilterScoreMin = 0 + Float outFilterScoreMinOverLread = 0.66 + Int outFilterMatchNmin = 0 + Float outFilterMatchNminOverLread = 0.66 String? outStd String? twopassMode = "Basic" Array[String]? outSAMattrRGline @@ -119,6 +123,10 @@ task Star { --genomeDir ~{sub(indexFiles[0], basename(indexFiles[0]), "")} \ --outSAMtype ~{outSAMtype} \ --readFilesCommand ~{readFilesCommand} \ + --outFilterScoreMin ~{outFilterScoreMin} \ + --outFilterScoreMinOverLread ~{outFilterScoreMinOverLread} \ + --outFilterMatchNmin ~{outFilterMatchNmin} \ + --outFilterMatchNminOverLread ~{outFilterMatchNminOverLread} \ ~{"--outSAMunmapped " + outSAMunmapped} \ ~{"--runThreadN " + runThreadN} \ ~{"--outStd " + outStd} \ @@ -146,6 +154,10 @@ task Star { outFileNamePrefix: {description: "The prefix for the output files. May include directories.", category: "required"} outSAMtype: {description: "The type of alignment file to be produced. 
Currently only `BAM SortedByCoordinate` is supported.", category: "advanced"} readFilesCommand: {description: "Equivalent to star's `--readFilesCommand` option.", category: "advanced"} + outFilterScoreMin: {description: "Equivalent to star's `--outFilterScoreMin` option.", category: "advanced"} + outFilterScoreMinOverLread: {description: "Equivalent to star's `--outFilterScoreMinOverLread` option.", category: "advanced"} + outFilterMatchNmin: {description: "Equivalent to star's `--outFilterMatchNmin` option.", category: "advanced"} + outFilterMatchNminOverLread: {description: "Equivalent to star's `--outFilterMatchNminOverLread` option.", category: "advanced"} outStd: {description: "Equivalent to star's `--outStd` option.", category: "advanced"} twopassMode: {description: "Equivalent to star's `--twopassMode` option.", category: "advanced"} outSAMattrRGline: {description: "The readgroup lines for the fastq pairs given (in the same order as the fastq files).", category: "common"} @@ -154,8 +166,7 @@ task Star { runThreadN: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From 1d784bfd5891e47dfa4821c6b4a7542463b10c2b Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 12 Jun 2020 13:03:06 +0200 Subject: [PATCH 119/902] Update CHANGELOG. 
--- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61f47609..77189f3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- -+ STAR: Add options regarding alignment score and read length for tweaking when - processing rRNA depleted samples. ++ STAR: Add options regarding alignment score (regarding read length as well) + for tweaking when processing rRNA depleted samples. + TALON: Update `minimumIdentity` to correct type (float, was integer) & set new default according to developers (0.8, was 0). + Added bcftools stats task. From c8d874812f45e10eded3f48979d69f6d81d7b90f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 15 Jun 2020 10:49:45 +0200 Subject: [PATCH 120/902] Change new options to optional. --- star.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/star.wdl b/star.wdl index 6d80e9d3..da90516d 100644 --- a/star.wdl +++ b/star.wdl @@ -95,10 +95,10 @@ task Star { String outFileNamePrefix String outSAMtype = "BAM SortedByCoordinate" String readFilesCommand = "zcat" - Int outFilterScoreMin = 0 - Float outFilterScoreMinOverLread = 0.66 - Int outFilterMatchNmin = 0 - Float outFilterMatchNminOverLread = 0.66 + Int? outFilterScoreMin + Float? outFilterScoreMinOverLread + Int? outFilterMatchNmin + Float? outFilterMatchNminOverLread String? outStd String? twopassMode = "Basic" Array[String]? outSAMattrRGline From 0ad2ddbe042bf9a3520892b8980278244ccdc3db Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 15 Jun 2020 10:58:18 +0200 Subject: [PATCH 121/902] Fix command section now that new inputs are optional. 
--- star.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/star.wdl b/star.wdl index da90516d..6f95a637 100644 --- a/star.wdl +++ b/star.wdl @@ -123,10 +123,10 @@ task Star { --genomeDir ~{sub(indexFiles[0], basename(indexFiles[0]), "")} \ --outSAMtype ~{outSAMtype} \ --readFilesCommand ~{readFilesCommand} \ - --outFilterScoreMin ~{outFilterScoreMin} \ - --outFilterScoreMinOverLread ~{outFilterScoreMinOverLread} \ - --outFilterMatchNmin ~{outFilterMatchNmin} \ - --outFilterMatchNminOverLread ~{outFilterMatchNminOverLread} \ + ~{"--outFilterScoreMin " + outFilterScoreMin} \ + ~{"--outFilterScoreMinOverLread " + outFilterScoreMinOverLread} \ + ~{"--outFilterMatchNmin " + outFilterMatchNmin} \ + ~{"--outFilterMatchNminOverLread " + outFilterMatchNminOverLread} \ ~{"--outSAMunmapped " + outSAMunmapped} \ ~{"--runThreadN " + runThreadN} \ ~{"--outStd " + outStd} \ From 2cb77bebc7bafb7316450bf77a5d4805ae952d60 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 15 Jun 2020 13:17:59 +0200 Subject: [PATCH 122/902] Add scatter-regions task --- chunked-scatter.wdl | 49 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 111d8fa4..93a603fe 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -64,3 +64,52 @@ task ChunkedScatter { category: "advanced"} } } + + +task ScatterRegions { + input { + File inputFile + String? prefix + Boolean splitContigs = false + Int scatterSizeMillions = 1000 + Int? 
scatterSize + Int timeMinutes = 2 + String memory = "256M" + String dockerImage = "biowdl/chunked-scatter:latest" + } + + String finalSize = if defined(scatterSize) then "~{scatterSize}" else "~{scatterSizeMillions}000000" + + command { + scatter-regions \ + --print-paths \ + --scatter-size ~{finalSize} \ + ~{true="--split-contigs" false="" splitContigs} \ + ~{"--prefix " + prefix} \ + ~{inputFile} + } + + output { + Array[File] scatters = read_lines(stdout()) + } + + runtime { + cpu: 1 + memory: memory + docker: dockerImage + time_minutes: timeMinutes + } + + parameter_meta { + inputFile: {description: "The input file, either a bed file or a sequence dict. Which format is used is detected by the extension: '.bed', '.fai' or '.dict'." category: "required"} + prefix: {description: "The prefix of the ouput files. Output will be named like: .bed, in which N is an incrementing number. Default 'scatter-'.", category: "advanced"} + splitContigs: {description: "If set, contigs are allowed to be split up over multiple files.", category: "advanced"} + scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} + scatterSize: {description: "Overrides scatterSizeMillions with a smaller value if set.", category: "advanced"} + + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From 5f13c04f7fd3c91eba022fac9dc4ad0a05d4081d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 15 Jun 2020 13:26:00 +0200 Subject: [PATCH 123/902] typo --- chunked-scatter.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 93a603fe..f01cfc82 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -101,7 +101,7 @@ task ScatterRegions { } parameter_meta { - inputFile: {description: "The input file, either a bed file or a sequence dict. Which format is used is detected by the extension: '.bed', '.fai' or '.dict'." category: "required"} + inputFile: {description: "The input file, either a bed file or a sequence dict. Which format is used is detected by the extension: '.bed', '.fai' or '.dict'.", category: "required"} prefix: {description: "The prefix of the ouput files. Output will be named like: .bed, in which N is an incrementing number. Default 'scatter-'.", category: "advanced"} splitContigs: {description: "If set, contigs are allowed to be split up over multiple files.", category: "advanced"} scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} From 5068af102abbd5d8726f63b4017ed6948ffc3031 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 15 Jun 2020 13:39:16 +0200 Subject: [PATCH 124/902] use the same prefix as the biopet-scatterregions tool --- chunked-scatter.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index f01cfc82..96dbf1eb 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -69,7 +69,7 @@ task ChunkedScatter { task ScatterRegions { input { File inputFile - String? prefix + String prefix = "scatters/scatter-" Boolean splitContigs = false Int scatterSizeMillions = 1000 Int? 
scatterSize From a5cdc7059188174270b76b023d8a2c1c40710bef Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 16 Jun 2020 08:01:57 +0200 Subject: [PATCH 125/902] 300 minutes per G of input --- star.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/star.wdl b/star.wdl index 7812e3df..bc47bc02 100644 --- a/star.wdl +++ b/star.wdl @@ -108,7 +108,7 @@ task Star { # So we solve it with an optional memory string and using select_first. String? memory # 1 minute initialization + time reading in index (1 minute per G) + time aligning data. - Int timeMinutes = 1 + ceil(size(indexFiles, "G")) + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN) + Int timeMinutes = 1 + ceil(size(indexFiles, "G")) + ceil(size(flatten([inputR1, inputR2]), "G") * 300 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } From edb371e333e05764080c48937d81bd1c7f8921ba Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 16 Jun 2020 08:07:55 +0200 Subject: [PATCH 126/902] Also read reference and dbsnpvcf as part of time estimate --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 09de0488..700a245c 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1574,7 +1574,7 @@ task VariantEval { String memory = "5G" String javaXmx = "4G" # TODO: Refine estimate. For now 4 minutes per GB of input. 
- Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs]), "G") * 4) + Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs, select_all([referenceFasta, dbsnpVCF])]), "G") * 4) String dockerImage = "quay.io/biocontainers/gatk4:4.1.7.0--py38_0" } From 7db296e70f162c68ce3c8b05c86618c749e7f9a5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 16 Jun 2020 08:58:37 +0200 Subject: [PATCH 127/902] resource requirements for multiqc --- multiqc.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/multiqc.wdl b/multiqc.wdl index 6a967b3f..7dcf333e 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -51,11 +51,11 @@ task MultiQC { Boolean megaQCUpload = false # This must be actively enabled in my opinion. The tools default is to upload. File? config # A directory String? clConfig - - String memory = "4G" - Int timeMinutes = 120 + String? memory + Int timeMinutes = 2 + ceil(size(reports, "G") * 8) String dockerImage = "quay.io/biocontainers/multiqc:1.7--py_1" } + Int memoryGb = 2 + ceil(size(reports, "G")) # This is where the reports end up. It does not need to be changed by the # user. It is full of symbolic links, so it is not of any use to the user @@ -132,7 +132,7 @@ task MultiQC { } runtime { - memory: memory + memory: select_first([memory, "~{memoryGb}G"]) time_minutes: timeMinutes docker: dockerImage } From 5c1625e38ab490e805f8e5a7efc158a1638fad50 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 16 Jun 2020 15:27:18 +0200 Subject: [PATCH 128/902] Add scatter-regions to changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e33b51d..cb7b635e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Added a new task for [scatter-regions](https://github.com/biowdl/chunked-scatter) + that replaces biopet-scatterregions. 
+ The FastQC task now talks to the Java directly instead of using the included Perl wrapper for FastQC. This has the advantage that memory and threads can be set independently. A rather high maximum heap size of 1750MB (Xmx1750M) From f3006d5aea706e2f85b6edd2c9e7bf16b83e090a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 16 Jun 2020 15:27:28 +0200 Subject: [PATCH 129/902] update gatk varianteval times --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 700a245c..2089eabb 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1574,7 +1574,7 @@ task VariantEval { String memory = "5G" String javaXmx = "4G" # TODO: Refine estimate. For now 4 minutes per GB of input. - Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs, select_all([referenceFasta, dbsnpVCF])]), "G") * 4) + Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs, select_all([referenceFasta, dbsnpVCF])]), "G") * 20) String dockerImage = "quay.io/biocontainers/gatk4:4.1.7.0--py38_0" } From 88a1d405ada9a859c14bf77ee4c46e92a6c384f0 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 16 Jun 2020 15:29:03 +0200 Subject: [PATCH 130/902] Update changelog --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb7b635e..2b8de510 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,14 +11,14 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Tuned resource requirements for GATK VariantEval, MultiQC, Picard metrics and + STAR. + Added a new task for [scatter-regions](https://github.com/biowdl/chunked-scatter) that replaces biopet-scatterregions. + The FastQC task now talks to the Java directly instead of using the included Perl wrapper for FastQC. This has the advantage that memory and threads can be set independently. 
A rather high maximum heap size of 1750MB (Xmx1750M) was set, as OOM errors occurred frequently on some fastqs. -+ Take into account reference fasta size for Picard metrics. -+ Take into account index size for STAR alignment time requirement. + Added bcftools stats task. + Added GATK VariantEval task. + Added a log output for STAR. From e45b413fffdc73ed83cc21b6c38be2314d8744bc Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 16 Jun 2020 15:51:50 +0200 Subject: [PATCH 131/902] add javaxmx parameter_meta --- fastqc.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fastqc.wdl b/fastqc.wdl index 81af7d59..04b6813f 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -116,6 +116,8 @@ task Fastqc { kmers: {description: "Equivalent to fastqc's --kmers option.", category: "advanced"} dir: {description: "Equivalent to fastqc's --dir option.", category: "advanced"} threads: {description: "The number of cores to use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From 5c9b7c8322932d193b1d158bc681c7d5a86f8750 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 16 Jun 2020 15:55:50 +0200 Subject: [PATCH 132/902] Remove memoryGb from the input section --- star.wdl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/star.wdl b/star.wdl index 4c134171..4da67f72 100644 --- a/star.wdl +++ b/star.wdl @@ -106,16 +106,18 @@ task Star { Int? limitBAMsortRAM Int runThreadN = 4 - # Use a margin of 30% index size. Real memory usage is ~30 GiB for a 27 GiB index. 
- Int memoryGb = 1 + ceil(size(indexFiles, "G") * 1.3) - # For some reason doing above calculation inside a string does not work. - # So we solve it with an optional memory string and using select_first. String? memory # 1 minute initialization + time reading in index (1 minute per G) + time aligning data. Int timeMinutes = 1 + ceil(size(indexFiles, "G")) + ceil(size(flatten([inputR1, inputR2]), "G") * 300 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } + # Use a margin of 30% index size. Real memory usage is ~30 GiB for a 27 GiB index. + Int memoryGb = 1 + ceil(size(indexFiles, "G") * 1.3) + # For some reason doing above calculation inside a string does not work. + # So we solve it with an optional memory string and using select_first + # in the runtime section. + #TODO Could be extended for all possible output extensions Map[String, String] samOutputNames = {"BAM SortedByCoordinate": "sortedByCoord.out.bam"} From 4402a95dc00cce75fffc55bb8bfec634425b42d8 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 17 Jun 2020 08:59:17 +0200 Subject: [PATCH 133/902] Update image and tasks for chunk scatter --- chunked-scatter.wdl | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 96dbf1eb..6f2b465d 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -28,27 +28,28 @@ task ChunkedScatter { Int? overlap Int? 
minimumBasesPerFile + String memory = "256M" Int timeMinutes = 2 - String dockerImage = "quay.io/biocontainers/chunked-scatter:0.1.0--py_0" + String dockerImage = "quay.io/biocontainers/chunked-scatter:0.2.0--py_0" } command { - set -e - mkdir -p ~{prefix} chunked-scatter \ + --print-paths \ -p ~{prefix} \ - -i ~{inputFile} \ ~{"-c " + chunkSize} \ ~{"-o " + overlap} \ - ~{"-m " + minimumBasesPerFile} + ~{"-m " + minimumBasesPerFile} \ + ~{inputFile} } output { - Array[File] scatters = glob(prefix + "*.bed") + Array[File] scatters = read_lines(stdout()) } runtime { - memory: "4G" + cpu: 1 + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -75,7 +76,7 @@ task ScatterRegions { Int? scatterSize Int timeMinutes = 2 String memory = "256M" - String dockerImage = "biowdl/chunked-scatter:latest" + String dockerImage = "quay.io/biocontainers/chunked-scatter:0.2.0--py_0" } String finalSize = if defined(scatterSize) then "~{scatterSize}" else "~{scatterSizeMillions}000000" From aea930c687263740f1edf326de7b2c2ba21219f3 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 17 Jun 2020 09:00:20 +0200 Subject: [PATCH 134/902] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78891941..7813c209 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Update the image for chunked-scatter and make use of new features from 0.2.0. + Tuned resource requirements for GATK VariantEval, MultiQC, Picard metrics and STAR. 
+ Added a new task for [scatter-regions](https://github.com/biowdl/chunked-scatter) From a0fccc3cf59afd835836d433132a46f100eb3ce6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 17 Jun 2020 09:00:51 +0200 Subject: [PATCH 135/902] Update parameter_meta --- chunked-scatter.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 6f2b465d..3ef0c747 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -61,6 +61,7 @@ task ChunkedScatter { overlap: {description: "Equivalent to chunked-scatter's `-o` option.", category: "advanced"} minimumBasesPerFile: {description: "Equivalent to chunked-scatter's `-m` option.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From 185ead0f78f7450328d473ffbce157117907de0d Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 18 Jun 2020 10:43:20 +0200 Subject: [PATCH 136/902] Add pbmm2, the PacBio wrapper for minimap2 See https://github.com/PacificBiosciences/pbmm2 for details. 
--- pbmm2.wdl | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 pbmm2.wdl diff --git a/pbmm2.wdl b/pbmm2.wdl new file mode 100644 index 00000000..18d3cb9a --- /dev/null +++ b/pbmm2.wdl @@ -0,0 +1,74 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task Mapping { + input { + String presetOption + Boolean sort=true + String sample + File referenceMMI + File queryFile + + Int cores = 4 + String memory = "30G" + Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) + String dockerImage = "quay.io/biocontainers/pbmm2:1.3.0--h56fc30b_1" + } + + command { + set -e + pbmm2 align \ + --preset ~{presetOption} \ + ~{true="--sort" false="" sort} \ + -j ~{cores} \ + ~{referenceMMI} \ + ~{queryFile} \ + ~{sample}.align.bam + + } + + output { + File outputAlignmentFile = sample + ".align.bam" + } + + runtime { + cpu: cores + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + presetOption: {description: "This option applies multiple options at the same time.", category: "required"} + sort: {description: "Sort the output bam file.", category: "advanced"} + sample: {description: "Name of the sample"} + referenceMMI: {description: "MMI file for the reference.", category: "required"} + queryFile: {description: "BAM file with reads to align against the reference.", category: "required"} + cores: {description: "The number of cores to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + outputAlignmentFile: {description: "Mapped bam files."} + } +} From 59366d5ebdea9e781dfea4113f83871aaf30244d Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 18 Jun 2020 10:45:17 +0200 Subject: [PATCH 137/902] Update changelog for pbmm2 --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7813c209..d1e84d9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Add tasks for pbmm2, the PacBio wrapper for minimap2. + Update the image for chunked-scatter and make use of new features from 0.2.0. + Tuned resource requirements for GATK VariantEval, MultiQC, Picard metrics and STAR. From 84f1235c44abf723c9f15f5e471891fbed98b5b9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 19 Jun 2020 13:17:39 +0200 Subject: [PATCH 138/902] Do not use unnecessary threads --- bwa.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index 3dd7883b..2cf637d1 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -132,7 +132,9 @@ task Kit { } runtime { - cpu: threads + 1 # One thread for bwa-postalt + samtools. + # One extra thread for bwa-postalt + samtools is not needed. + # These only use 5-10% of compute power and not always simultaneously. 
+ cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage From b7d9dad9ad9f804fc41ab8bda3e3961b0441fabc Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 22 Jun 2020 11:48:00 +0200 Subject: [PATCH 139/902] Don't sort --- bwa.wdl | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 2cf637d1..c5980b9b 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -117,13 +117,7 @@ task Kit { k8 /opt/conda/bin/bwa-postalt.js \ -p ~{outputPrefix}.hla \ ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ - samtools sort \ - ~{"-@ " + sortThreads} \ - -m ~{sortMemoryPerThread} \ - -l ~{compressionLevel} \ - - \ - -o ~{outputPrefix}.aln.bam - samtools index ~{outputPrefix}.aln.bam ~{outputPrefix}.aln.bai + samtools view -b -1 - > ~{outputPrefix}.aln.bam } output { From 0c9ba9ae63938bf9c96a29a95c76bba5b0e64ad1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 22 Jun 2020 12:22:45 +0200 Subject: [PATCH 140/902] Update samtools sort to also index --- samtools.wdl | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 5648eb1c..6454bd3a 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -366,16 +366,19 @@ task Merge { task Sort { input { File inputBam - String outputPath + String outputPath = basename(inputBam, "\.bam") + ".sorted.bam" Boolean sortByName = false Int compressionLevel = 1 - - String memory = "2G" + Int threads = 0 + Int memoryPerThread = 4 + Int memoryGb = 1 + (threads + 1) * memoryPerThread String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) - Int? threads } + # Select first needed as outputPath is optional input. 
(bug in cromwell) + String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") + command { set -e mkdir -p "$(dirname ~{outputPath})" @@ -385,15 +388,17 @@ task Sort { ~{"--threads " + threads} \ -o ~{outputPath} \ ~{inputBam} + samtools index ~{outputPath} ~{bamIndexPath} } output { - File outputSortedBam = outputPath + File outputBam = outputPath + File outputBamIndex = bamIndexPath } runtime { - cpu: 1 + select_first([threads, 0]) - memory: memory + cpu: 1 + threads + memory: "~{memoryGb}G" docker: dockerImage time_minutes: timeMinutes } @@ -404,12 +409,12 @@ task Sort { outputPath: {description: "Output directory path + output file.", category: "required"} sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} + memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} # outputs - outputSortedBam: {description: "Sorted BAM file."} + outputBam: {description: "Sorted BAM file."} } } From feae6e792468ec23f6936dc97b349be270dbb6a9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 22 Jun 2020 13:07:55 +0200 Subject: [PATCH 141/902] remove other sorting bits --- bwa.wdl | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index c5980b9b..14b9a005 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -92,14 +92,7 @@ task Kit { Boolean sixtyFour = false Int threads = 4 - # Samtools uses *additional* threads. So by default this option should - # not be used. - Int? sortThreads - # Compression uses zlib. Higher than level 2 causes enormous slowdowns. - # GATK/Picard default is level 2. 
- String sortMemoryPerThread = "4G" - Int compressionLevel = 1 - String memory = "20G" + String memory = 1 + ceil(size(bwaIndex.indexFiles, "G")) Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) String dockerImage = "biocontainers/bwakit:v0.7.15_cv1" } @@ -122,7 +115,6 @@ task Kit { output { File outputBam = outputPrefix + ".aln.bam" - File outputBamIndex = outputPrefix + ".aln.bai" } runtime { @@ -143,9 +135,7 @@ task Kit { readgroup: {description: "A readgroup identifier.", category: "common"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} threads: {description: "The number of threads to use for alignment.", category: "advanced"} - sortThreads: {description: "The number of additional threads to use for sorting.", category: "advanced"} - sortMemoryPerThread: {description: "The amount of memory for each sorting thread.", category: "advanced"} - compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", @@ -153,7 +143,6 @@ task Kit { # outputs outputBam: "The produced BAM file." - outputBamIndex: "The index of the produced BAM file." 
} } From 424178d62f31a773700f5e7fb46476fd043ab51e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 22 Jun 2020 13:25:55 +0200 Subject: [PATCH 142/902] use correct memory estimate --- bwa.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 14b9a005..3f1276d1 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -92,7 +92,7 @@ task Kit { Boolean sixtyFour = false Int threads = 4 - String memory = 1 + ceil(size(bwaIndex.indexFiles, "G")) + String memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G")) Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) String dockerImage = "biocontainers/bwakit:v0.7.15_cv1" } @@ -121,7 +121,7 @@ task Kit { # One extra thread for bwa-postalt + samtools is not needed. # These only use 5-10% of compute power and not always simultaneously. cpu: threads - memory: memory + memory: "~{memoryGb}G" time_minutes: timeMinutes docker: dockerImage } @@ -136,7 +136,7 @@ task Kit { sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} threads: {description: "The number of threads to use for alignment.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 10c023ab71a6dc3fc529d439a59e5031c9682ea7 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 23 Jun 2020 13:50:21 +0200 Subject: [PATCH 143/902] use newer more experimental image --- bwa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index 3f1276d1..375d8d0b 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -94,7 +94,7 @@ task Kit { Int threads = 4 String memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G")) Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) - String dockerImage = "biocontainers/bwakit:v0.7.15_cv1" + String dockerImage = "biowdl/bwakit:0.7.17-dev-experimental" } command { From 0cb76481426e96f75b81a0e1fb516a43e8bffba5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Jun 2020 10:38:48 +0200 Subject: [PATCH 144/902] Revert "Don't sort" This reverts commit b7d9dad9ad9f804fc41ab8bda3e3961b0441fabc. --- bwa.wdl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index 375d8d0b..3e11eb2f 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -110,7 +110,13 @@ task Kit { k8 /opt/conda/bin/bwa-postalt.js \ -p ~{outputPrefix}.hla \ ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ - samtools view -b -1 - > ~{outputPrefix}.aln.bam + samtools sort \ + ~{"-@ " + sortThreads} \ + -m ~{sortMemoryPerThread} \ + -l ~{compressionLevel} \ + - \ + -o ~{outputPrefix}.aln.bam + samtools index ~{outputPrefix}.aln.bam ~{outputPrefix}.aln.bai } output { From 5b096cee1b35d8ff404567571ce429f3a46ec7c4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Jun 2020 10:43:27 +0200 Subject: [PATCH 145/902] Put sorting back into bwakit task --- bwa.wdl | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 3e11eb2f..0095f48c 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -92,7 +92,16 @@ task Kit { Boolean sixtyFour = 
false Int threads = 4 - String memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G")) + + # Samtools uses *additional* threads. So by default this option should + # not be used. + Int sortThreads = 0 + # Compression uses zlib. Higher than level 2 causes enormous slowdowns. + # GATK/Picard default is level 2. + Int sortMemoryPerThreadGb = 4 + Int compressionLevel = 1 + # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. + Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) String dockerImage = "biowdl/bwakit:0.7.17-dev-experimental" } @@ -112,7 +121,7 @@ task Kit { ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ samtools sort \ ~{"-@ " + sortThreads} \ - -m ~{sortMemoryPerThread} \ + -m ~{sortMemoryPerThreadGb}G \ -l ~{compressionLevel} \ - \ -o ~{outputPrefix}.aln.bam @@ -121,6 +130,7 @@ task Kit { output { File outputBam = outputPrefix + ".aln.bam" + File outputBamIndex = outputPrefix + ".aln.bai" } runtime { @@ -141,14 +151,18 @@ task Kit { readgroup: {description: "A readgroup identifier.", category: "common"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} threads: {description: "The number of threads to use for alignment.", category: "advanced"} - memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} + sortThreads: {description: "The number of additional threads to use for sorting.", category: "advanced"} + sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run 
in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs outputBam: "The produced BAM file." + outputBamIndex: "The index of the produced BAM file." } } From 99647123ad96a38aee08ee5a85dd03b7d764a095 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Jun 2020 10:49:13 +0200 Subject: [PATCH 146/902] Make sure samtools uses memory --- samtools.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 6454bd3a..825a0531 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -370,8 +370,8 @@ task Sort { Boolean sortByName = false Int compressionLevel = 1 Int threads = 0 - Int memoryPerThread = 4 - Int memoryGb = 1 + (threads + 1) * memoryPerThread + Int memoryPerThreadGb = 4 + Int memoryGb = 1 + (threads + 1) * memoryPerThreadGb String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) } @@ -386,6 +386,7 @@ task Sort { -l ~{compressionLevel} \ ~{true="-n" false="" sortByName} \ ~{"--threads " + threads} \ + -m ~{memoryPerThreadGb}G \ -o ~{outputPath} \ ~{inputBam} samtools index ~{outputPath} ~{bamIndexPath} @@ -410,6 +411,7 @@ task Sort { sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} + memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 4f6bd9ad6211697fbc60a5e926717aa6d0d31398 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Jun 2020 10:50:40 +0200 Subject: [PATCH 147/902] update parameter_meta --- bwa.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index 0095f48c..12f2ad54 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -155,7 +155,6 @@ task Kit { sortThreads: {description: "The number of additional threads to use for sorting.", category: "advanced"} sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 0665c22fa2d64813c022e3e9dc731dac2a5fa63c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Jun 2020 13:25:52 +0200 Subject: [PATCH 148/902] Add sortSam task --- picard.wdl | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/picard.wdl b/picard.wdl index 0ee5da36..ef648aea 100644 --- a/picard.wdl +++ b/picard.wdl @@ -650,6 +650,52 @@ task ScatterIntervalList { } } +task SortSam { + input { + File inputBam + String outputPath + + Int XmxGb = 4 + Int memoryGb = 1 + XmxGb + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + # A mulled container is needed to have both picard and bwa in one container. + # This container contains: picard (2.18.7), bwa (0.7.17-r1188) + String dockerImage = "quay.io/biocontainers/picard:2.23.1--h37ae868_0" + } + + command { + mkdir -p "$(dirname ~{outputPath})" + picard -Xmx~{XmxGb}G -XX:ParallelGCThreads=1 SortSam \ + INPUT=/dev/stdin \ + OUTPUT=~{outputPath} \ + SORT_ORDER=coordinate \ + CREATE_INDEX=true + } + + output { + File outputBam = outputPath + File outputBamIndex = sub(outputPath, "\.bam$", ".bai") + } + + runtime { + cpu: 1 + memory: "~{memoryGb}G" + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputBam: {description: "The unsorted input BAM file", category: "required"} + outputPath: {description: "The location the output BAM file should be written to.", category: "required"} + memoryGb: {description: "The amount of memory this job will use.", category: "advanced"} + XmxGb: {description: "The maximum memory available to picard SortSam. Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task SortVcf { input { Array[File]+ vcfFiles From f4a8fac6b1f3717b770c6e4bf001af70bae27373 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Jun 2020 14:25:31 +0200 Subject: [PATCH 149/902] Fix sambamba index command in sort --- sambamba.wdl | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 sambamba.wdl diff --git a/sambamba.wdl b/sambamba.wdl new file mode 100644 index 00000000..942a8ead --- /dev/null +++ b/sambamba.wdl @@ -0,0 +1,80 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task Sort { + input { + File inputBam + String outputPath = basename(inputBam, "\.bam") + ".sorted.bam" + Boolean sortByName = false + Int compressionLevel = 1 + Int threads = 1 + Int memoryPerThreadGb = 4 + Int memoryGb = 1 + (threads + 1) * memoryPerThreadGb + String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + } + + # Select first needed as outputPath is optional input. (bug in cromwell) + String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + sambamba sort \ + -l ~{compressionLevel} \ + ~{true="-n" false="" sortByName} \ + ~{"--nthreads " + threads} \ + -m ~{memoryPerThreadGb}G \ + -o ~{outputPath} \ + ~{inputBam} + sambamba index \ + ~{"--nthreads " + threads} \ + ~{outputPath} ~{bamIndexPath} + } + + output { + File outputBam = outputPath + File outputBamIndex = bamIndexPath + } + + runtime { + cpu: threads + memory: "~{memoryGb}G" + docker: dockerImage + time_minutes: timeMinutes + } + + parameter_meta { + # inputs + inputBam: {description: "The input SAM file.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "required"} + sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} + memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} + memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + # outputs + outputBam: {description: "Sorted BAM file."} + } +} \ No newline at end of file From 45ffa55397fed9d8385a3df28abaf65d8783b690 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Jun 2020 14:26:54 +0200 Subject: [PATCH 150/902] add threads on index as well --- samtools.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 825a0531..fbc491eb 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -389,7 +389,9 @@ task Sort { -m ~{memoryPerThreadGb}G \ -o ~{outputPath} \ ~{inputBam} - samtools index ~{outputPath} ~{bamIndexPath} + samtools index \ + -@ ~{threads} \ + ~{outputPath} ~{bamIndexPath} } output { From c6eb077f633198a14832ef76497e71a191daaf30 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Jun 2020 15:42:44 +0200 Subject: [PATCH 151/902] correct memory calculation --- bwa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index 12f2ad54..8d2cb75c 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -101,7 +101,7 @@ task Kit { Int sortMemoryPerThreadGb = 4 Int compressionLevel = 1 # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
- Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads + Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * (sortThreads + 1) Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) String dockerImage = "biowdl/bwakit:0.7.17-dev-experimental" } From fac254be0e5ef5ab470a1328c182d18c1355cf48 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Jun 2020 15:48:49 +0200 Subject: [PATCH 152/902] use correct memory for sambama --- sambamba.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sambamba.wdl b/sambamba.wdl index 942a8ead..440b8f03 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -28,7 +28,7 @@ task Sort { Int compressionLevel = 1 Int threads = 1 Int memoryPerThreadGb = 4 - Int memoryGb = 1 + (threads + 1) * memoryPerThreadGb + Int memoryGb = 1 + threads * memoryPerThreadGb String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) } From 950207105c42cba70095c1b192d8975be32e021f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 25 Jun 2020 08:06:47 +0200 Subject: [PATCH 153/902] Actually sort the inputBam, not stdout --- picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index ef648aea..5ce0c885 100644 --- a/picard.wdl +++ b/picard.wdl @@ -666,7 +666,7 @@ task SortSam { command { mkdir -p "$(dirname ~{outputPath})" picard -Xmx~{XmxGb}G -XX:ParallelGCThreads=1 SortSam \ - INPUT=/dev/stdin \ + INPUT=~{inputBam} \ OUTPUT=~{outputPath} \ SORT_ORDER=coordinate \ CREATE_INDEX=true From 7b7eb3da6af77b88228bd5707a12204990809553 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 25 Jun 2020 08:45:21 +0200 Subject: [PATCH 154/902] Update sortsam task with extra variables --- picard.wdl | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/picard.wdl b/picard.wdl index 
5ce0c885..85c07dba 100644 --- a/picard.wdl +++ b/picard.wdl @@ -654,9 +654,15 @@ task SortSam { input { File inputBam String outputPath - - Int XmxGb = 4 - Int memoryGb = 1 + XmxGb + Boolean sortByName = false + Boolean createIndex = true + Boolean createMd5File = false + Int maxRecordsInRam = 500000 + Int compressionLevel = 1 + + # Default ram of 4 GB. Using 125001.0 to prevent an answer of + # 4.000000001 which gets rounded to 5. + Int XmxGb = ceil(maxRecordsInRam / 125001.0) Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) # A mulled container is needed to have both picard and bwa in one container. # This container contains: picard (2.18.7), bwa (0.7.17-r1188) @@ -664,12 +670,17 @@ task SortSam { } command { + set -e mkdir -p "$(dirname ~{outputPath})" picard -Xmx~{XmxGb}G -XX:ParallelGCThreads=1 SortSam \ INPUT=~{inputBam} \ OUTPUT=~{outputPath} \ - SORT_ORDER=coordinate \ - CREATE_INDEX=true + MAX_RECORDS_IN_RAM=~{maxRecordsInRam} \ + SORT_ORDER=~{true="queryname" false="coordinate" sortByName} \ + CREATE_INDEX=~{true="true" false="false" createIndex} \ + COMPRESSION_LEVEL=~{compressionLevel} \ + CREATE_MD5_FILE=~{true="true" false="false" createMd5File} + } output { @@ -679,7 +690,7 @@ task SortSam { runtime { cpu: 1 - memory: "~{memoryGb}G" + memory: "~{1 + XmxGb}G" time_minutes: timeMinutes docker: dockerImage } @@ -687,7 +698,6 @@ task SortSam { parameter_meta { inputBam: {description: "The unsorted input BAM file", category: "required"} outputPath: {description: "The location the output BAM file should be written to.", category: "required"} - memoryGb: {description: "The amount of memory this job will use.", category: "advanced"} XmxGb: {description: "The maximum memory available to picard SortSam. 
Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 160ed6f2525aba0e673022beda11e24272ae4d31 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 25 Jun 2020 08:49:17 +0200 Subject: [PATCH 155/902] update picard --- picard.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/picard.wdl b/picard.wdl index 85c07dba..10287583 100644 --- a/picard.wdl +++ b/picard.wdl @@ -662,6 +662,7 @@ task SortSam { # Default ram of 4 GB. Using 125001.0 to prevent an answer of # 4.000000001 which gets rounded to 5. + # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) # A mulled container is needed to have both picard and bwa in one container. 
From a7f50e409d90316fcac56c3ebd8ec5b705f39cb3 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 25 Jun 2020 09:44:48 +0200 Subject: [PATCH 156/902] Picard markduplicates has a default compression of 1 --- picard.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index 10287583..21ff5ea1 100644 --- a/picard.wdl +++ b/picard.wdl @@ -462,6 +462,8 @@ task MarkDuplicates { Array[File] inputBamIndexes String outputBamPath String metricsPath + Int compressionLevel = 1 + Boolean createMd5File = false String memory = "9G" String javaXmx = "8G" @@ -488,13 +490,14 @@ task MarkDuplicates { INPUT=~{sep=' INPUT=' inputBams} \ OUTPUT=~{outputBamPath} \ METRICS_FILE=~{metricsPath} \ + COMPRESSION_LEVEL=~{compressionLevel} \ VALIDATION_STRINGENCY=SILENT \ ~{"READ_NAME_REGEX=" + read_name_regex} \ OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \ CLEAR_DT="false" \ CREATE_INDEX=true \ ADD_PG_TAG_TO_READS=false \ - CREATE_MD5_FILE=true + CREATE_MD5_FILE=~{true="true" false="false" createMd5File} } output { From 363e90e5d7ddf5a69715b3e95cbbb2a91d9248ed Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 25 Jun 2020 09:59:38 +0200 Subject: [PATCH 157/902] Do not create md5 file by default anymore --- picard.wdl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/picard.wdl b/picard.wdl index 21ff5ea1..f0f687f0 100644 --- a/picard.wdl +++ b/picard.wdl @@ -367,6 +367,8 @@ task GatherBamFiles { String memory = "4G" String javaXmx = "3G" + Int compressionLevel = 1 + Boolean createMd5File = false Int timeMinutes = 1 + ceil(size(inputBams, "G") * 0.5) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -378,8 +380,9 @@ task GatherBamFiles { GatherBamFiles \ INPUT=~{sep=' INPUT=' inputBams} \ OUTPUT=~{outputBamPath} \ + COMPRESSION_LEVEL=~{compressionLevel} \ CREATE_INDEX=true \ - CREATE_MD5_FILE=true + CREATE_MD5_FILE=~{true="true" false="false" createMd5File} } output { @@ -658,7 +661,6 @@ task SortSam { 
File inputBam String outputPath Boolean sortByName = false - Boolean createIndex = true Boolean createMd5File = false Int maxRecordsInRam = 500000 Int compressionLevel = 1 @@ -681,7 +683,7 @@ task SortSam { OUTPUT=~{outputPath} \ MAX_RECORDS_IN_RAM=~{maxRecordsInRam} \ SORT_ORDER=~{true="queryname" false="coordinate" sortByName} \ - CREATE_INDEX=~{true="true" false="false" createIndex} \ + CREATE_INDEX=true \ COMPRESSION_LEVEL=~{compressionLevel} \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} From e15bcc6015bacc6e3b6c68257537faef28d0d1e8 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 25 Jun 2020 15:33:07 +0200 Subject: [PATCH 158/902] Add new tasks to talon. --- CHANGELOG.md | 3 ++ scripts | 2 +- talon.wdl | 82 +++++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 75 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7813c209..1ac182c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ TALON: Update `FilterTalonTranscripts` to new version. ++ TALON: Add `GetSpliceJunctions` & `LabelReads` tasks. ++ TALON: Update to version 5.0. + Update the image for chunked-scatter and make use of new features from 0.2.0. + Tuned resource requirements for GATK VariantEval, MultiQC, Picard metrics and STAR. 
diff --git a/scripts b/scripts index b83da72b..325a129c 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit b83da72b9b43b956a3062b78fb08044eb9fae464 +Subproject commit 325a129c14de56b2055ee0e9e0da7dc74df5fec4 diff --git a/talon.wdl b/talon.wdl index 98e0c13a..2e944382 100644 --- a/talon.wdl +++ b/talon.wdl @@ -32,7 +32,7 @@ task CreateAbundanceFileFromDatabase { String memory = "4G" Int timeMinutes = 30 - String dockerImage = "biocontainers/talon:v4.4.2_cv1" + String dockerImage = "biocontainers/talon:v5.0_cv1" } command { @@ -88,7 +88,7 @@ task CreateGtfFromDatabase { String memory = "4G" Int timeMinutes = 30 - String dockerImage = "biocontainers/talon:v4.4.2_cv1" + String dockerImage = "biocontainers/talon:v5.0_cv1" } command { @@ -137,12 +137,16 @@ task FilterTalonTranscripts { File databaseFile String annotationVersion String outputPrefix + Float maxFracA = 0.5 + Int minCount = 5 + Boolean allowGenomic = false - File? pairingsFile + File? datasetsFile + Int? minDatasets String memory = "4G" Int timeMinutes = 30 - String dockerImage = "biocontainers/talon:v4.4.2_cv1" + String dockerImage = "biocontainers/talon:v5.0_cv1" } command { @@ -152,7 +156,11 @@ task FilterTalonTranscripts { --db=~{databaseFile} \ -a ~{annotationVersion} \ ~{"--o=" + outputPrefix + "_whitelist.csv"} \ - ~{"-p " + pairingsFile} + --maxFracA=~{maxFracA} \ + --minCount=~{minCount} \ + ~{true="--allowGenomic" false="" allowGenomic} \ + --datasets=~{datasetsFile} \ + --minDatasets=~{minDatasets} } output { @@ -170,7 +178,11 @@ task FilterTalonTranscripts { databaseFile: {description: "TALON database.", category: "required"} annotationVersion: {description: "Which annotation version to use.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - pairingsFile: {description: "A file indicating which datasets should be considered together.", category: "advanced"} + maxFracA: {description: "Maximum fraction of As to 
allow in the window located immediately after any read assigned to a novel transcript.", category: "advanced"} + minCount: {description: "Number of minimum occurrences required for a novel transcript PER dataset.", category: "advanced"} + allowGenomic: {description: "If this option is set, transcripts from the Genomic novelty category will be permitted in the output.", category: "advanced"} + datasetsFile: {description: "Datasets to include.", category: "advanced"} + minDatasets: {description: "Minimum number of datasets novel transcripts must be found in.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -190,7 +202,7 @@ task GetReadAnnotations { String memory = "4G" Int timeMinutes = 30 - String dockerImage = "biocontainers/talon:v4.4.2_cv1" + String dockerImage = "biocontainers/talon:v5.0_cv1" } command { @@ -228,6 +240,54 @@ task GetReadAnnotations { } } +task GetSpliceJunctions { + input { + File GTFfile + File databaseFile + File referenceGTF + String runMode = "intron" + String outputPrefix + + String memory = "4G" + Int timeMinutes = 30 + String dockerImage = "biocontainers/talon:v5.0_cv1" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + talon_get_sjs \ + --gtf ~{GTFfile} \ + --db ~{databaseFile} \ + --ref ~{referenceGTF} \ + --mode ~{runMode} \ + --outprefix ~{outputPrefix} + } + + output { + + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + GTFfile: {description: "TALON GTF file from which to extract exons/introns.", category: "required"} + databaseFile: { description: "TALON database.", category: "required"} + 
referenceGTF: {description: "GTF reference file (ie GENCODE).", category: "required"} + runMode: {description: "Determines whether to include introns or exons in the output.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs +} + task InitializeTalonDatabase { input { File GTFfile @@ -241,7 +301,7 @@ task InitializeTalonDatabase { String memory = "10G" Int timeMinutes = 60 - String dockerImage = "biocontainers/talon:v4.4.2_cv1" + String dockerImage = "biocontainers/talon:v5.0_cv1" } command { @@ -293,7 +353,7 @@ task ReformatGtf { String memory = "4G" Int timeMinutes = 30 - String dockerImage = "biocontainers/talon:v4.4.2_cv1" + String dockerImage = "biocontainers/talon:v5.0_cv1" } command { @@ -334,7 +394,7 @@ task SummarizeDatasets { String memory = "4G" Int timeMinutes = 50 - String dockerImage = "biocontainers/talon:v4.4.2_cv1" + String dockerImage = "biocontainers/talon:v5.0_cv1" } command { @@ -386,7 +446,7 @@ task Talon { Int cores = 4 String memory = "25G" Int timeMinutes = 2880 - String dockerImage = "biocontainers/talon:v4.4.2_cv1" + String dockerImage = "biocontainers/talon:v5.0_cv1" } command <<< From ddae57644147098dda2d867029cb0af627e145fe Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 26 Jun 2020 09:15:13 +0200 Subject: [PATCH 159/902] Increase time estimates for sort tasks --- picard.wdl | 2 +- sambamba.wdl | 2 +- samtools.wdl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/picard.wdl b/picard.wdl index f0f687f0..f03535c0 100644 --- a/picard.wdl +++ b/picard.wdl @@ 
-669,7 +669,7 @@ task SortSam { # 4.000000001 which gets rounded to 5. # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) # A mulled container is needed to have both picard and bwa in one container. # This container contains: picard (2.18.7), bwa (0.7.17-r1188) String dockerImage = "quay.io/biocontainers/picard:2.23.1--h37ae868_0" diff --git a/sambamba.wdl b/sambamba.wdl index 440b8f03..cccfddd3 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -30,7 +30,7 @@ task Sort { Int memoryPerThreadGb = 4 Int memoryGb = 1 + threads * memoryPerThreadGb String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) } # Select first needed as outputPath is optional input. (bug in cromwell) diff --git a/samtools.wdl b/samtools.wdl index fbc491eb..edcea9be 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -373,7 +373,7 @@ task Sort { Int memoryPerThreadGb = 4 Int memoryGb = 1 + (threads + 1) * memoryPerThreadGb String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) } # Select first needed as outputPath is optional input. 
(bug in cromwell) From 5db2a1f9097132d11d3654ebe45e88364eeef517 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 26 Jun 2020 10:42:12 +0200 Subject: [PATCH 160/902] add markdup task --- sambamba.wdl | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/sambamba.wdl b/sambamba.wdl index cccfddd3..b4114297 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -20,6 +20,49 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. + +task Markdup { + input { + Array[File] inputBams + String outputPath + Int threads = 1 + Int compressionLevel = 1 + Int? hashTableSize + Int? overFlowListSize + Int? sortBufferSize + Int? ioBufferSize + Boolean removeDuplicates = false + + # According to the manual sambamba markdup uses about 2G per 100 million reads. + Int memoryGb = 1 + ceil(size(inputBams, 'G') / 8) + String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) + } + String bamIndexPath = sub(outputPath, "\.bam$", ".bai") + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + sambamba markdup \ + --nthreads ~{threads} \ + -l ~{compressionLevel} \ + ~{true="-r" false="" removeDuplicates} \ + ~{"--hash-table-size " + hashTableSize} \ + ~{"--overflow-list-size " + overFlowListSize} \ + ~{"--sort-buffer-size " + sortBufferSize} \ + ~{"--io-buffer-size " + ioBufferSize} \ + ~{sep=' ' inputBams} ~{outputPath} + sambamba index ~{outputPath} ~{bamIndexPath} + } + + runtime { + memory: "~{memoryGb}G" + cpu: threads + time_minutes: timeMinutes + docker: dockerImage + } +} + task Sort { input { File inputBam From caaba5971f6295d13ea33f728744fdb5f692d007 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 26 Jun 2020 10:50:10 +0200 Subject: [PATCH 161/902] add outputs --- sambamba.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sambamba.wdl b/sambamba.wdl index b4114297..b402f580 100644 --- 
a/sambamba.wdl +++ b/sambamba.wdl @@ -55,6 +55,11 @@ task Markdup { sambamba index ~{outputPath} ~{bamIndexPath} } + output { + File outputBam = outputPath + File outputBamIndex = bamIndexPath + } + runtime { memory: "~{memoryGb}G" cpu: threads From 2c4dd54c7dd91809558d2f81a139a94c2399bb39 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 26 Jun 2020 11:05:08 +0200 Subject: [PATCH 162/902] fix picard optional md5 --- picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index f03535c0..39575392 100644 --- a/picard.wdl +++ b/picard.wdl @@ -506,7 +506,7 @@ task MarkDuplicates { output { File outputBam = outputBamPath File outputBamIndex = sub(outputBamPath, "\.bam$", ".bai") - File outputBamMd5 = outputBamPath + ".md5" + File? outputBamMd5 = outputBamPath + ".md5" File metricsFile = metricsPath } From 8df42e20f13986734efb6fa085088bf85da8117d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 26 Jun 2020 11:07:26 +0200 Subject: [PATCH 163/902] do not require indexes on the markdup task --- picard.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/picard.wdl b/picard.wdl index 39575392..69a14538 100644 --- a/picard.wdl +++ b/picard.wdl @@ -462,7 +462,6 @@ task GatherVcfs { task MarkDuplicates { input { Array[File]+ inputBams - Array[File] inputBamIndexes String outputBamPath String metricsPath Int compressionLevel = 1 @@ -519,7 +518,6 @@ task MarkDuplicates { parameter_meta { # inputs inputBams: {description: "The BAM files for which the duplicate reads should be marked.", category: "required"} - inputBamIndexes: {description: "Th eindexes for the input BAM files.", category: "required"} outputBamPath: {description: "The location where the ouptut BAM file should be written.", category: "required"} metricsPath: {description: "The location where the output metrics file should be written.", category: "required"} read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", 
category: "advanced"} From 7252022d6ab47c58962ae330ffea273056f3560e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 26 Jun 2020 14:21:07 +0200 Subject: [PATCH 164/902] sambamba creates index automaticall --- sambamba.wdl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sambamba.wdl b/sambamba.wdl index b402f580..4de4dfdc 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -94,9 +94,8 @@ task Sort { -m ~{memoryPerThreadGb}G \ -o ~{outputPath} \ ~{inputBam} - sambamba index \ - ~{"--nthreads " + threads} \ - ~{outputPath} ~{bamIndexPath} + # sambamba creates an index for us + mv ~{outputPath}.bai ~{bamIndexPath} } output { From fbc7b95956624b01e0e60574e18f6849cc149976 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 26 Jun 2020 14:31:02 +0200 Subject: [PATCH 165/902] proper threads for sambamba --- sambamba.wdl | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sambamba.wdl b/sambamba.wdl index 4de4dfdc..1b10b37a 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -25,7 +25,8 @@ task Markdup { input { Array[File] inputBams String outputPath - Int threads = 1 + # Sambamba additional threads like samtools + Int threads = 0 Int compressionLevel = 1 Int? hashTableSize Int? 
overFlowListSize @@ -52,7 +53,8 @@ task Markdup { ~{"--sort-buffer-size " + sortBufferSize} \ ~{"--io-buffer-size " + ioBufferSize} \ ~{sep=' ' inputBams} ~{outputPath} - sambamba index ~{outputPath} ~{bamIndexPath} + # sambamba creates an index for us + mv ~{outputPath}.bai ~{bamIndexPath} } output { @@ -62,7 +64,7 @@ task Markdup { runtime { memory: "~{memoryGb}G" - cpu: threads + cpu: threads + 1 time_minutes: timeMinutes docker: dockerImage } @@ -74,9 +76,10 @@ task Sort { String outputPath = basename(inputBam, "\.bam") + ".sorted.bam" Boolean sortByName = false Int compressionLevel = 1 - Int threads = 1 + # Sambamba additional threads like samtools + Int threads = 0 Int memoryPerThreadGb = 4 - Int memoryGb = 1 + threads * memoryPerThreadGb + Int memoryGb = 1 + (threads + 1) * memoryPerThreadGb String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) } @@ -104,7 +107,7 @@ task Sort { } runtime { - cpu: threads + cpu: threads + 1 memory: "~{memoryGb}G" docker: dockerImage time_minutes: timeMinutes From 3cbd0cb87d04085d214573a3316f760dad6f08cc Mon Sep 17 00:00:00 2001 From: Redmar Date: Mon, 29 Jun 2020 10:24:13 +0200 Subject: [PATCH 166/902] Add bam index to output --- pbmm2.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index 18d3cb9a..84fbd2d0 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -35,7 +35,6 @@ task Mapping { } command { - set -e pbmm2 align \ --preset ~{presetOption} \ ~{true="--sort" false="" sort} \ @@ -43,11 +42,11 @@ task Mapping { ~{referenceMMI} \ ~{queryFile} \ ~{sample}.align.bam - } output { File outputAlignmentFile = sample + ".align.bam" + File outputIndexFile = sample + ".align.bam.bai" } runtime { @@ -69,6 +68,7 @@ task Mapping { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # output - outputAlignmentFile: {description: "Mapped bam files."} + outputAlignmentFile: {description: "Mapped bam file."} + outputIndexFile: {description: "Bam index file."} } } From 958aa3555a2e745e72b729c7433d2f24d2e6423b Mon Sep 17 00:00:00 2001 From: Redmar Date: Mon, 29 Jun 2020 10:34:52 +0200 Subject: [PATCH 167/902] Update scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index b83da72b..325a129c 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit b83da72b9b43b956a3062b78fb08044eb9fae464 +Subproject commit 325a129c14de56b2055ee0e9e0da7dc74df5fec4 From ab72ec3fd7e5ab78f083bda44b538248980c4358 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 29 Jun 2020 13:59:24 +0200 Subject: [PATCH 168/902] use validation stringency silent for sort sam --- picard.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/picard.wdl b/picard.wdl index 69a14538..ac7f944a 100644 --- a/picard.wdl +++ b/picard.wdl @@ -683,6 +683,7 @@ task SortSam { SORT_ORDER=~{true="queryname" false="coordinate" sortByName} \ CREATE_INDEX=true \ COMPRESSION_LEVEL=~{compressionLevel} \ + VALIDATION_STRINGENCY=SILENT \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} } From e3a3add789a4a42c5acd40c38e2b654162893c32 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 29 Jun 2020 15:01:20 +0200 Subject: [PATCH 169/902] fix sambamba threads --- sambamba.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sambamba.wdl b/sambamba.wdl index 1b10b37a..33377736 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -26,7 +26,7 @@ task Markdup { Array[File] inputBams String outputPath # Sambamba additional threads like samtools - Int threads = 0 + Int threads = 1 Int compressionLevel = 1 Int? hashTableSize Int? 
overFlowListSize @@ -64,7 +64,7 @@ task Markdup { runtime { memory: "~{memoryGb}G" - cpu: threads + 1 + cpu: threads time_minutes: timeMinutes docker: dockerImage } @@ -77,9 +77,9 @@ task Sort { Boolean sortByName = false Int compressionLevel = 1 # Sambamba additional threads like samtools - Int threads = 0 + Int threads = 1 Int memoryPerThreadGb = 4 - Int memoryGb = 1 + (threads + 1) * memoryPerThreadGb + Int memoryGb = 1 + threads * memoryPerThreadGb String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) } @@ -107,7 +107,7 @@ task Sort { } runtime { - cpu: threads + 1 + cpu: threads memory: "~{memoryGb}G" docker: dockerImage time_minutes: timeMinutes From b81efca481f40522b9ab404cb63e834e60f122a7 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 29 Jun 2020 15:13:03 +0200 Subject: [PATCH 170/902] correct additional threads --- samtools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index edcea9be..ddc77c79 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -369,9 +369,9 @@ task Sort { String outputPath = basename(inputBam, "\.bam") + ".sorted.bam" Boolean sortByName = false Int compressionLevel = 1 - Int threads = 0 + Int threads = 1 Int memoryPerThreadGb = 4 - Int memoryGb = 1 + (threads + 1) * memoryPerThreadGb + Int memoryGb = 1 + threads * memoryPerThreadGb String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) } @@ -400,7 +400,7 @@ task Sort { } runtime { - cpu: 1 + threads + cpu: 1 memory: "~{memoryGb}G" docker: dockerImage time_minutes: timeMinutes From 82e9c135d4d5dcf7c86c8108a3a4c6d230cd3691 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 29 Jun 2020 15:15:25 +0200 Subject: [PATCH 171/902] Remove additional thread nonsense --- bwa.wdl | 9 +++------ sambamba.wdl | 4 +--- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/bwa.wdl b/bwa.wdl 
index 8d2cb75c..3b092e8c 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -92,16 +92,13 @@ task Kit { Boolean sixtyFour = false Int threads = 4 - - # Samtools uses *additional* threads. So by default this option should - # not be used. - Int sortThreads = 0 + Int sortThreads = 1 # Compression uses zlib. Higher than level 2 causes enormous slowdowns. # GATK/Picard default is level 2. Int sortMemoryPerThreadGb = 4 Int compressionLevel = 1 # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * (sortThreads + 1) + Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) String dockerImage = "biowdl/bwakit:0.7.17-dev-experimental" } @@ -152,7 +149,7 @@ task Kit { sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} threads: {description: "The number of threads to use for alignment.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} - sortThreads: {description: "The number of additional threads to use for sorting.", category: "advanced"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} diff --git a/sambamba.wdl b/sambamba.wdl index 33377736..4ef62ddc 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -25,7 +25,6 @@ task Markdup { input { Array[File] inputBams String outputPath - # Sambamba additional threads like samtools Int threads = 1 Int 
compressionLevel = 1 Int? hashTableSize @@ -76,7 +75,6 @@ task Sort { String outputPath = basename(inputBam, "\.bam") + ".sorted.bam" Boolean sortByName = false Int compressionLevel = 1 - # Sambamba additional threads like samtools Int threads = 1 Int memoryPerThreadGb = 4 Int memoryGb = 1 + threads * memoryPerThreadGb @@ -122,7 +120,7 @@ task Sort { memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} + threads: {description: "The number of threads that will be used for this task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} # outputs outputBam: {description: "Sorted BAM file."} From 2aa28e29bc867f90a8b6463d2d95a67301364f6d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 30 Jun 2020 08:14:37 +0200 Subject: [PATCH 172/902] Switch sorting to samtools. Do not index as it is not required for marking duplicates --- bwa.wdl | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 3b092e8c..4cfd6fbe 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -29,12 +29,14 @@ task Mem { String? readgroup Int threads = 4 - String memory = "~{5 + ceil(size(bwaIndex.indexFiles, "G"))}G" - String picardXmx = "4G" + Int sortThreads = 1 + Int sortMemoryPerThreadGb = 4 + Int compressionLevel = 1 + # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
+ Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 200 / threads) - # A mulled container is needed to have both picard and bwa in one container. - # This container contains: picard (2.18.7), bwa (0.7.17-r1188) - String dockerImage = "quay.io/biocontainers/mulled-v2-002f51ea92721407ef440b921fb5940f424be842:43ec6124f9f4f875515f9548733b8b4e5fed9aa6-0" + # This container contains: samtools (1.10), bwa (0.7.17-r1188) + String dockerImage = "quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:eabfac3657eda5818bae4090db989e3d41b01542-0" } command { @@ -46,21 +48,21 @@ task Mem { ~{bwaIndex.fastaFile} \ ~{read1} \ ~{read2} \ - | picard -Xmx~{picardXmx} -XX:ParallelGCThreads=1 SortSam \ - INPUT=/dev/stdin \ - OUTPUT=~{outputPath} \ - SORT_ORDER=coordinate \ - CREATE_INDEX=true + | samtools sort \ + ~{"-@ " + sortThreads} \ + -m ~{sortMemoryPerThreadGb}G \ + -l ~{compressionLevel} \ + - \ + -o ~{outputPath} } output { File outputBam = outputPath - File outputBamIndex = sub(outputPath, "\.bam$", ".bai") } runtime { cpu: threads - memory: memory + memory: "~{memoryGb}G" time_minutes: timeMinutes docker: dockerImage } @@ -73,9 +75,9 @@ task Mem { readgroup: {description: "The readgroup to be assigned to the reads. See BWA mem's `-R` option.", category: "common"} threads: {description: "The number of threads to use.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - picardXmx: {description: "The maximum memory available to picard SortSam. 
Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", - category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -93,14 +95,13 @@ task Kit { Int threads = 4 Int sortThreads = 1 - # Compression uses zlib. Higher than level 2 causes enormous slowdowns. - # GATK/Picard default is level 2. Int sortMemoryPerThreadGb = 4 Int compressionLevel = 1 # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) - String dockerImage = "biowdl/bwakit:0.7.17-dev-experimental" + # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools + String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } command { @@ -122,12 +123,10 @@ task Kit { -l ~{compressionLevel} \ - \ -o ~{outputPrefix}.aln.bam - samtools index ~{outputPrefix}.aln.bam ~{outputPrefix}.aln.bai } output { File outputBam = outputPrefix + ".aln.bam" - File outputBamIndex = outputPrefix + ".aln.bai" } runtime { @@ -158,7 +157,6 @@ task Kit { # outputs outputBam: "The produced BAM file." - outputBamIndex: "The index of the produced BAM file." 
} } From d604025c5d1c603a8a29ce66587f648ebbc4d2d9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 30 Jun 2020 08:24:24 +0200 Subject: [PATCH 173/902] Fix bwakit command --- bwa.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 4cfd6fbe..0c35bf3a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -100,7 +100,7 @@ task Kit { # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) - # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools + # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10 String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } @@ -114,7 +114,7 @@ task Kit { ~{read1} \ ~{read2} \ 2> ~{outputPrefix}.log.bwamem | \ - k8 /opt/conda/bin/bwa-postalt.js \ + bwa-postalt.js \ -p ~{outputPrefix}.hla \ ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ samtools sort \ From 187e1277c00f4ce25b496fc8a9f0d986e9870512 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 30 Jun 2020 08:47:34 +0200 Subject: [PATCH 174/902] update sambamba memory requirements and parameter_meta --- sambamba.wdl | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/sambamba.wdl b/sambamba.wdl index 4ef62ddc..bf58dbc8 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -25,18 +25,23 @@ task Markdup { input { Array[File] inputBams String outputPath - Int threads = 1 + # Sambamba scales like this: 1 thread is fully utilized (1). 2 threads 1.8 utilized. 3 -> 2.4, 4-> 2.7. + # 2 threads reduces wall clock time by more than 40%. + Int threads = 2 Int compressionLevel = 1 Int? hashTableSize Int? overFlowListSize - Int? sortBufferSize - Int? 
ioBufferSize + # sortBufferSize and ioBufferSize taken from markdup defaults as of sambamba 0.7.1 + Int sortBufferSize = 2048 + Int ioBufferSize = 128 Boolean removeDuplicates = false - # According to the manual sambamba markdup uses about 2G per 100 million reads. - Int memoryGb = 1 + ceil(size(inputBams, 'G') / 8) + # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size. + # Added 1024 mb as a margin of safety + Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" - Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) + # Time minute calculation does not work well for higher number of threads. + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) / threads } String bamIndexPath = sub(outputPath, "\.bam$", ".bai") @@ -62,11 +67,29 @@ task Markdup { } runtime { - memory: "~{memoryGb}G" + memory: "~{memoryMb}M" cpu: threads time_minutes: timeMinutes docker: dockerImage } + + parameter_meta { + # inputs + inputBams: {description: "The input BAM files.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "required"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} + memoryMb: {description: "The amount of memory available to the job in megabytes.", category: "advanced"} + removeDuplicates: {description: "Whether to remove the duplicates (instead of only marking them).", category: "advanced"} + hashTableSize: {description: "Sets sambamba's hash table size", category: "advanced"} + overFlowListSize: {description: "Sets sambamba's overflow list size", category: "advanced"} + sortBufferSize: {description: "The amount of mb allocated to the sort buffer", category: "advanced"} + ioBufferSize: {description: "The amount of mb allocated to each IO buffer. 
Sambamba uses two IO buffers.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of threads that will be used for this task.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + # outputs + outputBam: {description: "Sorted BAM file."} + } } task Sort { From f71d42fda049459d85b36ae2c871f62b01ca6481 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 30 Jun 2020 11:22:26 +0200 Subject: [PATCH 175/902] Use memory estimates from WGS sample --- gatk.wdl | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 2089eabb..64297c8f 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -93,8 +93,8 @@ task ApplyBQSR { File referenceFastaDict File referenceFastaFai - String memory = "5G" - String javaXmx = "4G" + Int memoryMb = javaXmxMb + 512 + Int javaXmxMb = 2048 Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -102,7 +102,7 @@ task ApplyBQSR { command { set -e mkdir -p "$(dirname ~{outputBamPath})" - gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ + gatk --java-options '-Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1' \ ApplyBQSR \ --create-output-bam-md5 \ --add-output-sam-program-record \ @@ -126,7 +126,7 @@ task ApplyBQSR { runtime { docker: dockerImage time_minutes: timeMinutes - memory: memory + memory: "~{memoryMb}M" } parameter_meta { @@ -141,8 +141,8 @@ task ApplyBQSR { category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", @@ -165,8 +165,8 @@ task BaseRecalibrator { File referenceFastaDict File referenceFastaFai - String memory = "5G" - String javaXmx = "4G" + Int memoryMb = javaXmxMb + 512 + Int javaXmxMb = 1024 Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -174,7 +174,7 @@ task BaseRecalibrator { command { set -e mkdir -p "$(dirname ~{recalibrationReportPath})" - gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ + gatk --java-options '-Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1' \ BaseRecalibrator \ -R ~{referenceFasta} \ -I ~{inputBam} \ @@ -192,7 +192,7 @@ task BaseRecalibrator { runtime { docker: dockerImage time_minutes: timeMinutes - memory: memory + memory: "~{memoryMb}M" } parameter_meta { @@ -210,8 +210,8 @@ task BaseRecalibrator { category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", @@ -940,7 +940,7 @@ task GetPileupSummaries { } } -# Call variants on a single sample with HaplotypeCaller to produce a GVCF + task HaplotypeCaller { input { Array[File]+ inputBams @@ -962,8 +962,8 @@ task HaplotypeCaller { Boolean dontUseSoftClippedBases = false Float? standardMinConfidenceThresholdForCalling - String memory = "5G" - String javaXmx = "4G" + Int memoryMb = javaXmxMb + 512 + Int javaXmxMb = 3072 Int timeMinutes = 400 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -971,7 +971,7 @@ task HaplotypeCaller { command { set -e mkdir -p "$(dirname ~{outputPath})" - gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ + gatk --java-options '-Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1' \ HaplotypeCaller \ -R ~{referenceFasta} \ -O ~{outputPath} \ @@ -996,7 +996,7 @@ task HaplotypeCaller { runtime { docker: dockerImage time_minutes: timeMinutes - memory: memory + memory: "~{memoryMb}M" } parameter_meta { @@ -1022,8 +1022,8 @@ task HaplotypeCaller { dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} pedigree: {description: "Pedigree file for determining the population \"founders\"", category: "common"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", From ef447f286fb9b8f5663dc88a35b16ee26b264e97 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 30 Jun 2020 11:50:35 +0200 Subject: [PATCH 176/902] Use memory requirements based on WGS --- picard.wdl | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/picard.wdl b/picard.wdl index ac7f944a..d778e172 100644 --- a/picard.wdl +++ b/picard.wdl @@ -85,8 +85,8 @@ task CollectMultipleMetrics { Boolean collectSequencingArtifactMetrics = true Boolean collectQualityYieldMetrics = true - String memory = "9G" - String javaXmx = "8G" + Int memoryMb = javaXmxMb + 512 + Int javaXmxMb = 3072 # Additional * 2 because picard multiple metrics reads the reference fasta twice. Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" @@ -96,7 +96,7 @@ task CollectMultipleMetrics { command { set -e mkdir -p "$(dirname ~{basename})" - picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + picard -Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1 \ CollectMultipleMetrics \ I=~{inputBam} \ R=~{referenceFasta} \ @@ -158,7 +158,7 @@ task CollectMultipleMetrics { runtime { docker: dockerImage time_minutes: timeMinutes - memory: memory + memory: "~{memoryMb}M" } parameter_meta { @@ -184,9 +184,8 @@ task CollectMultipleMetrics { category: "advanced"} collectQualityYieldMetrics: {description: "Equivalent to the `PROGRAM=CollectQualityYieldMetrics` argument.", category: "advanced"} - - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", + memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", @@ -365,8 +364,8 @@ task GatherBamFiles { Array[File]+ inputBamsIndex String outputBamPath - String memory = "4G" - String javaXmx = "3G" + Int memoryMb = javaXmxMb + 512 + Int javaXmxMb = 1024 Int compressionLevel = 1 Boolean createMd5File = false Int timeMinutes = 1 + ceil(size(inputBams, "G") * 0.5) @@ -376,7 +375,7 @@ task GatherBamFiles { command { set -e mkdir -p "$(dirname ~{outputBamPath})" - picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + picard -Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1 \ GatherBamFiles \ INPUT=~{sep=' INPUT=' inputBams} \ OUTPUT=~{outputBamPath} \ @@ -394,7 +393,7 @@ task GatherBamFiles { runtime { docker: dockerImage time_minutes: timeMinutes - memory: memory + memory: "~{memoryMb}M" } parameter_meta { @@ -403,8 +402,8 @@ task GatherBamFiles { inputBamsIndex: {description: "The indexes of the input BAM files.", category: "required"} outputBamPath: {description: "The path where the merged BAM file will be written.", caregory: "required"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From e3ba9aa0760d936ce801df955fbea9fa619613b1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 30 Jun 2020 13:50:47 +0200 Subject: [PATCH 177/902] Add threads to samtools --- samtools.wdl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index ddc77c79..0b7ade7c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -329,6 +329,7 @@ task Merge { Array[File]+ bamFiles String outputBamPath = "merged.bam" Boolean force = true + Int threads = 1 Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" @@ -338,7 +339,10 @@ task Merge { command { set -e mkdir -p "$(dirname ~{outputBamPath})" - samtools merge ~{true="-f" false="" force} ~{outputBamPath} ~{sep=' ' bamFiles} + samtools merge \ + --threads ~{threads} \ + ~{true="-f" false="" force} \ + ~{outputBamPath} ~{sep=' ' bamFiles} samtools index ~{outputBamPath} ~{indexPath} } @@ -348,6 +352,7 @@ task Merge { } runtime { + cpu: threads docker: dockerImage time_minutes: timeMinutes } From 2ec5b8e3a4d2aca5d6c5c74c1a439a01ad9997e2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 30 Jun 2020 15:34:01 +0200 Subject: [PATCH 178/902] fix samtools merge --- samtools.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 0b7ade7c..49495693 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -336,11 +336,12 @@ task Merge { } String indexPath = sub(outputBamPath, "\.bam$",".bai") + # Samtools uses additional threads for merge. 
command { set -e mkdir -p "$(dirname ~{outputBamPath})" samtools merge \ - --threads ~{threads} \ + --threads ~{threads - 1} \ ~{true="-f" false="" force} \ ~{outputBamPath} ~{sep=' ' bamFiles} samtools index ~{outputBamPath} ~{indexPath} From 822e68d987b80781e14685283b8081b7a2e49d37 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 30 Jun 2020 15:35:15 +0200 Subject: [PATCH 179/902] increase time for gatherbam --- picard.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index d778e172..8962c4b6 100644 --- a/picard.wdl +++ b/picard.wdl @@ -368,7 +368,8 @@ task GatherBamFiles { Int javaXmxMb = 1024 Int compressionLevel = 1 Boolean createMd5File = false - Int timeMinutes = 1 + ceil(size(inputBams, "G") * 0.5) + # One minute per input gigabyte. + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } From bbeef08f838dacf283cee14a5b1494e46f8fbbe8 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 30 Jun 2020 15:39:32 +0200 Subject: [PATCH 180/902] make compression level optional for gather bam files --- picard.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/picard.wdl b/picard.wdl index 8962c4b6..1d8376ca 100644 --- a/picard.wdl +++ b/picard.wdl @@ -366,7 +366,7 @@ task GatherBamFiles { Int memoryMb = javaXmxMb + 512 Int javaXmxMb = 1024 - Int compressionLevel = 1 + Int? compressionLevel Boolean createMd5File = false # One minute per input gigabyte. 
Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) @@ -380,7 +380,7 @@ task GatherBamFiles { GatherBamFiles \ INPUT=~{sep=' INPUT=' inputBams} \ OUTPUT=~{outputBamPath} \ - COMPRESSION_LEVEL=~{compressionLevel} \ + ~{"COMPRESSION_LEVEL=" + compressionLevel} \ CREATE_INDEX=true \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} } From d29b7e221d6d0804e59007ce7b6260a1ae5f4159 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 1 Jul 2020 09:34:19 +0200 Subject: [PATCH 181/902] make md5 file optional --- picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index 1d8376ca..0e877f23 100644 --- a/picard.wdl +++ b/picard.wdl @@ -388,7 +388,7 @@ task GatherBamFiles { output { File outputBam = outputBamPath File outputBamIndex = sub(outputBamPath, "\.bam$", ".bai") - File outputBamMd5 = outputBamPath + ".md5" + File? outputBamMd5 = outputBamPath + ".md5" } runtime { From 71541cbda170782b87c9cad094b403dff1fe9e1f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 2 Jul 2020 11:00:57 +0200 Subject: [PATCH 182/902] Update tasks. 
--- talon.wdl | 84 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 11 deletions(-) diff --git a/talon.wdl b/talon.wdl index 2e944382..b71d0a89 100644 --- a/talon.wdl +++ b/talon.wdl @@ -242,8 +242,8 @@ task GetReadAnnotations { task GetSpliceJunctions { input { - File GTFfile - File databaseFile + File SJinformationFile + String inputFileType = "db" File referenceGTF String runMode = "intron" String outputPrefix @@ -253,19 +253,20 @@ task GetSpliceJunctions { String dockerImage = "biocontainers/talon:v5.0_cv1" } + Map[String, String] SJfileType = {"db": "--db", "gtf": "--gtf"} + command { set -e mkdir -p "$(dirname ~{outputPrefix})" talon_get_sjs \ - --gtf ~{GTFfile} \ - --db ~{databaseFile} \ + ~{SJfileType[inputFileType] + SJinformationFile} \ --ref ~{referenceGTF} \ --mode ~{runMode} \ --outprefix ~{outputPrefix} } output { - + File outputSJfile = outputPrefix + "_" + runMode + "s.tsv" } runtime { @@ -276,8 +277,8 @@ task GetSpliceJunctions { parameter_meta { # inputs - GTFfile: {description: "TALON GTF file from which to extract exons/introns.", category: "required"} - databaseFile: { description: "TALON database.", category: "required"} + SJinformationFile: {description: "TALON GTF file or database from which to extract exons/introns.", category: "required"} + inputFileType: {description: "The file type of SJinformationFile.", category: "required"} referenceGTF: {description: "GTF reference file (ie GENCODE).", category: "required"} runMode: {description: "Determines whether to include introns or exons in the output.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} @@ -286,6 +287,8 @@ task GetSpliceJunctions { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs + outputSJfile: {description: "File containing locations, novelty and transcript assignments of exons/introns."} + } } task InitializeTalonDatabase { @@ -347,6 +350,65 @@ task InitializeTalonDatabase { } } +task LabelReads { + input { + File SAMfile + File referenceGenome + Int fracaRangeSize = 20 + String tmpDir = "./tmp_label_reads" + Boolean deleteTmp = true + String outputPrefix + + Int threads = 2 + String memory = "4G" + Int timeMinutes = 2880 + String dockerImage = "biocontainers/talon:v5.0_cv1" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + talon_label_reads \ + --f=~{SAMfile} \ + --g=~{referenceGenome} \ + --t=~{threads} \ + --ar=~{fracaRangeSize} \ + --tmpDir=~{tmpDir} \ + ~{true="--deleteTmp" false="" deleteTmp} \ + --o=~{outputPrefix} + } + + output { + File outputLabeledSAM = outputPrefix + "_labeled.sam" + File outputReadLabels = outputPrefix + "_read_labels.tsv" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + SAMfile: {description: "SAM file of transcripts.", category: "required"} + referenceGenome: {description: "Reference genome fasta file.", category: "required"} + fracaRangeSize: {description: "Size of post-transcript interval to compute fraction.", category: "common"} + tmpDir: {description: "Path to directory for tmp files.", category: "advanced"} + deleteTmp: {description: "If set, tmp dir will be removed.", category: "advanced"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + threads: {description: "The number of threads to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + 
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputLabeledSAM: {description: "SAM file with labeled transcripts."} + outputReadLabels: {description: "Tabular file with fraction description per read."} + } +} + task ReformatGtf { input { File GTFfile @@ -443,7 +505,7 @@ task Talon { Float minimumIdentity = 0.8 String outputPrefix - Int cores = 4 + Int threads = 4 String memory = "25G" Int timeMinutes = 2880 String dockerImage = "biocontainers/talon:v5.0_cv1" @@ -465,7 +527,7 @@ task Talon { ~{"--f " + outputPrefix + "/talonConfigFile.csv"} \ --db ~{databaseFile} \ --build ~{genomeBuild} \ - --threads ~{cores} \ + --threads ~{threads} \ --cov ~{minimumCoverage} \ --identity ~{minimumIdentity} \ ~{"--o " + outputPrefix + "/run"} @@ -479,7 +541,7 @@ task Talon { } runtime { - cpu: cores + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -495,7 +557,7 @@ task Talon { minimumCoverage: {description: "Minimum alignment coverage in order to use a SAM entry.", category: "common"} minimumIdentity: {description: "Minimum alignment identity in order to use a SAM entry.", category: "common" } outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - cores: {description: "The number of cores to be used.", category: "advanced"} + threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From e921e42fb39f384c4f5dc590b70925b9d1c02a14 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 2 Jul 2020 11:03:54 +0200 Subject: [PATCH 183/902] Update threads and memories. --- talon.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/talon.wdl b/talon.wdl index b71d0a89..c70850dc 100644 --- a/talon.wdl +++ b/talon.wdl @@ -359,8 +359,8 @@ task LabelReads { Boolean deleteTmp = true String outputPrefix - Int threads = 2 - String memory = "4G" + Int threads = 4 + String memory = "25G" Int timeMinutes = 2880 String dockerImage = "biocontainers/talon:v5.0_cv1" } From cd18ce6a03823e8b09229bddf0f1dcd9ac7a1fdb Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 3 Jul 2020 10:26:38 +0200 Subject: [PATCH 184/902] Address comments. --- CHANGELOG.md | 3 ++- talon.wdl | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 69a57b1f..9a546718 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- -+ TALON: Update `FilterTalonTranscripts` to new version. ++ TALON: Update `FilterTalonTranscripts` to new version, which removes the + pairingsFile and replaces this with datasetsFile. + TALON: Add `GetSpliceJunctions` & `LabelReads` tasks. + TALON: Update to version 5.0. + Add tasks for pbmm2, the PacBio wrapper for minimap2. 
diff --git a/talon.wdl b/talon.wdl index c70850dc..87fc407d 100644 --- a/talon.wdl +++ b/talon.wdl @@ -280,7 +280,7 @@ task GetSpliceJunctions { SJinformationFile: {description: "TALON GTF file or database from which to extract exons/introns.", category: "required"} inputFileType: {description: "The file type of SJinformationFile.", category: "required"} referenceGTF: {description: "GTF reference file (ie GENCODE).", category: "required"} - runMode: {description: "Determines whether to include introns or exons in the output.", category: "required"} + runMode: {description: "Determines whether to include introns or exons in the output.", category: "common"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 68eeeb5381649f80c6de77f8c839f02ba5eb684a Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 3 Jul 2020 10:30:01 +0200 Subject: [PATCH 185/902] Address last comment. 
--- talon.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/talon.wdl b/talon.wdl index 87fc407d..a469ddba 100644 --- a/talon.wdl +++ b/talon.wdl @@ -278,7 +278,7 @@ task GetSpliceJunctions { parameter_meta { # inputs SJinformationFile: {description: "TALON GTF file or database from which to extract exons/introns.", category: "required"} - inputFileType: {description: "The file type of SJinformationFile.", category: "required"} + inputFileType: {description: "The file type of SJinformationFile.", category: "common"} referenceGTF: {description: "GTF reference file (ie GENCODE).", category: "required"} runMode: {description: "Determines whether to include introns or exons in the output.", category: "common"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} From 82bd73cec2e608dbda328f98dc95561d9ec8ba96 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Jul 2020 10:19:22 +0200 Subject: [PATCH 186/902] update haplotypecaller memory --- gatk.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 64297c8f..f79312fc 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -963,7 +963,8 @@ task HaplotypeCaller { Float? standardMinConfidenceThresholdForCalling Int memoryMb = javaXmxMb + 512 - Int javaXmxMb = 3072 + # Memory increases with time used. 4G should cover most use cases. + Int javaXmxMb = 4096 Int timeMinutes = 400 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } From a18806aa27a484f715d92b253d17fbf7b2a94782 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Jul 2020 10:22:39 +0200 Subject: [PATCH 187/902] Reduce flagstat memory --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 49495693..5daf57ab 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -209,7 +209,7 @@ task Flagstat { File inputBam String outputPath - String memory = "1G" + String memory = "256M" # Only 40.5 MiB used for 150G bam file. Int timeMinutes = 1 + ceil(size(inputBam, "G")) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } From 9749968ebb445bd9630940e5e2de478dc12ac220 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Jul 2020 10:26:34 +0200 Subject: [PATCH 188/902] Only little memory is needed for gatherbqsrreports --- gatk.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index f79312fc..0d36e440 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -725,8 +725,8 @@ task GatherBqsrReports { Array[File] inputBQSRreports String outputReportPath - String memory = "1G" - String javaXmx = "500M" + Int memoryMb = 256 + javaXmxMb + Int javaXmxMb = 256 Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -734,7 +734,7 @@ task GatherBqsrReports { command { set -e mkdir -p "$(dirname ~{outputReportPath})" - gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ + gatk --java-options '-Xmx~{javaXmxMb} -XX:ParallelGCThreads=1' \ GatherBQSRReports \ -I ~{sep=' -I ' inputBQSRreports} \ -O ~{outputReportPath} @@ -747,15 +747,15 @@ task GatherBqsrReports { runtime { docker: dockerImage time_minutes: timeMinutes - memory: memory + memory: "~{memoryMb}M" } parameter_meta { inputBQSRreports: {description: "The BQSR reports to be merged.", category: "required"} outputReportPath: {description: "The location of the combined BQSR 
report.", category: "required"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From 09772e613aaf6d9029de629b9585e060bcfd1d2d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Jul 2020 10:28:05 +0200 Subject: [PATCH 189/902] Comment on real life use for sambamba markdup --- sambamba.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sambamba.wdl b/sambamba.wdl index bf58dbc8..cd8da21e 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -37,7 +37,7 @@ task Markdup { Boolean removeDuplicates = false # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size. - # Added 1024 mb as a margin of safety + # Added 1024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" # Time minute calculation does not work well for higher number of threads. From 46ea0ef31b4a49abc7859b7371d05770c040b10f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Jul 2020 10:30:30 +0200 Subject: [PATCH 190/902] use less memory on bcftools stats. 
--- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index bd79c2c6..e1ec3059 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -86,7 +86,7 @@ task Stats { Int threads = 0 Int timeMinutes = 1 + 2* ceil(size(select_all([inputVcf, compareVcf]), "G")) # TODO: Estimate, 2 minutes per GB, refine later. - String memory = "2G" # TODO: Safe estimate, refine later. + String memory = "256M" String dockerImage = "quay.io/biocontainers/bcftools:1.9--ha228f0b_3" } From d9b05b5897e9cd33b74715da0754f4806f90b1fa Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Jul 2020 10:32:31 +0200 Subject: [PATCH 191/902] 2GB per thread is sufficient --- bwa.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 0c35bf3a..4a0e86a8 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -30,7 +30,7 @@ task Mem { Int threads = 4 Int sortThreads = 1 - Int sortMemoryPerThreadGb = 4 + Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads @@ -95,7 +95,7 @@ task Kit { Int threads = 4 Int sortThreads = 1 - Int sortMemoryPerThreadGb = 4 + Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads From ec2bc612c1331e044608c629797076eeedec3187 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Jul 2020 10:45:52 +0200 Subject: [PATCH 192/902] update changelog --- CHANGELOG.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a546718..4acadc57 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,23 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Picard SortSam added as a task. ++ Md5 files are no longer created by default on Picard tasks that generate + BAM files. ++ Changed PicardMarkduplicates to use COMPRESSION_LEVEL=1 by default + speeding up execution by 2x at the cost of a 20% larger BAM file. ++ Added sambamba markdup and sambamba sort. NOTE: samtools sort is more + efficient and is recommended. ++ Correctly represent samtools inconsistent use of the threads flag. + Sometimes it means 'threads' sometimes it means 'additional threads'. + BioWDL tasks now use only threads. The `threads - 1` conversion is + applied where necessary for samtools tools that use additional threads. ++ Updated BWA MEM and BWA KIT tasks to use samtools sort version 1.10 for + sorting the BAM file. ++ Updated memory requirements on bcftools Stats, bwa mem, bwakit, GATK + ApplyBQSR, GATK BaseRecalibrator, GATK GatherBqsrReports, Gatk + HaplotypeCaller, Picard CollectMultipleMetrics, Picard GatherBamFiles, + samtools Flagstat, samtools sort and bcftools stats. + TALON: Update `FilterTalonTranscripts` to new version, which removes the pairingsFile and replaces this with datasetsFile. + TALON: Add `GetSpliceJunctions` & `LabelReads` tasks. 
From 4f41ec07bbb1bbbafd70985ecb812d64c0444f53 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Jul 2020 10:55:35 +0200 Subject: [PATCH 193/902] add forgotten M --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 0d36e440..939513db 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -734,7 +734,7 @@ task GatherBqsrReports { command { set -e mkdir -p "$(dirname ~{outputReportPath})" - gatk --java-options '-Xmx~{javaXmxMb} -XX:ParallelGCThreads=1' \ + gatk --java-options '-Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1' \ GatherBQSRReports \ -I ~{sep=' -I ' inputBQSRreports} \ -O ~{outputReportPath} From ca368e7e5a4ff8c1d4776d0095c0d62aeb5b1083 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Jul 2020 13:41:56 +0200 Subject: [PATCH 194/902] add parameter_meta --- bwa.wdl | 1 + picard.wdl | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index 4a0e86a8..78881ad2 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -78,6 +78,7 @@ task Mem { memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/picard.wdl b/picard.wdl index 0e877f23..c090455e 100644 --- a/picard.wdl +++ b/picard.wdl @@ -402,7 +402,8 @@ task GatherBamFiles { inputBams: {description: "The BAM files to be merged together.", category: "required"} inputBamsIndex: {description: "The indexes of the input BAM files.", category: "required"} outputBamPath: {description: "The path where the merged BAM file will be written.", caregory: "required"} - + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + createMd5File: {decription: "Whether to create an md5 file of the output BAM.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} From 50c5d957408dbf8a6f1d6aa79c0a3b05ffdde664 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 7 Jul 2020 12:41:49 +0200 Subject: [PATCH 195/902] Add samtools controls to hisat2 --- CHANGELOG.md | 3 +++ hisat2.wdl | 21 +++++++++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4acadc57..edfffb5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Hisat2 task has added controls for samtools. ++ Alignment tasks no longer produce BAM indexes as these are not needed + by the markduplicates step. + Picard SortSam added as a task. + Md5 files are no longer created by default on Picard tasks that generate BAM files. 
diff --git a/hisat2.wdl b/hisat2.wdl index 5937f86d..77c370fd 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -34,7 +34,10 @@ task Hisat2 { String summaryFilePath = basename(outputBam, ".bam") + ".summary.txt" Int threads = 4 - String memory = "~{threads + 5 + ceil(size(indexFiles, "G"))}G" + Int sortThreads = 1 + Int sortMemoryPerThreadGb = 2 + Int compressionLevel = 1 + Int memoryGb = 1 + threads + ceil(size(indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / threads) # quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1 # is a combination of hisat2 and samtools @@ -59,18 +62,21 @@ task Hisat2 { ~{true="--dta" false="" downstreamTranscriptomeAssembly} \ --new-summary \ --summary-file ~{summaryFilePath} \ - | samtools sort > ~{outputBam} - samtools index ~{outputBam} ~{bamIndexPath} + | samtools sort \ + ~{"-@ " + sortThreads} \ + -m ~{sortMemoryPerThreadGb}G \ + -l ~{compressionLevel} \ + - \ + -o ~{outputBam} } output { File bamFile = outputBam - File bamIndex = bamIndexPath File summaryFile = summaryFilePath } runtime { - memory: memory + memory: "~{memoryGb}G" cpu: threads + 1 time_minutes: timeMinutes docker: dockerImage @@ -88,9 +94,12 @@ task Hisat2 { downstreamTranscriptomeAssembly: {description: "Equivalent to hisat2's `--dta` flag.", category: "advanced"} summaryFilePath: {description: "Where the summary file should be written.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} } } \ No newline at end of file From b5d6e71a72124dc53eb9344ad0d6a1857bdaea69 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 7 Jul 2020 12:47:13 +0200 Subject: [PATCH 196/902] Add outputBAMcompression to STAR --- CHANGELOG.md | 2 ++ star.wdl | 3 +++ 2 files changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index edfffb5e..e70b06a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Alignment tasks (STAR, Hisat2, BWA) now produce BAM files at level 1 + compression. + Hisat2 task has added controls for samtools. + Alignment tasks no longer produce BAM indexes as these are not needed by the markduplicates step. diff --git a/star.wdl b/star.wdl index 4da67f72..3d0e2eb0 100644 --- a/star.wdl +++ b/star.wdl @@ -103,6 +103,7 @@ task Star { String? twopassMode = "Basic" Array[String]? outSAMattrRGline String? outSAMunmapped = "Within KeepPairs" + Int outBAMcompression = 1 Int? 
limitBAMsortRAM Int runThreadN = 4 @@ -129,6 +130,7 @@ task Star { --outFileNamePrefix ~{outFileNamePrefix} \ --genomeDir ~{sub(indexFiles[0], basename(indexFiles[0]), "")} \ --outSAMtype ~{outSAMtype} \ + --outBAMcompression ~{outBAMcompression} \ --readFilesCommand ~{readFilesCommand} \ ~{"--outFilterScoreMin " + outFilterScoreMin} \ ~{"--outFilterScoreMinOverLread " + outFilterScoreMinOverLread} \ @@ -172,6 +174,7 @@ task Star { limitBAMsortRAM: {description: "Equivalent to star's `--limitBAMsortRAM` option.", category: "advanced"} runThreadN: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + outBAMcompression: {description: "The compression level of the output BAM.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From bef15749a53fe13c91aa9a9f28344c0d0b08001d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 7 Jul 2020 13:44:07 +0200 Subject: [PATCH 197/902] use samtools 1.10 and hisat 2.2.0 --- hisat2.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hisat2.wdl b/hisat2.wdl index 77c370fd..c24610ed 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -41,8 +41,8 @@ task Hisat2 { Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / threads) # quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1 # is a combination of hisat2 and samtools - # hisat2=2.1.0, samtools=1.8 - String dockerImage = "quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:2388ff67fc407dad75774291ca5038f40cac4be0-0" + # hisat2=2.2.0, samtools=1.10 + String dockerImage = 
"quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:2880dd9d8ad0a7b221d4eacda9a818e92983128d-0" } String bamIndexPath = sub(outputBam, "\.bam$", ".bai") From d0207b7f8b6234e17a03ffa4073190ea543b5c48 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 7 Jul 2020 13:44:53 +0200 Subject: [PATCH 198/902] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e70b06a6..a51d097f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Default docker images for bwa, bwakit and hisat2 updated to include samtools + 1.10. + Alignment tasks (STAR, Hisat2, BWA) now produce BAM files at level 1 compression. + Hisat2 task has added controls for samtools. From 8e3788d8dd926e5924226a965b8f8cf688141ac3 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 9 Jul 2020 15:53:39 +0200 Subject: [PATCH 199/902] Use htsjdk inflaters and deflaters for markduplicates --- picard.wdl | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/picard.wdl b/picard.wdl index c090455e..4dd4d970 100644 --- a/picard.wdl +++ b/picard.wdl @@ -467,9 +467,15 @@ task MarkDuplicates { String metricsPath Int compressionLevel = 1 Boolean createMd5File = false + Boolean useJdkInflater = true # Slightly faster than the intel one. + # Better results for compression level 1 (much smaller). Higher compression levels similar to intel deflater. + Boolean useJdkDeflater = true + + # In GATK Best practices pipeline MarkDuplicates is given a 7G VM. 
+ # https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L1040 + Int javaXmxMb = 6656 # 6.5G + String memoryMb = javaXmxMb + 512 - String memory = "9G" - String javaXmx = "8G" Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" @@ -488,7 +494,7 @@ task MarkDuplicates { command { set -e mkdir -p "$(dirname ~{outputBamPath})" - picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + picard -Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1 \ MarkDuplicates \ INPUT=~{sep=' INPUT=' inputBams} \ OUTPUT=~{outputBamPath} \ @@ -500,7 +506,9 @@ task MarkDuplicates { CLEAR_DT="false" \ CREATE_INDEX=true \ ADD_PG_TAG_TO_READS=false \ - CREATE_MD5_FILE=~{true="true" false="false" createMd5File} + CREATE_MD5_FILE=~{true="true" false="false" createMd5File} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} } output { @@ -513,7 +521,7 @@ task MarkDuplicates { runtime { docker: dockerImage time_minutes: timeMinutes - memory: memory + memory: "~{memoryMb}M" } parameter_meta { @@ -523,8 +531,8 @@ task MarkDuplicates { metricsPath: {description: "The location where the output metrics file should be written.", category: "required"} read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From 7f293e334fee666f9719ba6924a45c5a1678b441 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 9 Jul 2020 16:04:19 +0200 Subject: [PATCH 200/902] Update changelog --- CHANGELOG.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4acadc57..4678476f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,11 +11,15 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Picard Markduplicates now uses 7G of RAM just like in GATK's best practice + example pipeline. + Picard SortSam added as a task. + Md5 files are no longer created by default on Picard tasks that generate BAM files. -+ Changed PicardMarkduplicates to use COMPRESSION_LEVEL=1 by default - speeding up execution by 2x at the cost of a 20% larger BAM file. ++ Changed PicardMarkduplicates to use COMPRESSION_LEVEL=1 by default with + the htsjdk deflater. + This makes the task finish in 30% less time at the cost of a 6% larger BAM + file. + Added sambamba markdup and sambamba sort. NOTE: samtools sort is more efficient and is recommended. + Correctly represent samtools inconsistent use of the threads flag. 
From 3e7b07970fa1439b3718f8f7d912858c94f657d4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 13 Jul 2020 10:42:14 +0200 Subject: [PATCH 201/902] Use more sort threads for alignment if more alignment threads are used --- bwa.wdl | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 78881ad2..58e1dc80 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -29,16 +29,23 @@ task Mem { String? readgroup Int threads = 4 - Int sortThreads = 1 + Int? sortThreads Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 - # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads + Int? memoryGb Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 200 / threads) # This container contains: samtools (1.10), bwa (0.7.17-r1188) String dockerImage = "quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:eabfac3657eda5818bae4090db989e3d41b01542-0" } + # Samtools sort may block the pipe while it is writing data to disk. + # This can lead to cpu underutilization. + # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. + Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) + Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) + # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
+ Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads + command { set -e -o pipefail mkdir -p "$(dirname ~{outputPath})" @@ -49,7 +56,7 @@ task Mem { ~{read1} \ ~{read2} \ | samtools sort \ - ~{"-@ " + sortThreads} \ + ~{"-@ " + totalSortThreads} \ -m ~{sortMemoryPerThreadGb}G \ -l ~{compressionLevel} \ - \ @@ -62,7 +69,7 @@ task Mem { runtime { cpu: threads - memory: "~{memoryGb}G" + memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" time_minutes: timeMinutes docker: dockerImage } @@ -95,16 +102,23 @@ task Kit { Boolean sixtyFour = false Int threads = 4 - Int sortThreads = 1 + Int? sortThreads Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 - # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads + Int? memoryGb Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10 String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } + # Samtools sort may block the pipe while it is writing data to disk. + # This can lead to cpu underutilization. + # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. + Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) + Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) + # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
+ Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads + command { set -e mkdir -p "$(dirname ~{outputPrefix})" @@ -119,7 +133,7 @@ task Kit { -p ~{outputPrefix}.hla \ ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ samtools sort \ - ~{"-@ " + sortThreads} \ + ~{"-@ " + totalSortThreads} \ -m ~{sortMemoryPerThreadGb}G \ -l ~{compressionLevel} \ - \ @@ -134,7 +148,7 @@ task Kit { # One extra thread for bwa-postalt + samtools is not needed. # These only use 5-10% of compute power and not always simultaneously. cpu: threads - memory: "~{memoryGb}G" + memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" time_minutes: timeMinutes docker: dockerImage } From 5e122542455264c41e6ff2aa9d7052b31ca13345 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 13 Jul 2020 11:59:28 +0200 Subject: [PATCH 202/902] Request less memory for htseq-count --- htseq.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/htseq.wdl b/htseq.wdl index 35faeef3..cba32c6f 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -32,7 +32,7 @@ task HTSeqCount { String? idattr Array[String] additionalAttributes = [] - String memory = "40G" + String memory = "8G" Int timeMinutes = 10 + ceil(size(inputBams, "G") * 60) String dockerImage = "quay.io/biocontainers/htseq:0.11.2--py37h637b7d7_1" } From 4bfae06690bf1d817cc73d4671620ed47838d19b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 16 Jul 2020 07:33:00 +0200 Subject: [PATCH 203/902] Add parameter_meta --- picard.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index 4dd4d970..f9dd5210 100644 --- a/picard.wdl +++ b/picard.wdl @@ -469,6 +469,7 @@ task MarkDuplicates { Boolean createMd5File = false Boolean useJdkInflater = true # Slightly faster than the intel one. # Better results for compression level 1 (much smaller). Higher compression levels similar to intel deflater. 
+ # NOTE: this might change in the future when the intel deflater is updated! Boolean useJdkDeflater = true # In GATK Best practices pipeline MarkDuplicates is given a 7G VM. @@ -530,7 +531,10 @@ task MarkDuplicates { outputBamPath: {description: "The location where the ouptut BAM file should be written.", category: "required"} metricsPath: {description: "The location where the output metrics file should be written.", category: "required"} read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"} - + createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} From 79faa7cad3bb8049c9d617804d7d2c74db29e069 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 16 Jul 2020 15:55:27 +0200 Subject: [PATCH 204/902] update cutadapt container --- cutadapt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index ad32ff21..d125af43 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -81,7 +81,7 @@ task Cutadapt { Int cores = 4 String memory = "~{300 + 100 * cores}M" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) - String dockerImage = "quay.io/biocontainers/cutadapt:2.8--py37h516909a_0" + String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37h516909a_0" } String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) From 3e254feafd92aed3a88c4f9e37750f7a9dbeeba6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 16 Jul 2020 16:03:51 +0200 Subject: [PATCH 205/902] update samtools image --- samtools.wdl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 5daf57ab..0b8394bf 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -114,7 +114,7 @@ task Fastq { Int threads = 1 String memory = "1G" Int timeMinutes = 1 + ceil(size(inputBam) * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } command { @@ -170,7 +170,7 @@ task FilterShortReadsBam { String outputPathBam String memory = "1G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") @@ -211,7 +211,7 @@ task Flagstat { String memory = "256M" # Only 40.5 MiB used for 150G bam file. 
Int timeMinutes = 1 + ceil(size(inputBam, "G")) - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } command { @@ -247,7 +247,7 @@ task Index { String? outputBamPath String memory = "2G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } # Select_first is needed, otherwise womtool validate fails. @@ -296,7 +296,7 @@ task Markdup { String outputBamPath Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } command { @@ -332,7 +332,7 @@ task Merge { Int threads = 1 Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } String indexPath = sub(outputBamPath, "\.bam$",".bai") @@ -483,7 +483,7 @@ task View { Int threads = 1 String memory = "1G" Int timeMinutes = 1 + ceil(size(inFile, "G") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } String outputIndexPath = basename(outputFileName) + ".bai" From 8845e4f521375a41711625cab0bc010be1b41616 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 16 Jul 2020 16:06:53 +0200 Subject: [PATCH 206/902] Update picard container --- picard.wdl | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/picard.wdl b/picard.wdl index f9dd5210..adb55b4b 100644 --- a/picard.wdl +++ b/picard.wdl @@ -89,7 +89,7 @@ task CollectMultipleMetrics { Int javaXmxMb = 3072 # Additional * 2 because picard multiple metrics reads the reference fasta twice. 
Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -205,7 +205,7 @@ task CollectRnaSeqMetrics { String javaXmx = "8G" # With 6 minutes per G there were several timeouts. Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } command { @@ -263,7 +263,7 @@ task CollectTargetedPcrMetrics { String memory = "4G" String javaXmx = "3G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } command { @@ -322,7 +322,7 @@ task CreateSequenceDictionary { String memory = "3G" String javaXmx = "2G" - String dockerImage = "quay.io/biocontainers/picard:2.22.3--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } command { @@ -370,7 +370,7 @@ task GatherBamFiles { Boolean createMd5File = false # One minute per input gigabyte. Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } command { @@ -422,7 +422,7 @@ task GatherVcfs { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } command { @@ -478,7 +478,7 @@ task MarkDuplicates { String memoryMb = javaXmxMb + 512 Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" # The program default for READ_NAME_REGEX is appropriate in nearly every case. 
# Sometimes we wish to supply "null" in order to turn off optical duplicate detection @@ -554,7 +554,7 @@ task MergeVCFs { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } # Using MergeVcfs instead of GatherVcfs so we can create indices @@ -603,7 +603,7 @@ task SamToFastq { String memory = "17G" String javaXmx = "16G" # High memory default to avoid crashes. - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" File? NONE } @@ -640,7 +640,7 @@ task ScatterIntervalList { String memory = "4G" String javaXmx = "3G" - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } command { @@ -681,9 +681,7 @@ task SortSam { # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) - # A mulled container is needed to have both picard and bwa in one container. 
- # This container contains: picard (2.18.7), bwa (0.7.17-r1188) - String dockerImage = "quay.io/biocontainers/picard:2.23.1--h37ae868_0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } command { From 1308c1812e3c3c62546bd44cd5015923f8cd0024 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 17 Jul 2020 08:16:57 +0200 Subject: [PATCH 207/902] update default images --- bcftools.wdl | 2 +- chunked-scatter.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index e1ec3059..60224b0b 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -87,7 +87,7 @@ task Stats { Int threads = 0 Int timeMinutes = 1 + 2* ceil(size(select_all([inputVcf, compareVcf]), "G")) # TODO: Estimate, 2 minutes per GB, refine later. String memory = "256M" - String dockerImage = "quay.io/biocontainers/bcftools:1.9--ha228f0b_3" + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } command { diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 3ef0c747..b54a7d2e 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -30,7 +30,7 @@ task ChunkedScatter { String memory = "256M" Int timeMinutes = 2 - String dockerImage = "quay.io/biocontainers/chunked-scatter:0.2.0--py_0" + String dockerImage = "quay.io/biocontainers/chunked-scatter:1.0.0--py_0" } command { From 77b3d960288160fc178cace4c1c3a266d0fc205c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 17 Jul 2020 08:18:25 +0200 Subject: [PATCH 208/902] update docker image --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 60224b0b..7f100f9b 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -28,7 +28,7 @@ task Bcf2Vcf { String outputPath = "./bcftools/SV.vcf" String memory = "2G" Int timeMinutes = 1 + ceil(size(bcf, "G")) - String dockerImage = "quay.io/biocontainers/bcftools:1.9--ha228f0b_3" + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } command { 
From 86cd20381fdee68938b98a92cd09fbf1f9f0642b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 17 Jul 2020 08:34:03 +0200 Subject: [PATCH 209/902] Overhaul view task --- bcftools.wdl | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 7f100f9b..e60142db 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -22,23 +22,32 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -task Bcf2Vcf { +task View { input { - File bcf - String outputPath = "./bcftools/SV.vcf" - String memory = "2G" - Int timeMinutes = 1 + ceil(size(bcf, "G")) + File inputFile + String outputPath = "output.vcf.gz" + String memory = "256M" + Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + String outputType = "z" + Int compressionLevel = 1 } command { set -e mkdir -p "$(dirname ~{outputPath})" - bcftools view ~{bcf} -O v -o ~{outputPath} + bcftools view \ + ~{inputFile} -o ~{outputPath} \ + -O ~{outputType} \ + -l ~{compressionLevel} + ~{inputFile} + bcftools index --tbi ~{outputPath} + } output { File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" } runtime { @@ -48,9 +57,11 @@ task Bcf2Vcf { } parameter_meta { - bcf: {description: "The generated BCF from an SV caller", category: "required"} + inputFile: {description: "A vcf or bcf file", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} + outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: 
"The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From 631b5dfa75c053afb0cd97be154bd34534e98167 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 17 Jul 2020 09:27:27 +0200 Subject: [PATCH 210/902] fix view task --- bcftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index e60142db..b99a8cf5 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -37,9 +37,9 @@ task View { set -e mkdir -p "$(dirname ~{outputPath})" bcftools view \ - ~{inputFile} -o ~{outputPath} \ + -o ~{outputPath} \ -O ~{outputType} \ - -l ~{compressionLevel} + -l ~{compressionLevel} \ ~{inputFile} bcftools index --tbi ~{outputPath} From 959bf985ed283d374f747a4791bed1cde753c201 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 17 Jul 2020 11:09:48 +0200 Subject: [PATCH 211/902] Update default gatk image --- gatk.wdl | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 939513db..0f5218bd 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -35,7 +35,7 @@ task AnnotateIntervals { String memory = "3G" String javaXmx = "2G" Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -96,7 +96,7 @@ task ApplyBQSR { Int memoryMb = javaXmxMb + 512 Int javaXmxMb = 2048 Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -168,7 +168,7 @@ task BaseRecalibrator { Int memoryMb = javaXmxMb + 512 Int javaXmxMb = 1024 Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
- String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -227,7 +227,7 @@ task CalculateContamination { String memory = "13G" String javaXmx = "12G" Int timeMinutes = 180 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -271,7 +271,7 @@ task CallCopyRatioSegments { String memory = "3G" String javaXmx = "2G" Int timeMinutes = 2 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -320,7 +320,7 @@ task CollectAllelicCounts { String memory = "11G" String javaXmx = "10G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -376,7 +376,7 @@ task CollectReadCounts { String memory = "8G" String javaXmx = "7G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 5) - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -433,7 +433,7 @@ task CombineGVCFs { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 1 + ceil(size(gvcfFiles, "G") * 8) - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -607,7 +607,7 @@ task DenoiseReadCounts { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -665,7 +665,7 @@ task FilterMutectCalls { String memory = "13G" String javaXmx = "12G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + String dockerImage = 
"quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -728,7 +728,7 @@ task GatherBqsrReports { Int memoryMb = 256 + javaXmxMb Int javaXmxMb = 256 Int timeMinutes = 1 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -774,7 +774,7 @@ task GenomicsDBImport { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 180 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -833,7 +833,7 @@ task GenotypeGVCFs { String memory = "7G" String javaXmx = "6G" Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -899,7 +899,7 @@ task GetPileupSummaries { String memory = "13G" String javaXmx = "12G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -966,7 +966,7 @@ task HaplotypeCaller { # Memory increases with time used. 4G should cover most use cases. Int javaXmxMb = 4096 Int timeMinutes = 400 # This will likely be used with intervals, as such size based estimation can't be used. 
- String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -1040,7 +1040,7 @@ task LearnReadOrientationModel { String memory = "13G" String javaXmx = "12G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -1079,7 +1079,7 @@ task MergeStats { String memory = "15G" String javaXmx = "14G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -1126,7 +1126,7 @@ task ModelSegments { String memory = "11G" String javaXmx = "10G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -1202,7 +1202,7 @@ task MuTect2 { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 240 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -1389,7 +1389,7 @@ task PreprocessIntervals { String memory = "4G" String javaXmx = "3G" Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 6) - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -1447,7 +1447,7 @@ task SelectVariants { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -1507,7 +1507,7 @@ task SplitNCigarReads { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
- String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -1576,7 +1576,7 @@ task VariantEval { String javaXmx = "4G" # TODO: Refine estimate. For now 4 minutes per GB of input. Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs, select_all([referenceFasta, dbsnpVCF])]), "G") * 20) - String dockerImage = "quay.io/biocontainers/gatk4:4.1.7.0--py38_0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -1647,7 +1647,7 @@ task VariantFiltration { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { From 4b065c35ce49b561dc2aa1a4118f01e31a8bdfe4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 17 Jul 2020 13:23:47 +0200 Subject: [PATCH 212/902] Update task with newest features --- htseq.wdl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/htseq.wdl b/htseq.wdl index cba32c6f..829dd32f 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -25,23 +25,23 @@ task HTSeqCount { Array[File]+ inputBams File gtfFile String outputTable = "output.tsv" - String format = "bam" String order = "pos" String stranded = "no" String? featureType String? 
idattr Array[String] additionalAttributes = [] + Int threads = 1 String memory = "8G" Int timeMinutes = 10 + ceil(size(inputBams, "G") * 60) - String dockerImage = "quay.io/biocontainers/htseq:0.11.2--py37h637b7d7_1" + String dockerImage = "quay.io/biocontainers/htseq:0.12.4--py37hb3f55d8_0" } command { set -e mkdir -p "$(dirname ~{outputTable})" htseq-count \ - -f ~{format} \ + --nprocesses ~{threads} \ -r ~{order} \ -s ~{stranded} \ ~{"--type " + featureType} \ @@ -49,7 +49,7 @@ task HTSeqCount { ~{true="--additional-attr " false="" length(additionalAttributes) > 0 }~{sep=" --additional-attr " additionalAttributes} \ ~{sep=" " inputBams} \ ~{gtfFile} \ - > ~{outputTable} + -c ~{outputTable} } output { @@ -57,6 +57,7 @@ task HTSeqCount { } runtime { + cpu: threads time_minutes: timeMinutes memory: memory docker: dockerImage From 9c8b72b87bd71aa0e609e5e9b5ef7a76d37b1933 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 17 Jul 2020 13:35:07 +0200 Subject: [PATCH 213/902] Update parameter_meta --- htseq.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/htseq.wdl b/htseq.wdl index 829dd32f..cbd8e2ac 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -31,7 +31,7 @@ task HTSeqCount { String? 
idattr Array[String] additionalAttributes = [] - Int threads = 1 + Int nprocesses = 1 String memory = "8G" Int timeMinutes = 10 + ceil(size(inputBams, "G") * 60) String dockerImage = "quay.io/biocontainers/htseq:0.12.4--py37hb3f55d8_0" @@ -41,7 +41,7 @@ task HTSeqCount { set -e mkdir -p "$(dirname ~{outputTable})" htseq-count \ - --nprocesses ~{threads} \ + --nprocesses ~{nprocesses} \ -r ~{order} \ -s ~{stranded} \ ~{"--type " + featureType} \ @@ -57,7 +57,7 @@ task HTSeqCount { } runtime { - cpu: threads + cpu: nprocesses time_minutes: timeMinutes memory: memory docker: dockerImage @@ -67,7 +67,7 @@ task HTSeqCount { inputBams: {description: "The input BAM files.", category: "required"} gtfFile: {description: "A GTF/GFF file containing the features of interest.", category: "required"} outputTable: {description: "The path to which the output table should be written.", category: "common"} - format: {description: "Equivalent to the -f option of htseq-count.", category: "advanced"} + nprocesses: {description: "Number of processes to run htseq with.", category: "advanced"} order: {description: "Equivalent to the -r option of htseq-count.", category: "advanced"} stranded: {description: "Equivalent to the -s option of htseq-count.", category: "common"} featureType: {description: "Equivalent to the --type option of htseq-count.", category: "advanced"} From fdf0e47fa2f22e51803f5610991dd831bc47aaf8 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 17 Jul 2020 15:15:55 +0200 Subject: [PATCH 214/902] Update images --- gatk.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 939513db..f20bf70a 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -560,7 +560,7 @@ task CreateReadCountPanelOfNormals { String memory = "8G" String javaXmx = "7G" Int timeMinutes = 5 - String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer causes a spark related error for some reason... 
+ String dockerImage = "broadinstitute/gatk:4.1.8.0" # The biocontainer causes a spark related error for some reason... } command { @@ -1271,7 +1271,7 @@ task PlotDenoisedCopyRatios { String memory = "4G" String javaXmx = "3G" Int timeMinutes = 2 - String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer doesn't seem to contain R. + String dockerImage = "broadinstitute/gatk:4.1.8.0" } command { @@ -1289,7 +1289,7 @@ task PlotDenoisedCopyRatios { output { File denoisedCopyRatiosPlot = outputDir + "/" + outputPrefix + ".denoised.png" - File denoisedCopyRatiosLimitedPlot = outputDir + "/" + outputPrefix + ".denoisedLimit4.png" + File? denoisedCopyRatiosLimitedPlot = outputDir + "/" + outputPrefix + ".denoisedLimit4.png" File standardizedMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".standardizedMAD.txt" File denoisedMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".denoisedMAD.txt" File deltaMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".deltaMAD.txt" @@ -1331,7 +1331,7 @@ task PlotModeledSegments { String memory = "4G" String javaXmx = "3G" Int timeMinutes = 2 - String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer doesn't seem to contain R. + String dockerImage = "broadinstitute/gatk:4.1.8.0" } command { From 156e9cc405d8124b5cb2ec577ab21e33b8c0c8ad Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 17 Jul 2020 15:38:59 +0200 Subject: [PATCH 215/902] update changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f50bf650..10dc2f32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Reworked bcf2vcf task into bcftools view task. ++ Update docker images for samtools, bcftools, picard, GATK, cutadapt and + chunked-scatter. + Default docker images for bwa, bwakit and hisat2 updated to include samtools 1.10. 
+ Alignment tasks (STAR, Hisat2, BWA) now produce BAM files at level 1 From 9826e25a04aa9db88ebdc23cc22da5f69916eb50 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 20 Jul 2020 07:40:53 +0200 Subject: [PATCH 216/902] Add missing interpunction. Co-authored-by: Jasper --- bcftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index b99a8cf5..24fbb44c 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -57,7 +57,7 @@ task View { } parameter_meta { - inputFile: {description: "A vcf or bcf file", category: "required"} + inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf"} memory: {description: "The amount of memory this job will use.", category: "advanced"} @@ -170,4 +170,4 @@ task Stats { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} } -} \ No newline at end of file +} From 633c58b2f8f467c691ff24537fafc6359bda14d3 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 20 Jul 2020 07:41:42 +0200 Subject: [PATCH 217/902] Remove redundant newline. 
Co-authored-by: Jasper --- bcftools.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 24fbb44c..2677899b 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -42,7 +42,6 @@ task View { -l ~{compressionLevel} \ ~{inputFile} bcftools index --tbi ~{outputPath} - } output { From 34163a93692614ae364c0f85c66e410954bf39f0 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 20 Jul 2020 07:50:31 +0200 Subject: [PATCH 218/902] Update changelog --- CHANGELOG.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 10dc2f32..22e6c56f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,8 +12,10 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- + Reworked bcf2vcf task into bcftools view task. -+ Update docker images for samtools, bcftools, picard, GATK, cutadapt and - chunked-scatter. ++ Removed the redundant format flag from the htseq interface. This is + autodetected in newer versions of htseq. ++ Update docker images for samtools, bcftools, picard, GATK, cutadapt, htseq + and chunked-scatter. + Default docker images for bwa, bwakit and hisat2 updated to include samtools 1.10. + Alignment tasks (STAR, Hisat2, BWA) now produce BAM files at level 1 From c248c5bba1811f96b199df9058b18bd44aaed0c0 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 21 Jul 2020 16:54:03 +0200 Subject: [PATCH 219/902] Update output names for the first set of tasks. --- CHANGELOG.md | 2 ++ ccs.wdl | 18 +++++----- centrifuge.wdl | 42 +++++++++++------------ isoseq3.wdl | 28 ++++++++-------- lima.wdl | 38 ++++++++++----------- talon.wdl | 90 +++++++++++++++++++++++++------------------------- 6 files changed, 110 insertions(+), 108 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22e6c56f..9796c484 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. 
version 4.0.0-develop --------------------------- ++ Renamed outputs of tasks used in the TALON-WDL, PacBio-subreads-processing & + sequence-classification pipelines. + Reworked bcf2vcf task into bcftools view task. + Removed the redundant format flag from the htseq interface. This is autodetected in newer versions of htseq. diff --git a/ccs.wdl b/ccs.wdl index 1762ac75..60e43711 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -55,10 +55,10 @@ task CCS { } output { - File outputCCSfile = outputPrefix + ".ccs.bam" - File outputCCSindexFile = outputPrefix + ".ccs.bam.pbi" - File outputReportFile = outputPrefix + ".ccs.report.txt" - File outputSTDERRfile = outputPrefix + ".ccs.stderr.log" + File ccsBam = outputPrefix + ".ccs.bam" + File ccsBamIndex = outputPrefix + ".ccs.bam.pbi" + File ccsReport = outputPrefix + ".ccs.report.txt" + File ccsStderr = outputPrefix + ".ccs.stderr.log" } runtime { @@ -70,7 +70,7 @@ task CCS { parameter_meta { # inputs - minPasses: {description: "Minimum number of full-length subreads required to generate CCS for a ZMW.", category: "advanced"} + minPasses: {description: "Minimum number of full-length subreads required to generate ccs for a ZMW.", category: "advanced"} minLength: {description: "Minimum draft length before polishing.", category: "advanced"} maxLength: {description: "Maximum draft length before polishing.", category: "advanced"} byStrand: {description: "Generate a consensus for each strand.", category: "advanced"} @@ -84,9 +84,9 @@ task CCS { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputCCSfile: {description: "Consensus reads output file."} - outputCCSindexFile: {description: "Index of consensus reads output file."} - outputReportFile: {description: "CCS results report file."} - outputSTDERRfile: {description: "CCS STDERR log file."} + ccsBam: {description: "Consensus reads output file."} + ccsBamIndex: {description: "Index of consensus reads output file."} + ccsReport: {description: "Ccs results report file."} + ccsStderr: {description: "Ccs STDERR log file."} } } diff --git a/centrifuge.wdl b/centrifuge.wdl index f2b26043..e1cddcad 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -59,7 +59,7 @@ task Build { } output { - Array[File] outputIndex = glob(outputPrefix + "/" + indexBasename + "*.cf") + Array[File] index = glob(outputPrefix + "/" + indexBasename + "*.cf") } runtime { @@ -75,7 +75,7 @@ task Build { conversionTable: {description: "List of UIDs (unique ID) and corresponding taxonomic IDs.", category: "required"} taxonomyTree: {description: "Taxonomic tree (e.g. nodes.dmp).", category: "required"} nameTable: {description: "Name table (e.g. names.dmp).", category: "required"} - referenceFile: {description: "A comma-separated list of FASTA files containing the reference sequences to be aligned to.", category: "required"} + referenceFile: {description: "A comma-separated list of fasta files containing the reference sequences to be aligned to.", category: "required"} indexBasename: {description: "The basename of the index files to write.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} offrate: {description: "The number of rows marked by the indexer.", category: "common"} @@ -88,7 +88,7 @@ task Build { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputIndex: {description: "Generated Centrifuge index."} + index: {description: "Generated centrifuge index."} } } @@ -142,9 +142,9 @@ task Classify { >>> output { - File outputMetrics = outputPrefix + "_alignment_metrics.tsv" - File outputClassification = outputPrefix + "_classification.tsv" - File outputReport = outputPrefix + "_output_report.tsv" + File metrics = outputPrefix + "_alignment_metrics.tsv" + File classification = outputPrefix + "_classification.tsv" + File report = outputPrefix + "_output_report.tsv" } runtime { @@ -156,7 +156,7 @@ task Classify { parameter_meta { # inputs inputFormat: {description: "The format of the read file(s).", category: "required"} - phred64: {description: "If set to true, Phred+64 encoding is used.", category: "required"} + phred64: {description: "If set to true, phred+64 encoding is used.", category: "required"} minHitLength: {description: "Minimum length of partial hits.", category: "required"} indexFiles: {description: "The files of the index for the reference genomes.", category: "required"} read1: {description: "List of files containing mate 1s, or unpaired reads.", category: "required"} @@ -172,9 +172,9 @@ task Classify { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputMetrics: {description: "File with Centrifuge metrics."} - outputClassification: {description: "File with the classification results."} - outputReport: {description: "File with a classification summary."} + metrics: {description: "File with centrifuge metrics."} + classification: {description: "File with the classification results."} + report: {description: "File with a classification summary."} } } @@ -209,7 +209,7 @@ task Inspect { >>> output { - File outputInspect = outputPrefix + "/" + printOption + File inspectResult = outputPrefix + "/" + printOption } runtime { @@ -223,13 +223,13 @@ task Inspect { printOption: {description: "The output option for inspect (fasta, summary, conversionTable, taxonomyTree, nameTable, sizeTable)", category: "required"} indexFiles: {description: "The files of the index for the reference genomes.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - across: {description: "When printing FASTA output, output a newline character every bases.", category: "common"} + across: {description: "When printing fasta output, output a newline character every bases.", category: "common"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputInspect: {description: "Output file according to output option."} + inspectResult: {description: "Output file according to output option."} } } @@ -300,7 +300,7 @@ task DownloadTaxonomy { } } -task Kreport { +task KReport { input { File centrifugeClassification String outputPrefix @@ -337,7 +337,7 @@ task Kreport { >>> output { - File outputKreport = outputPrefix + "_kreport.tsv" + File KReport = outputPrefix + "_kreport.tsv" } runtime { @@ -348,10 +348,10 @@ task Kreport { parameter_meta { # inputs - centrifugeClassification: {description: "File with Centrifuge classification results.", category: "required"} + centrifugeClassification: {description: "File with centrifuge classification results.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} indexFiles: {description: "The files of the index for the reference genomes.", category: "required"} - noLCA: {description: "Do not report the LCA of multiple assignments, but report count fractions at the taxa.", category: "advanced"} + noLCA: {description: "Do not report the lca of multiple assignments, but report count fractions at the taxa.", category: "advanced"} showZeros: {description: "Show clades that have zero reads.", category: "advanced"} isCountTable: {description: "The format of the file is taxIDCOUNT.", category: "advanced"} minimumScore: {description: "Require a minimum score for reads to be counted.", category: "advanced"} @@ -361,7 +361,7 @@ task Kreport { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputKreport: {description: "File with kraken style report."} + KReport: {description: "File with kraken style report."} } } @@ -384,7 +384,7 @@ task KTimportTaxonomy { } output { - File outputKronaPlot = outputPrefix + "_krona.html" + File kronaPlot = outputPrefix + "_krona.html" } runtime { @@ -395,13 +395,13 @@ task KTimportTaxonomy { parameter_meta { # inputs - inputFile: {description: "File with Centrifuge classification results.", category: "required"} + inputFile: {description: "File with centrifuge classification results.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputKronaPlot: {description: "Krona taxonomy plot html file."} + kronaPlot: {description: "Krona taxonomy plot html file."} } } diff --git a/isoseq3.wdl b/isoseq3.wdl index 9e0dfdb2..f369553f 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -51,12 +51,12 @@ task Refine { } output { - File outputFLNCfile = outputDir + "/" + outputNamePrefix + ".bam" - File outputFLNCindexFile = outputDir + "/" + outputNamePrefix + ".bam.pbi" - File outputConsensusReadsetFile = outputDir + "/" + outputNamePrefix + ".consensusreadset.xml" - File outputFilterSummaryFile = outputDir + "/" + outputNamePrefix + ".filter_summary.json" - File outputReportFile = outputDir + "/" + outputNamePrefix + ".report.csv" - File outputSTDERRfile = outputDir + "/" + outputNamePrefix + ".stderr.log" + File refineBam = outputDir + "/" + outputNamePrefix + ".bam" + File refineBamIndex = outputDir + "/" + outputNamePrefix + ".bam.pbi" + File refineConsensusReadset = outputDir + "/" + outputNamePrefix + ".consensusreadset.xml" + File refineFilterSummary = outputDir + "/" + outputNamePrefix + ".filter_summary.json" + File refineReport = outputDir + "/" + outputNamePrefix + ".report.csv" + File refineStderr = outputDir + "/" + outputNamePrefix + ".stderr.log" } runtime { @@ -69,9 +69,9 @@ task Refine { parameter_meta { # inputs minPolyAlength: {description: "Minimum poly(A) tail length.", category: "advanced"} - requirePolyA: {description: "Require FL reads to have a poly(A) tail and remove it.", category: "common"} + requirePolyA: {description: "Require fl reads to have a poly(A) tail and remove it.", category: "common"} logLevel: {description: "Set log level. 
Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} - inputBamFile: {description: "BAM input file.", category: "required"} + inputBamFile: {description: "Bam input file.", category: "required"} primerFile: {description: "Barcode/primer fasta file.", category: "required"} outputDir: {description: "Output directory path.", category: "required"} outputNamePrefix: {description: "Basename of the output files.", category: "required"} @@ -81,11 +81,11 @@ task Refine { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputFLNCfile: {description: "Filtered reads output file."} - outputFLNCindexFile: {description: "Index of filtered reads output file."} - outputSTDERRfile: {description: "Refine STDERR log file."} - outputConsensusReadsetFile: {description: "Refine consensus readset XML file."} - outputFilterSummaryFile: {description: "Refine summary file."} - outputReportFile: {description: "Refine report file."} + refineBam: {description: "Filtered reads output file."} + refineBamIndex: {description: "Index of filtered reads output file."} + refineConsensusReadset: {description: "Refine consensus readset xml file."} + refineFilterSummary: {description: "Refine summary file."} + refineReport: {description: "Refine report file."} + refineStderr: {description: "Refine stderr log file."} } } diff --git a/lima.wdl b/lima.wdl index ddd37da4..2e8a7085 100644 --- a/lima.wdl +++ b/lima.wdl @@ -98,14 +98,14 @@ task Lima { } output { - Array[File] outputFLfile = glob("*.bam") - Array[File] outputFLindexFile = glob("*.bam.pbi") - Array[File] outputFLxmlFile = glob("*.subreadset.xml") - File outputSTDERRfile = outputPrefix + ".fl.stderr.log" - File outputJSONfile = outputPrefix + ".fl.json" - File outputCountsFile = outputPrefix + ".fl.lima.counts" - File outputReportFile = outputPrefix + ".fl.lima.report" - File outputSummaryFile 
= outputPrefix + ".fl.lima.summary" + Array[File] limaBam = glob("*.bam") + Array[File] limaBamIndex = glob("*.bam.pbi") + Array[File] limaXml = glob("*.subreadset.xml") + File limaStderr = outputPrefix + ".fl.stderr.log" + File limaJson = outputPrefix + ".fl.json" + File limaCounts = outputPrefix + ".fl.lima.counts" + File limaReport = outputPrefix + ".fl.lima.report" + File limaSummary = outputPrefix + ".fl.lima.summary" } runtime { @@ -131,15 +131,15 @@ task Lima { minEndScore: {description: "Minimum end barcode score threshold is applied to the individual leading and trailing ends.", category: "advanced"} minSignalIncrease: {description: "The minimal score difference, between first and combined, required to call a barcode pair different.", category: "advanced"} minScoreLead: {description: "The minimal score lead required to call a barcode pair significant.", category: "common"} - ccsMode: {description: "CCS mode, use optimal alignment options.", category: "common"} - splitBamNamed: {description: "Split BAM output by resolved barcode pair name.", category: "common"} + ccsMode: {description: "Ccs mode, use optimal alignment options.", category: "common"} + splitBamNamed: {description: "Split bam output by resolved barcode pair name.", category: "common"} scoredAdapterRatio: {description: "Minimum ratio of scored vs sequenced adapters.", category: "advanced"} peek: {description: "Demux the first N ZMWs and return the mean score, 0 means peeking deactivated.", category: "advanced"} guess: {description: "Try to guess the used barcodes, using the provided mean score threshold, 0 means guessing deactivated.", category: "advanced"} guessMinCount: {description: "Minimum number of ZMWs observed to whitelist barcodes.", category: "advanced"} peekGuess: {description: "Try to infer the used barcodes subset, by peeking at the first 50,000 ZMWs.", category: "advanced"} logLevel: {description: "Set log level. 
Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} - inputBamFile: {description: "BAM input file.", category: "required"} + inputBamFile: {description: "Bam input file.", category: "required"} barcodeFile: {description: "Barcode/primer fasta file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} @@ -148,13 +148,13 @@ task Lima { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputFLfile: {description: "Demultiplexed reads output file(s)."} - outputFLindexFile: {description: "Index of demultiplexed reads output file(s)."} - outputFLxmlFile: {description: "XML file of the subreadset(s)."} - outputSTDERRfile: {description: "Lima STDERR log file."} - outputJSONfile: {description: "Lima JSON file."} - outputCountsFile: {description: "Lima counts file."} - outputReportFile: {description: "Lima report file."} - outputSummaryFile: {description: "Lima summary file."} + limaBam: {description: "Demultiplexed reads output file(s)."} + limaBamIndex: {description: "Index of demultiplexed reads output file(s)."} + limaXml: {description: "Xml file of the subreadset(s)."} + limaStderr: {description: "Lima stderr log file."} + limaJson: {description: "Lima json file."} + limaCounts: {description: "Lima counts file."} + limaReport: {description: "Lima report file."} + limaSummary: {description: "Lima summary file."} } } diff --git a/talon.wdl b/talon.wdl index a469ddba..e39a3cd2 100644 --- a/talon.wdl +++ b/talon.wdl @@ -48,7 +48,7 @@ task CreateAbundanceFileFromDatabase { } output { - File outputAbundanceFile = outputPrefix + "_talon_abundance.tsv" + File abundanceFile = outputPrefix + "_talon_abundance.tsv" } runtime { @@ -59,7 +59,7 @@ task CreateAbundanceFileFromDatabase { 
parameter_meta { # inputs - databaseFile: {description: "TALON database.", category: "required"} + databaseFile: {description: "Talon database.", category: "required"} annotationVersion: {description: "Which annotation version to use.", category: "required"} genomeBuild: {description: "Genome build to use.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} @@ -70,7 +70,7 @@ task CreateAbundanceFileFromDatabase { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputAbundanceFile: {description: "Abundance for each transcript in the TALON database across datasets."} + abundanceFile: {description: "Abundance for each transcript in the talon database across datasets."} } } @@ -105,7 +105,7 @@ task CreateGtfFromDatabase { } output { - File outputGTFfile = outputPrefix + "_talon.gtf" + File gtfFile = outputPrefix + "_talon.gtf" } runtime { @@ -116,7 +116,7 @@ task CreateGtfFromDatabase { parameter_meta { # inputs - databaseFile: {description: "TALON database.", category: "required"} + databaseFile: {description: "Talon database.", category: "required"} genomeBuild: {description: "Genome build to use.", category: "required"} annotationVersion: {description: "Which annotation version to use.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} @@ -128,7 +128,7 @@ task CreateGtfFromDatabase { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputGTFfile: {description: "The genes, transcripts, and exons stored a TALON database in GTF format."} + gtfFile: {description: "The genes, transcripts, and exons stored a talon database in gtf format."} } } @@ -164,7 +164,7 @@ task FilterTalonTranscripts { } output { - File outputTranscriptWhitelist = outputPrefix + "_whitelist.csv" + File transcriptWhitelist = outputPrefix + "_whitelist.csv" } runtime { @@ -175,11 +175,11 @@ task FilterTalonTranscripts { parameter_meta { # inputs - databaseFile: {description: "TALON database.", category: "required"} + databaseFile: {description: "Talon database.", category: "required"} annotationVersion: {description: "Which annotation version to use.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} maxFracA: {description: "Maximum fraction of As to allow in the window located immediately after any read assigned to a novel transcript.", category: "advanced"} - minCount: {description: "Number of minimum occurrences required for a novel transcript PER dataset.", category: "advanced"} + minCount: {description: "Number of minimum occurrences required for a novel transcript per dataset.", category: "advanced"} allowGenomic: {description: "If this option is set, transcripts from the Genomic novelty category will be permitted in the output.", category: "advanced"} datasetsFile: {description: "Datasets to include.", category: "advanced"} minDatasets: {description: "Minimum number of datasets novel transcripts must be found in.", category: "advanced"} @@ -188,7 +188,7 @@ task FilterTalonTranscripts { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputTranscriptWhitelist: {description: "A transcript whitelist produced from the TALON database."} + transcriptWhitelist: {description: "Transcript whitelist produced from the talon database."} } } @@ -216,7 +216,7 @@ task GetReadAnnotations { } output { - File outputAnnotation = outputPrefix + "_talon_read_annot.tsv" + File readAnnotations = outputPrefix + "_talon_read_annot.tsv" } runtime { @@ -227,7 +227,7 @@ task GetReadAnnotations { parameter_meta { # inputs - databaseFile: { description: "TALON database.", category: "required"} + databaseFile: { description: "Talon database.", category: "required"} genomeBuild: {description: "Genome build to use.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} datasetFile: {description: "A file indicating which datasets should be included.", category: "advanced"} @@ -236,7 +236,7 @@ task GetReadAnnotations { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputAnnotation: {description: "Read-specific annotation information from a TALON database."} + readAnnotations: {description: "Read-specific annotation information from a talon database."} } } @@ -266,7 +266,7 @@ task GetSpliceJunctions { } output { - File outputSJfile = outputPrefix + "_" + runMode + "s.tsv" + File spliceJunctions = outputPrefix + "_" + runMode + "s.tsv" } runtime { @@ -277,9 +277,9 @@ task GetSpliceJunctions { parameter_meta { # inputs - SJinformationFile: {description: "TALON GTF file or database from which to extract exons/introns.", category: "required"} + SJinformationFile: {description: "Talon gtf file or database from which to extract exons/introns.", category: "required"} inputFileType: {description: "The file type of SJinformationFile.", category: "common"} - referenceGTF: {description: "GTF reference file (ie GENCODE).", category: "required"} + referenceGTF: {description: "Gtf reference file (ie gencode).", category: "required"} runMode: {description: "Determines whether to include introns or exons in the output.", category: "common"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} @@ -287,7 +287,7 @@ task GetSpliceJunctions { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputSJfile: {description: "File containing locations, novelty and transcript assignments of exons/introns."} + spliceJunctions: {description: "File containing locations, novelty and transcript assignments of exons/introns."} } } @@ -322,7 +322,7 @@ task InitializeTalonDatabase { } output { - File outputDatabase = outputPrefix + ".db" + File database = outputPrefix + ".db" } runtime { @@ -333,11 +333,11 @@ task InitializeTalonDatabase { parameter_meta { # inputs - GTFfile: {description: "GTF annotation containing genes, transcripts, and edges.", category: "required"} - genomeBuild: {description: "Name of genome build that the GTF file is based on (ie hg38).", category: "required"} + GTFfile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"} + genomeBuild: {description: "Name of genome build that the gtf file is based on (ie hg38).", category: "required"} annotationVersion: {description: "Name of supplied annotation (will be used to label data).", category: "required"} minimumLength: { description: "Minimum required transcript length.", category: "common"} - novelIDprefix: {description: "Prefix for naming novel discoveries in eventual TALON runs.", category: "common"} + novelIDprefix: {description: "Prefix for naming novel discoveries in eventual talon runs.", category: "common"} cutoff5p: { description: "Maximum allowable distance (bp) at the 5' end during annotation.", category: "advanced"} cutoff3p: {description: "Maximum allowable distance (bp) at the 3' end during annotation.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} @@ -346,7 +346,7 @@ task InitializeTalonDatabase { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputDatabase: {description: "TALON database."} + database: {description: "Talon database."} } } @@ -379,8 +379,8 @@ task LabelReads { } output { - File outputLabeledSAM = outputPrefix + "_labeled.sam" - File outputReadLabels = outputPrefix + "_read_labels.tsv" + File labeledSam = outputPrefix + "_labeled.sam" + File readLabels = outputPrefix + "_read_labels.tsv" } runtime { @@ -392,7 +392,7 @@ task LabelReads { parameter_meta { # inputs - SAMfile: {description: "SAM file of transcripts.", category: "required"} + SAMfile: {description: "Sam file of transcripts.", category: "required"} referenceGenome: {description: "Reference genome fasta file.", category: "required"} fracaRangeSize: {description: "Size of post-transcript interval to compute fraction.", category: "common"} tmpDir: {description: "Path to directory for tmp files.", category: "advanced"} @@ -404,8 +404,8 @@ task LabelReads { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputLabeledSAM: {description: "SAM file with labeled transcripts."} - outputReadLabels: {description: "Tabular file with fraction description per read."} + labeledSam: {description: "Sam file with labeled transcripts."} + readLabels: {description: "Tabular file with fraction description per read."} } } @@ -425,7 +425,7 @@ task ReformatGtf { } output { - File outputReformattedGTF = GTFfile + File reformattedGtf = GTFfile } runtime { @@ -436,13 +436,13 @@ task ReformatGtf { parameter_meta { # inputs - GTFfile: {description: "GTF annotation containing genes, transcripts, and edges.", category: "required"} + GTFfile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputReformattedGTF: {description: "Reformatted GTF file."} + reformattedGtf: {description: "Reformatted gtf file."} } } @@ -470,7 +470,7 @@ task SummarizeDatasets { } output { - File outputSummaryFile = outputPrefix + "_talon_summary.tsv" + File summaryFile = outputPrefix + "_talon_summary.tsv" } runtime { @@ -481,7 +481,7 @@ task SummarizeDatasets { parameter_meta { # inputs - databaseFile: {description: "TALON database.", category: "required"} + databaseFile: {description: "Talon database.", category: "required"} setVerbose: {description: "Print out the counts in terminal.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} datasetGroupsCSV: {description: "File of comma-delimited dataset groups to process together.", category: "advanced"} @@ -490,7 +490,7 @@ task SummarizeDatasets { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputSummaryFile: {description: "Tab-delimited file of gene and transcript counts for each dataset."} + summaryFile: {description: "Tab-delimited file of gene and transcript counts for each dataset."} } } @@ -534,10 +534,10 @@ task Talon { >>> output { - File outputUpdatedDatabase = databaseFile - File outputLog = outputPrefix + "/run_QC.log" - File outputAnnot = outputPrefix + "/run_talon_read_annot.tsv" - File outputConfigFile = outputPrefix + "/talonConfigFile.csv" + File updatedDatabase = databaseFile + File talonLog = outputPrefix + "/run_QC.log" + File talonAnnotation = outputPrefix + "/run_talon_read_annot.tsv" + File talonConfigFile = outputPrefix + "/talonConfigFile.csv" } runtime { @@ -549,13 +549,13 @@ task Talon { parameter_meta { # inputs - SAMfiles: {description: "Input SAM files.", category: "required"} + SAMfiles: {description: "Input sam files.", category: "required"} organism: {description: "The name of the organism from which the samples originated.", category: "required"} sequencingPlatform: {description: "The sequencing platform used to generate long reads.", category: "required"} - databaseFile: {description: "TALON database. Created using initialize_talon_database.py.", category: "required"} + databaseFile: {description: "Talon database. Created using initialize_talon_database.py.", category: "required"} genomeBuild: {description: "Genome build (i.e. 
hg38) to use.", category: "required"} - minimumCoverage: {description: "Minimum alignment coverage in order to use a SAM entry.", category: "common"} - minimumIdentity: {description: "Minimum alignment identity in order to use a SAM entry.", category: "common" } + minimumCoverage: {description: "Minimum alignment coverage in order to use a sam entry.", category: "common"} + minimumIdentity: {description: "Minimum alignment identity in order to use a sam entry.", category: "common" } outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} @@ -563,9 +563,9 @@ task Talon { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputUpdatedDatabase: {description: "Updated TALON database."} - outputLog: {description: "Log file from TALON run."} - outputAnnot: {description: "Read annotation file from TALON run."} - outputConfigFile: {description: "The TALON configuration file."} + updatedDatabase: {description: "Updated talon database."} + talonLog: {description: "Log file from talon run."} + talonAnnotation: {description: "Read annotation file from talon run."} + talonConfigFile: {description: "The talon configuration file."} } } From 4fbfe713091934c380df0f1b565428fe520af638 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 22 Jul 2020 11:25:37 +0200 Subject: [PATCH 220/902] Fix last set of tasks. 
--- centrifuge.wdl | 14 +++++----- isoseq3.wdl | 6 ++--- minimap2.wdl | 24 ++++++++--------- samtools.wdl | 4 +-- talon.wdl | 64 ++++++++++++++++++++++----------------------- transcriptclean.wdl | 64 ++++++++++++++++++++++----------------------- 6 files changed, 87 insertions(+), 89 deletions(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index e1cddcad..ee305325 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -281,7 +281,7 @@ task Download { task DownloadTaxonomy { input { - String centrifugeTaxonomyDir + String taxonomyDir String executable = "centrifuge-download" String? preCommand } @@ -290,19 +290,19 @@ task DownloadTaxonomy { set -e -o pipefail ~{preCommand} ~{executable} \ - -o ~{centrifugeTaxonomyDir} \ + -o ~{taxonomyDir} \ taxonomy } output { - File taxonomyTree = centrifugeTaxonomyDir + "/nodes.dmp" - File nameTable = centrifugeTaxonomyDir + "/names.dmp" + File taxonomyTree = taxonomyDir + "/nodes.dmp" + File nameTable = taxonomyDir + "/names.dmp" } } task KReport { input { - File centrifugeClassification + File classification String outputPrefix Array[File]+ indexFiles Boolean noLCA = false @@ -332,7 +332,7 @@ task KReport { ~{true="--is-count-table" false="" isCountTable} \ ~{"--min-score " + minimumScore} \ ~{"--min-length " + minimumLength} \ - ~{centrifugeClassification} \ + ~{classification} \ > ~{outputPrefix + "_kreport.tsv"} >>> @@ -348,7 +348,7 @@ task KReport { parameter_meta { # inputs - centrifugeClassification: {description: "File with centrifuge classification results.", category: "required"} + classification: {description: "File with centrifuge classification results.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} indexFiles: {description: "The files of the index for the reference genomes.", category: "required"} noLCA: {description: "Do not report the lca of multiple assignments, but report count fractions at the taxa.", category: "advanced"} diff --git 
a/isoseq3.wdl b/isoseq3.wdl index f369553f..604a71d5 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -22,7 +22,7 @@ version 1.0 task Refine { input { - Int minPolyAlength = 20 + Int minPolyALength = 20 Boolean requirePolyA = false String logLevel = "WARN" File inputBamFile @@ -40,7 +40,7 @@ task Refine { set -e mkdir -p "~{outputDir}" isoseq3 refine \ - --min-polya-length ~{minPolyAlength} \ + --min-polya-length ~{minPolyALength} \ ~{true="--require-polya" false="" requirePolyA} \ --log-level ~{logLevel} \ --num-threads ~{cores} \ @@ -68,7 +68,7 @@ task Refine { parameter_meta { # inputs - minPolyAlength: {description: "Minimum poly(A) tail length.", category: "advanced"} + minPolyALength: {description: "Minimum poly(A) tail length.", category: "advanced"} requirePolyA: {description: "Require fl reads to have a poly(A) tail and remove it.", category: "common"} logLevel: {description: "Set log level. Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} inputBamFile: {description: "Bam input file.", category: "required"} diff --git a/minimap2.wdl b/minimap2.wdl index 04b02bf2..fb31fb7f 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -50,7 +50,7 @@ task Indexing { } output { - File outputIndexFile = outputPrefix + ".mmi" + File indexFile = outputPrefix + ".mmi" } runtime { @@ -62,7 +62,7 @@ task Indexing { parameter_meta { # input - useHomopolymerCompressedKmer: {description: "Use homopolymer-compressed k-mer (preferrable for PacBio).", category: "advanced"} + useHomopolymerCompressedKmer: {description: "Use homopolymer-compressed k-mer (preferrable for pacbio).", category: "advanced"} kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"} minimizerWindowSize: {description: "Minimizer window size.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} @@ -74,7 +74,7 @@ task Indexing { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # output - outputIndexFile: {description: "Indexed reference file."} + indexFile: {description: "Indexed reference file."} } } @@ -83,9 +83,9 @@ task Mapping { String presetOption Int kmerSize = 15 Boolean skipSelfAndDualMappings = false - Boolean outputSAM = false + Boolean outputSam = false String outputPrefix - Boolean addMDtagToSAM = false + Boolean addMDTagToSam = false Boolean secondaryAlignment = false File referenceFile File queryFile @@ -110,9 +110,9 @@ task Mapping { -x ~{presetOption} \ -k ~{kmerSize} \ ~{true="-X" false="" skipSelfAndDualMappings} \ - ~{true="-a" false="" outputSAM} \ + ~{true="-a" false="" outputSam} \ -o ~{outputPrefix} \ - ~{true="--MD" false="" addMDtagToSAM} \ + ~{true="--MD" false="" addMDTagToSam} \ --secondary=~{true="yes" false="no" secondaryAlignment} \ -t ~{cores} \ ~{"-G " + maxIntronLength} \ @@ -126,7 +126,7 @@ task Mapping { } output { - File outputAlignmentFile = outputPrefix + File alignmentFile = outputPrefix } runtime { @@ -139,16 +139,16 @@ task Mapping { parameter_meta { presetOption: {description: "This option applies multiple options at the same time.", category: "common"} kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"} - outputSAM: {description: "Output in the SAM format.", category: "common"} + outputSam: {description: "Output in the sam format.", category: "common"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} maxIntronLength: {description: "Max intron length (effective with -xsplice; changing -r).", category: "advanced"} maxFragmentLength: {description: "Max fragment length (effective with -xsr or in the fragment mode).", category: "advanced"} skipSelfAndDualMappings: {description: "Skip self and dual mappings (for the all-vs-all mode).", category: "advanced"} - retainMaxSecondaryAlignments: {description: "Retain at most 
INT secondary alignments.", category: "advanced"} + retainMaxSecondaryAlignments: {description: "Retain at most N secondary alignments.", category: "advanced"} matchingScore: {description: "Matching score.", category: "advanced"} mismatchPenalty: {description: "Mismatch penalty.", category: "advanced"} howToFindGTAG: {description: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} - addMDtagToSAM: {description: "Adds a MD tag to the SAM output file.", category: "common"} + addMDTagToSam: {description: "Adds a MD tag to the sam output file.", category: "common"} secondaryAlignment: {description: "Whether to output secondary alignments.", category: "advanced"} referenceFile: {description: "Reference fasta file.", category: "required"} queryFile: {description: "Input fasta file.", category: "required"} @@ -158,6 +158,6 @@ task Mapping { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # output - outputAlignmentFile: {description: "Mapping and alignment between collections of DNA sequences file."} + alignmentFile: {description: "Mapping and alignment between collections of dna sequences file."} } } diff --git a/samtools.wdl b/samtools.wdl index 0b8394bf..c155f026 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -423,6 +423,7 @@ task Sort { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + # outputs outputBam: {description: "Sorted BAM file."} } @@ -526,11 +527,10 @@ task View { excludeFilter: {description: "Equivalent to samtools view's `-F` option.", category: "advanced"} excludeSpecificFilter: {description: "Equivalent to samtools view's `-G` option.", category: "advanced"} MAPQthreshold: {description: "Equivalent to samtools view's `-q` option.", category: "advanced"} - threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} diff --git a/talon.wdl b/talon.wdl index e39a3cd2..c796c1ce 100644 --- a/talon.wdl +++ b/talon.wdl @@ -242,9 +242,9 @@ task GetReadAnnotations { task GetSpliceJunctions { input { - File SJinformationFile + File sjInformationFile String inputFileType = "db" - File referenceGTF + File referenceGtf String runMode = "intron" String outputPrefix @@ -259,8 +259,8 @@ task GetSpliceJunctions { set -e mkdir -p "$(dirname ~{outputPrefix})" talon_get_sjs \ - ~{SJfileType[inputFileType] + SJinformationFile} \ - --ref ~{referenceGTF} \ + ~{SJfileType[inputFileType] + sjInformationFile} \ + --ref ~{referenceGtf} \ --mode ~{runMode} \ --outprefix ~{outputPrefix} } @@ -277,9 +277,9 @@ task GetSpliceJunctions { parameter_meta { # inputs - SJinformationFile: {description: "Talon gtf file or database from which to extract exons/introns.", category: "required"} - inputFileType: {description: "The file type of SJinformationFile.", category: "common"} - referenceGTF: {description: "Gtf reference file (ie gencode).", category: "required"} + sjInformationFile: {description: "Talon gtf file or database from which to extract exons/introns.", category: "required"} + inputFileType: {description: "The file type of sjInformationFile.", category: "common"} + referenceGtf: {description: "Gtf reference file (ie gencode).", category: "required"} runMode: {description: "Determines whether to include introns or exons in the output.", category: "common"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} @@ -293,13 +293,13 @@ task GetSpliceJunctions { task InitializeTalonDatabase { input { - File GTFfile + File gtfFile String genomeBuild String annotationVersion Int minimumLength = 300 - String novelIDprefix = "TALON" - Int 
cutoff5p = 500 - Int cutoff3p = 300 + String novelPrefix = "TALON" + Int cutOff5p = 500 + Int cutOff3p = 300 String outputPrefix String memory = "10G" @@ -311,13 +311,13 @@ task InitializeTalonDatabase { set -e mkdir -p "$(dirname ~{outputPrefix})" talon_initialize_database \ - --f=~{GTFfile} \ + --f=~{gtfFile} \ --g=~{genomeBuild} \ --a=~{annotationVersion} \ --l=~{minimumLength} \ - --idprefix=~{novelIDprefix} \ - --5p=~{cutoff5p} \ - --3p=~{cutoff3p} \ + --idprefix=~{novelPrefix} \ + --5p=~{cutOff5p} \ + --3p=~{cutOff3p} \ --o=~{outputPrefix} } @@ -333,13 +333,13 @@ task InitializeTalonDatabase { parameter_meta { # inputs - GTFfile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"} + gtfFile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"} genomeBuild: {description: "Name of genome build that the gtf file is based on (ie hg38).", category: "required"} annotationVersion: {description: "Name of supplied annotation (will be used to label data).", category: "required"} minimumLength: { description: "Minimum required transcript length.", category: "common"} - novelIDprefix: {description: "Prefix for naming novel discoveries in eventual talon runs.", category: "common"} - cutoff5p: { description: "Maximum allowable distance (bp) at the 5' end during annotation.", category: "advanced"} - cutoff3p: {description: "Maximum allowable distance (bp) at the 3' end during annotation.", category: "advanced"} + novelPrefix: {description: "Prefix for naming novel discoveries in eventual talon runs.", category: "common"} + cutOff5p: { description: "Maximum allowable distance (bp) at the 5' end during annotation.", category: "advanced"} + cutOff3p: {description: "Maximum allowable distance (bp) at the 3' end during annotation.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory 
available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -352,7 +352,7 @@ task InitializeTalonDatabase { task LabelReads { input { - File SAMfile + File samFile File referenceGenome Int fracaRangeSize = 20 String tmpDir = "./tmp_label_reads" @@ -369,7 +369,7 @@ task LabelReads { set -e mkdir -p "$(dirname ~{outputPrefix})" talon_label_reads \ - --f=~{SAMfile} \ + --f=~{samFile} \ --g=~{referenceGenome} \ --t=~{threads} \ --ar=~{fracaRangeSize} \ @@ -392,7 +392,7 @@ task LabelReads { parameter_meta { # inputs - SAMfile: {description: "Sam file of transcripts.", category: "required"} + samFile: {description: "Sam file of transcripts.", category: "required"} referenceGenome: {description: "Reference genome fasta file.", category: "required"} fracaRangeSize: {description: "Size of post-transcript interval to compute fraction.", category: "common"} tmpDir: {description: "Path to directory for tmp files.", category: "advanced"} @@ -411,7 +411,7 @@ task LabelReads { task ReformatGtf { input { - File GTFfile + File gtfFile String memory = "4G" Int timeMinutes = 30 @@ -421,11 +421,11 @@ task ReformatGtf { command { set -e talon_reformat_gtf \ - -gtf ~{GTFfile} + -gtf ~{gtfFile} } output { - File reformattedGtf = GTFfile + File reformattedGtf = gtfFile } runtime { @@ -436,7 +436,7 @@ task ReformatGtf { parameter_meta { # inputs - GTFfile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"} + gtfFile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -452,7 +452,7 @@ task SummarizeDatasets { Boolean setVerbose = false String outputPrefix - File? datasetGroupsCSV + File? datasetGroupsCsv String memory = "4G" Int timeMinutes = 50 @@ -466,7 +466,7 @@ task SummarizeDatasets { --db ~{databaseFile} \ ~{true="--verbose" false="" setVerbose} \ --o ~{outputPrefix} \ - ~{"--groups " + datasetGroupsCSV} + ~{"--groups " + datasetGroupsCsv} } output { @@ -484,7 +484,7 @@ task SummarizeDatasets { databaseFile: {description: "Talon database.", category: "required"} setVerbose: {description: "Print out the counts in terminal.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - datasetGroupsCSV: {description: "File of comma-delimited dataset groups to process together.", category: "advanced"} + datasetGroupsCsv: {description: "File of comma-delimited dataset groups to process together.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -496,7 +496,7 @@ task SummarizeDatasets { task Talon { input { - Array[File] SAMfiles + Array[File] samFiles String organism String sequencingPlatform = "PacBio-RS-II" File databaseFile @@ -518,7 +518,7 @@ task Talon { ln -s $PWD/tmp /tmp/sqltmp #Multiprocessing will crash if the absolute path is too long. export TMPDIR=/tmp/sqltmp printf "" > ~{outputPrefix}/talonConfigFile.csv #File needs to be emptied when task is rerun. 
- for file in ~{sep=" " SAMfiles} + for file in ~{sep=" " samFiles} do configFileLine="$(basename ${file%.*}),~{organism},~{sequencingPlatform},${file}" echo ${configFileLine} >> ~{outputPrefix}/talonConfigFile.csv @@ -549,7 +549,7 @@ task Talon { parameter_meta { # inputs - SAMfiles: {description: "Input sam files.", category: "required"} + samFiles: {description: "Input sam files.", category: "required"} organism: {description: "The name of the organism from which the samples originated.", category: "required"} sequencingPlatform: {description: "The sequencing platform used to generate long reads.", category: "required"} databaseFile: {description: "Talon database. Created using initialize_talon_database.py.", category: "required"} diff --git a/transcriptclean.wdl b/transcriptclean.wdl index 15da1f58..daf79703 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -22,7 +22,7 @@ version 1.0 task GetSJsFromGtf { input { - File GTFfile + File gtfFile File genomeFile String outputPrefix Int minIntronSize = 21 @@ -36,14 +36,14 @@ task GetSJsFromGtf { set -e mkdir -p "$(dirname ~{outputPrefix})" get_SJs_from_gtf \ - --f=~{GTFfile} \ + --f=~{gtfFile} \ --g=~{genomeFile} \ --minIntronSize=~{minIntronSize} \ ~{"--o=" + outputPrefix + ".tsv"} } output { - File outputSJsFile = outputPrefix + ".tsv" + File spliceJunctionFile = outputPrefix + ".tsv" } runtime { @@ -54,22 +54,21 @@ task GetSJsFromGtf { parameter_meta { # inputs - GTFfile: {description: "Input GTF file", category: "required"} + gtfFile: {description: "Input gtf file", category: "required"} genomeFile: {description: "Reference genome", category: "required"} minIntronSize: {description: "Minimum size of intron to consider a junction.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job 
will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputSJsFile: {description: "Extracted splice junctions."} + spliceJunctionFile: {description: "Extracted splice junctions."} } } task GetTranscriptCleanStats { input { - File transcriptCleanSAMfile + File transcriptCleanSamFile String outputPrefix String memory = "4G" @@ -81,12 +80,12 @@ task GetTranscriptCleanStats { set -e mkdir -p "$(dirname ~{outputPrefix})" get_TranscriptClean_stats \ - ~{transcriptCleanSAMfile} \ + ~{transcriptCleanSamFile} \ ~{outputPrefix} } output { - File outputStatsFile = stdout() + File statsFile = stdout() } runtime { @@ -97,24 +96,23 @@ task GetTranscriptCleanStats { parameter_meta { # inputs - transcriptCleanSAMfile: {description: "Output SAM file from TranscriptClean", category: "required"} + transcriptCleanSamFile: {description: "Output sam file from transcriptclean", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputStatsFile: {description: "Summary stats from TranscriptClean run."} + statsFile: {description: "Summary stats from transcriptclean run."} } } task TranscriptClean { input { - File SAMfile + File samFile File referenceGenome Int maxLenIndel = 5 - Int maxSJoffset = 5 + Int maxSJOffset = 5 String outputPrefix Boolean correctMismatches = true Boolean correctIndels = true @@ -138,7 +136,7 @@ task TranscriptClean { set -e mkdir -p "$(dirname ~{outputPrefix})" TranscriptClean \ - -s ~{SAMfile} \ + -s ~{samFile} \ -g ~{referenceGenome} \ -t ~{cores} \ --maxLenIndel=~{maxLenIndel} \ @@ -157,10 +155,10 @@ task TranscriptClean { } output { - File outputTranscriptCleanFasta = outputPrefix + "_clean.fa" - File outputTranscriptCleanLog = outputPrefix + "_clean.log" - File outputTranscriptCleanSAM = outputPrefix + "_clean.sam" - File outputTranscriptCleanTElog = outputPrefix + "_clean.TE.log" + File fastaFile = outputPrefix + "_clean.fa" + File logFile = outputPrefix + "_clean.log" + File samFile = outputPrefix + "_clean.sam" + File logFileTE = outputPrefix + "_clean.TE.log" } runtime { @@ -172,21 +170,21 @@ task TranscriptClean { parameter_meta { # inputs - SAMfile: {description: "Input SAM file containing transcripts to correct.", category: "required"} + samFile: {description: "Input sam file containing transcripts to correct.", category: "required"} referenceGenome: {description: "Reference genome fasta file.", category: "required"} maxLenIndel: {description: "Maximum size indel to correct.", category: "advanced"} - maxSJoffset: {description: "Maximum distance from annotated splice junction to correct.", category: "advanced"} + maxSJOffset: {description: "Maximum distance from annotated splice junction to correct.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - correctMismatches: 
{description: "Set this to make TranscriptClean correct mismatches.", category: "common"} - correctIndels: {description: "Set this to make TranscriptClean correct indels.", category: "common"} - correctSJs: {description: "Set this to make TranscriptClean correct splice junctions.", category: "common"} - dryRun: {description: "TranscriptClean will read in the data but don't do any correction.", category: "advanced"} + correctMismatches: {description: "Set this to make transcriptclean correct mismatches.", category: "common"} + correctIndels: {description: "Set this to make transcriptclean correct indels.", category: "common"} + correctSJs: {description: "Set this to make transcriptclean correct splice junctions.", category: "common"} + dryRun: {description: "Transcriptclean will read in the data but don't do any correction.", category: "advanced"} primaryOnly: {description: "Only output primary mappings of transcripts.", category: "advanced"} canonOnly: {description: "Only output canonical transcripts and transcript containing annotated noncanonical junctions.", category: "advanced"} bufferSize: {description: "Number of lines to output to file at once by each thread during run.", category: "common"} - deleteTmp: {description: "The temporary directory generated by TranscriptClean will be removed.", category: "common"} + deleteTmp: {description: "The temporary directory generated by transcriptclean will be removed.", category: "common"} spliceJunctionAnnotation: {description: "Splice junction file.", category: "common"} - variantFile: {description: "VCF formatted file of variants.", category: "common"} + variantFile: {description: "Vcf formatted file of variants.", category: "common"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -194,9 
+192,9 @@ task TranscriptClean { category: "advanced"} # outputs - outputTranscriptCleanFasta: {description: "Fasta file containing corrected reads."} - outputTranscriptCleanLog: {description: "Log file of TranscriptClean run."} - outputTranscriptCleanSAM: {description: "SAM file containing corrected aligned reads."} - outputTranscriptCleanTElog: {description: "TE log file of TranscriptClean run."} + fastaFile: {description: "Fasta file containing corrected reads."} + logFile: {description: "Log file of transcriptclean run."} + samFile: {description: "Sam file containing corrected aligned reads."} + logFileTE: {description: "TE log file of transcriptclean run."} } } From 7e8c833eadb0259da33e6d641393f77dbe3f2578 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 22 Jul 2020 11:54:00 +0200 Subject: [PATCH 221/902] Fix tests. --- transcriptclean.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/transcriptclean.wdl b/transcriptclean.wdl index daf79703..6e0de8a9 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -109,7 +109,7 @@ task GetTranscriptCleanStats { task TranscriptClean { input { - File samFile + File inputSam File referenceGenome Int maxLenIndel = 5 Int maxSJOffset = 5 @@ -136,7 +136,7 @@ task TranscriptClean { set -e mkdir -p "$(dirname ~{outputPrefix})" TranscriptClean \ - -s ~{samFile} \ + -s ~{inputSam} \ -g ~{referenceGenome} \ -t ~{cores} \ --maxLenIndel=~{maxLenIndel} \ @@ -170,7 +170,7 @@ task TranscriptClean { parameter_meta { # inputs - samFile: {description: "Input sam file containing transcripts to correct.", category: "required"} + inputSam: {description: "Input sam file containing transcripts to correct.", category: "required"} referenceGenome: {description: "Reference genome fasta file.", category: "required"} maxLenIndel: {description: "Maximum size indel to correct.", category: "advanced"} maxSJOffset: {description: "Maximum distance from annotated splice junction to correct.", category: "advanced"} From 
5e572ffcf5057fb2ac90bbde90f21e27936dc793 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 22 Jul 2020 11:57:23 +0200 Subject: [PATCH 222/902] Fix some input naming. --- transcriptclean.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/transcriptclean.wdl b/transcriptclean.wdl index 6e0de8a9..1eea686c 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -68,7 +68,7 @@ task GetSJsFromGtf { task GetTranscriptCleanStats { input { - File transcriptCleanSamFile + File inputSam String outputPrefix String memory = "4G" @@ -80,7 +80,7 @@ task GetTranscriptCleanStats { set -e mkdir -p "$(dirname ~{outputPrefix})" get_TranscriptClean_stats \ - ~{transcriptCleanSamFile} \ + ~{inputSam} \ ~{outputPrefix} } @@ -96,7 +96,7 @@ task GetTranscriptCleanStats { parameter_meta { # inputs - transcriptCleanSamFile: {description: "Output sam file from transcriptclean", category: "required"} + inputSam: {description: "Output sam file from transcriptclean", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -157,7 +157,7 @@ task TranscriptClean { output { File fastaFile = outputPrefix + "_clean.fa" File logFile = outputPrefix + "_clean.log" - File samFile = outputPrefix + "_clean.sam" + File outputSam = outputPrefix + "_clean.sam" File logFileTE = outputPrefix + "_clean.TE.log" } @@ -194,7 +194,7 @@ task TranscriptClean { # outputs fastaFile: {description: "Fasta file containing corrected reads."} logFile: {description: "Log file of transcriptclean run."} - samFile: {description: "Sam file containing corrected aligned reads."} + outputSam: {description: "Sam file containing corrected aligned reads."} logFileTE: {description: "TE log file of transcriptclean run."} } } From 
fe7b8a1edbf5cbf3ab766a67c9428d430807204f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 22 Jul 2020 12:06:57 +0200 Subject: [PATCH 223/902] Fix tests. --- talon.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/talon.wdl b/talon.wdl index c796c1ce..c11ab9e0 100644 --- a/talon.wdl +++ b/talon.wdl @@ -322,7 +322,7 @@ task InitializeTalonDatabase { } output { - File database = outputPrefix + ".db" + File databaseFile = outputPrefix + ".db" } runtime { @@ -346,13 +346,13 @@ task InitializeTalonDatabase { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - database: {description: "Talon database."} + databaseFile: {description: "Talon database."} } } task LabelReads { input { - File samFile + File inputSam File referenceGenome Int fracaRangeSize = 20 String tmpDir = "./tmp_label_reads" @@ -369,7 +369,7 @@ task LabelReads { set -e mkdir -p "$(dirname ~{outputPrefix})" talon_label_reads \ - --f=~{samFile} \ + --f=~{inputSam} \ --g=~{referenceGenome} \ --t=~{threads} \ --ar=~{fracaRangeSize} \ @@ -392,7 +392,7 @@ task LabelReads { parameter_meta { # inputs - samFile: {description: "Sam file of transcripts.", category: "required"} + inputSam: {description: "Sam file of transcripts.", category: "required"} referenceGenome: {description: "Reference genome fasta file.", category: "required"} fracaRangeSize: {description: "Size of post-transcript interval to compute fraction.", category: "common"} tmpDir: {description: "Path to directory for tmp files.", category: "advanced"} From 5010738cf4cf0ac034ff5b0418938e9ffe77a518 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 22 Jul 2020 12:27:18 +0200 Subject: [PATCH 224/902] Update CHANGELOG. 
--- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9796c484..91698644 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Renamed a few inputs in centrifuge.wdl, isoseq3.wdl, talon.wdl, + transcriptclean.wdl to be more descriptive. + Renamed outputs of tasks used in the TALON-WDL, PacBio-subreads-processing & sequence-classification pipelines. + Reworked bcf2vcf task into bcftools view task. From d8a159f32dd321b87fd76c1ca4522e109eb5e0fc Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 22 Jul 2020 12:33:57 +0200 Subject: [PATCH 225/902] Fix tests. --- transcriptclean.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/transcriptclean.wdl b/transcriptclean.wdl index 1eea686c..79661307 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -61,6 +61,7 @@ task GetSJsFromGtf { memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + # outputs spliceJunctionFile: {description: "Extracted splice junctions."} } @@ -140,7 +141,7 @@ task TranscriptClean { -g ~{referenceGenome} \ -t ~{cores} \ --maxLenIndel=~{maxLenIndel} \ - --maxSJOffset=~{maxSJoffset} \ + --maxSJOffset=~{maxSJOffset} \ -o ~{outputPrefix} \ ~{true="-m true" false="-m false" correctMismatches} \ ~{true="-i true" false="-i false" correctIndels} \ From fd4e8619a2838b533796bad70a73d6e21032a27f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 22 Jul 2020 12:45:51 +0200 Subject: [PATCH 226/902] Update CHANGELOG. 
--- CHANGELOG.md | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 91698644..95241551 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,13 +16,13 @@ version 4.0.0-develop + Renamed outputs of tasks used in the TALON-WDL, PacBio-subreads-processing & sequence-classification pipelines. + Reworked bcf2vcf task into bcftools view task. -+ Removed the redundant format flag from the htseq interface. This is ++ Removed the redundant format flag from the htseq interface. This is autodetected in newer versions of htseq. + Update docker images for samtools, bcftools, picard, GATK, cutadapt, htseq and chunked-scatter. + Default docker images for bwa, bwakit and hisat2 updated to include samtools 1.10. -+ Alignment tasks (STAR, Hisat2, BWA) now produce BAM files at level 1 ++ Alignment tasks (STAR, Hisat2, BWA) now produce BAM files at level 1 compression. + Hisat2 task has added controls for samtools. + Alignment tasks no longer produce BAM indexes as these are not needed @@ -34,18 +34,18 @@ version 4.0.0-develop BAM files. + Changed PicardMarkduplicates to use COMPRESSION_LEVEL=1 by default with the htsjdk deflater. - This makes the task finish in 32% less time at the cost of a 8% larger BAM - file. + This makes the task finish in 32% less time at the cost of a 8% larger BAM + file. + Added sambamba markdup and sambamba sort. NOTE: samtools sort is more efficient and is recommended. + Correctly represent samtools inconsistent use of the threads flag. Sometimes it means 'threads' sometimes it means 'additional threads'. - BioWDL tasks now use only threads. The `threads - 1` conversion is + BioWDL tasks now use only threads. The `threads - 1` conversion is applied where necessary for samtools tools that use additional threads. + Updated BWA MEM and BWA KIT tasks to use samtools sort version 1.10 for sorting the BAM file. 
-+ Updated memory requirements on bcftools Stats, bwa mem, bwakit, GATK - ApplyBQSR, GATK BaseRecalibrator, GATK GatherBqsrReports, Gatk ++ Updated memory requirements on bcftools Stats, bwa mem, bwakit, GATK + ApplyBQSR, GATK BaseRecalibrator, GATK GatherBqsrReports, Gatk HaplotypeCaller, Picard CollectMultipleMetrics, Picard GatherBamFiles, samtools Flagstat, samtools sort and bcftools stats. + TALON: Update `FilterTalonTranscripts` to new version, which removes the @@ -54,13 +54,13 @@ version 4.0.0-develop + TALON: Update to version 5.0. + Add tasks for pbmm2, the PacBio wrapper for minimap2. + Update the image for chunked-scatter and make use of new features from 0.2.0. -+ Tuned resource requirements for GATK VariantEval, MultiQC, Picard metrics and ++ Tuned resource requirements for GATK VariantEval, MultiQC, Picard metrics and STAR. -+ Added a new task for [scatter-regions](https://github.com/biowdl/chunked-scatter) - that replaces biopet-scatterregions. ++ Added a new task for [scatter-regions](https://github.com/biowdl/chunked-scatter) + that replaces biopet-scatterregions. + The FastQC task now talks to the Java directly instead of using the included Perl wrapper for FastQC. This has the advantage that memory and threads can - be set independently. A rather high maximum heap size of 1750MB (Xmx1750M) + be set independently. A rather high maximum heap size of 1750MB (Xmx1750M) was set, as OOM errors occurred frequently on some fastqs. + STAR: Add options regarding alignment score (regarding read length as well) for tweaking when processing rRNA depleted samples. @@ -82,12 +82,12 @@ version 4.0.0-develop opposed to virtual memory). + Added `-XX:ParallelGCThreads=1` to the java options of java tasks. + Added `timeMinutes` input to many tasks, this indicates a maximum - number of minutes that the job will run. The associated runtime + number of minutes that the job will run. 
The associated runtime attribute is `time_minutes` which can be used to inform a scheduler (eg. slurm) of the run time of the job. + Added STAR GenomeGenerate task. -+ GATK.HaplotypeCaller: Add `--dont-use-soft-clipped-bases` and - `--standard-min-confidence-threshold-for-calling` options. These are ++ GATK.HaplotypeCaller: Add `--dont-use-soft-clipped-bases` and + `--standard-min-confidence-threshold-for-calling` options. These are required for RNA seq variant calling according to GATK best practices. + Samtools: Fix quotations in sort command. + Samtools SortByName is now called Sort. @@ -100,7 +100,6 @@ version 4.0.0-develop + Lima: Replace mv command with cp. + Add WDL task for smoove (lumpy) sv-caller. - version 3.1.0 --------------------------- + Default threads for BWA in bwa.Kit task: 4. Samtools sort in the @@ -113,12 +112,11 @@ version 3.1.0 + Lima: Add workaround for glob command not locating files in output directory. + CCS: Add missing backslash. + Cutadapt now explicitly calls the `--compression-level` flag with compression - level 1 to prevent cutadapt from using very high gzip compression level 6 + level 1 to prevent cutadapt from using very high gzip compression level 6 that uses 400% more cpu time. + Update default docker image for cutadapt and fastqc. + Default number of cores for cutadapt and bwamem to 4 cores. - version 3.0.0 --------------------------- + Add optional input umiSeparator in umi-tools dedup task. @@ -129,7 +127,7 @@ version 3.0.0 + Allow setting the `--emit-ref-confidence` flag for HaplotypeCaller. + Add `--output-mode` flag to HaplotypeCaller. + Added rtg.Format and rtg.VcfEval tasks. -+ Added gatk.SelectVariants and gatk.VariantFiltration tasks. ++ Added gatk.SelectVariants and gatk.VariantFiltration tasks. + Fixed a bug where the output directory was not created for bwa.Kit. + Add vt task for variants normalization and decomposition. + Update WDL task Picard (Add task RenameSample). 
@@ -146,11 +144,11 @@ version 3.0.0 biopet.ScatterRegions now always returns correctly ordered scatters. + Add tasks for umi-tools dedup and extract. + Add `GenomicsDBImport` task for GATK. -+ Add `annotationGroups` input to `GenotypeGVCFs` to allow setting multiple ++ Add `annotationGroups` input to `GenotypeGVCFs` to allow setting multiple annotation groups. The `StandardAnnotation` group is still used as default. + GenotypeGVCFs, only allow one input GVCF file, as the tool also only allows - one input file. -+ Rename HaplotypeCallerGVCF to HaplotypeCaller. Add `gvcf` option to set + one input file. ++ Rename HaplotypeCallerGVCF to HaplotypeCaller. Add `gvcf` option to set whether output should be a GVCF. + Centrifuge: Add Krona task specific to Centrifuge. + Centrifuge: Fix Centrifuge tests, where sometimes the index files could still not be located. @@ -175,7 +173,7 @@ version 3.0.0 + PreprocessIntervals + Add common.TextToFile task. + Add bedtools.Intersect. -+ Add `-o pipefail` to bedtools.MergeBedFiles to prevent errors in BED files ++ Add `-o pipefail` to bedtools.MergeBedFiles to prevent errors in BED files from going unnoticed. + Centrifuge: Fix -1/-U options for single end data. + Add bedtools.Complement, bedtools.Merge, and add a task to combine multiple @@ -261,7 +259,7 @@ version 1.0.0 + Common: Update dockerTag to dockerImage. + GATK: Add CombineVariants task that allows, e.g., to merge VCFs from different callers. + Mutect2: Add GATK tasks related to variant filtering (LearnReadOrientationModel, MergeStats, GetPileupSummaries, CalculateContamination and FilterMutectCalls). -+ Mutect2: Add "--germline-resource" and "--f1r2-tar-gz" inputs, requiring an update to GATK 4.1.2.0. ++ Mutect2: Add "--germline-resource" and "--f1r2-tar-gz" inputs, requiring an update to GATK 4.1.2.0. + Mutect2: Add necessary missing index attribute for panel of normals. + MultiQC: Add memory variable to multiqc task. 
+ GATK: SplitNCigarReads, BaseRecalibration and ApplyBQSR do no longer need regions files as required inputs. From ccbb50c1bb5c45e60a81c3051ee6a041c0e8d6ec Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 22 Jul 2020 17:07:52 +0200 Subject: [PATCH 227/902] Bring changes to hisat2 too --- hisat2.wdl | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/hisat2.wdl b/hisat2.wdl index c24610ed..a86214f9 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -34,10 +34,10 @@ task Hisat2 { String summaryFilePath = basename(outputBam, ".bam") + ".summary.txt" Int threads = 4 - Int sortThreads = 1 + Int? sortThreads Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 - Int memoryGb = 1 + threads + ceil(size(indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads + Int? memoryGb Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / threads) # quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1 # is a combination of hisat2 and samtools @@ -45,7 +45,12 @@ task Hisat2 { String dockerImage = "quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:2880dd9d8ad0a7b221d4eacda9a818e92983128d-0" } - String bamIndexPath = sub(outputBam, "\.bam$", ".bai") + # Samtools sort may block the pipe while it is writing data to disk. + # This can lead to cpu underutilization. + # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. 
+ Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) + Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) + Int estimatedMemoryGb = 1 + ceil(size(indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads command { set -e -o pipefail @@ -63,7 +68,7 @@ task Hisat2 { --new-summary \ --summary-file ~{summaryFilePath} \ | samtools sort \ - ~{"-@ " + sortThreads} \ + ~{"-@ " + totalSortThreads} \ -m ~{sortMemoryPerThreadGb}G \ -l ~{compressionLevel} \ - \ @@ -76,8 +81,8 @@ task Hisat2 { } runtime { - memory: "~{memoryGb}G" - cpu: threads + 1 + memory: "~{estimatedMemoryGb}G" + cpu: threads time_minutes: timeMinutes docker: dockerImage } From f6bf488f35382c6a21095756a96b50fd54b4d818 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 22 Jul 2020 17:09:41 +0200 Subject: [PATCH 228/902] correct memory selection --- hisat2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hisat2.wdl b/hisat2.wdl index a86214f9..f9a4bc59 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -81,7 +81,7 @@ task Hisat2 { } runtime { - memory: "~{estimatedMemoryGb}G" + memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" cpu: threads time_minutes: timeMinutes docker: dockerImage From 15c960d2beb3423b1608b3ddf3479808e991218c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 22 Jul 2020 17:12:21 +0200 Subject: [PATCH 229/902] update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 95241551..55fb1e8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The + number of threads is now related to the number of threads on the aligner. + Using more threads reduces the chance of the samtools sort pipe getting + blocked if it's full. 
+ Renamed a few inputs in centrifuge.wdl, isoseq3.wdl, talon.wdl, transcriptclean.wdl to be more descriptive. + Renamed outputs of tasks used in the TALON-WDL, PacBio-subreads-processing & From d15ee30f59bc8f16f5e4702ba2e35a76e8ead10c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jul 2020 08:26:50 +0200 Subject: [PATCH 230/902] Combine BWA and BW kit tasks --- bwa.wdl | 85 +++++---------------------------------------------------- 1 file changed, 7 insertions(+), 78 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 58e1dc80..cdaed83a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -21,78 +21,6 @@ version 1.0 # SOFTWARE. task Mem { - input { - File read1 - File? read2 - BwaIndex bwaIndex - String outputPath - String? readgroup - - Int threads = 4 - Int? sortThreads - Int sortMemoryPerThreadGb = 2 - Int compressionLevel = 1 - Int? memoryGb - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 200 / threads) - # This container contains: samtools (1.10), bwa (0.7.17-r1188) - String dockerImage = "quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:eabfac3657eda5818bae4090db989e3d41b01542-0" - } - - # Samtools sort may block the pipe while it is writing data to disk. - # This can lead to cpu underutilization. - # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. - Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) - Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) - # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
- Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads - - command { - set -e -o pipefail - mkdir -p "$(dirname ~{outputPath})" - bwa mem \ - ~{"-t " + threads} \ - ~{"-R '" + readgroup}~{true="'" false="" defined(readgroup)} \ - ~{bwaIndex.fastaFile} \ - ~{read1} \ - ~{read2} \ - | samtools sort \ - ~{"-@ " + totalSortThreads} \ - -m ~{sortMemoryPerThreadGb}G \ - -l ~{compressionLevel} \ - - \ - -o ~{outputPath} - } - - output { - File outputBam = outputPath - } - - runtime { - cpu: threads - memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" - time_minutes: timeMinutes - docker: dockerImage - } - - parameter_meta { - read1: {description: "The first or single end fastq file.", category: "required"} - read2: {description: "The second end fastq file.", category: "common"} - bwaIndex: {description: "The BWA index files.", category: "required"} - outputPath: {description: "The location the output BAM file should be written to.", category: "required"} - readgroup: {description: "The readgroup to be assigned to the reads. See BWA mem's `-R` option.", category: "common"} - - threads: {description: "The number of threads to use.", category: "advanced"} - memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} - sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} - sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} - compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} - -task Kit { input { File read1 File? 
read2 @@ -100,7 +28,7 @@ task Kit { String outputPrefix String? readgroup Boolean sixtyFour = false - + Boolean usePostalt = false Int threads = 4 Int? sortThreads Int sortMemoryPerThreadGb = 2 @@ -118,6 +46,8 @@ task Kit { Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads + String bwaKitCommand = "bwa-postalt.js -p ~{outputPrefix}.hla ~{bwaIndex.fastaFile}" + (if sixtyFour then ".64.alt" else ".alt") + " | " + String kitCommandString = if usePostalt then bwaKitCommand else "" command { set -e @@ -129,10 +59,8 @@ task Kit { ~{read1} \ ~{read2} \ 2> ~{outputPrefix}.log.bwamem | \ - bwa-postalt.js \ - -p ~{outputPrefix}.hla \ - ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ - samtools sort \ + ~{kitCommandString} \ + samtools sort \ ~{"-@ " + totalSortThreads} \ -m ~{sortMemoryPerThreadGb}G \ -l ~{compressionLevel} \ @@ -157,7 +85,8 @@ task Kit { # inputs read1: {description: "The first-end fastq file.", category: "required"} read2: {description: "The second-end fastq file.", category: "common"} - bwaIndex: {description: "The BWA index, including a .alt file.", category: "required"} + bwaIndex: {description: "The BWA index, including (optionally) a .alt file.", category: "required"} + usePostalt: {description: "Whether to use the postalt script from bwa kit."} outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"} readgroup: {description: "A readgroup identifier.", category: "common"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} From 830eb51555889da4c3733ad5c7bbea4528a57887 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jul 2020 09:26:14 +0200 Subject: [PATCH 231/902] Enable/disable 
postalt with comment --- bwa.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index cdaed83a..f2c731f2 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -46,9 +46,8 @@ task Mem { Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads - String bwaKitCommand = "bwa-postalt.js -p ~{outputPrefix}.hla ~{bwaIndex.fastaFile}" + (if sixtyFour then ".64.alt" else ".alt") + " | " - String kitCommandString = if usePostalt then bwaKitCommand else "" + # The bwa postalt script is out commented as soon as usePostalt = false. It is a hack but it should work. command { set -e mkdir -p "$(dirname ~{outputPrefix})" @@ -59,7 +58,7 @@ task Mem { ~{read1} \ ~{read2} \ 2> ~{outputPrefix}.log.bwamem | \ - ~{kitCommandString} \ + ~{true="" false="#" usePostalt} bwa-postalt.js -p ~{outputPrefix}.hla ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ samtools sort \ ~{"-@ " + totalSortThreads} \ -m ~{sortMemoryPerThreadGb}G \ @@ -70,6 +69,7 @@ task Mem { output { File outputBam = outputPrefix + ".aln.bam" + File? outputHla = outputPrefix + ".hla" } runtime { From 2b073f668e17643ac393d012986b9bcc3096978f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jul 2020 09:30:09 +0200 Subject: [PATCH 232/902] Add comments on comments --- bwa.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index f2c731f2..fdeb870f 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -47,7 +47,8 @@ task Mem { # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads - # The bwa postalt script is out commented as soon as usePostalt = false. 
It is a hack but it should work. + # The bwa postalt script is out commented as soon as usePostalt = false. + # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. command { set -e mkdir -p "$(dirname ~{outputPrefix})" From 997b7765a0403778ad842ae2a8e1c50f38bfd05b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jul 2020 09:55:49 +0200 Subject: [PATCH 233/902] Add bwa-mem2 task --- bwa-mem2.wdl | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 bwa-mem2.wdl diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl new file mode 100644 index 00000000..df3801b4 --- /dev/null +++ b/bwa-mem2.wdl @@ -0,0 +1,112 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Mem { + # NOTE: THIS IS A COPY OF THE BWA TASK WITH ONLY bwa CHANGED TO bwa-mem2 AND A DIFFERENT DOCKER IMAGE. 
+ input { + File read1 + File? read2 + BwaIndex bwaIndex + String outputPrefix + String? readgroup + Boolean sixtyFour = false + Boolean usePostalt = false + Int threads = 4 + Int? sortThreads + Int sortMemoryPerThreadGb = 2 + Int compressionLevel = 1 + Int? memoryGb + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) + # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10 + String dockerImage = "biowdl/bwamem2-kit:2.0-dev" + } + + # Samtools sort may block the pipe while it is writing data to disk. + # This can lead to cpu underutilization. + # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. + Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) + Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) + # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. + Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads + + # The bwa postalt script is out commented as soon as usePostalt = false. + # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + bwa-mem2 mem \ + -t ~{threads} \ + ~{"-R '" + readgroup}~{true="'" false="" defined(readgroup)} \ + ~{bwaIndex.fastaFile} \ + ~{read1} \ + ~{read2} \ + 2> ~{outputPrefix}.log.bwamem | \ + ~{true="" false="#" usePostalt} bwa-postalt.js -p ~{outputPrefix}.hla ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ + samtools sort \ + ~{"-@ " + totalSortThreads} \ + -m ~{sortMemoryPerThreadGb}G \ + -l ~{compressionLevel} \ + - \ + -o ~{outputPrefix}.aln.bam + } + + output { + File outputBam = outputPrefix + ".aln.bam" + File? outputHla = outputPrefix + ".hla" + } + + runtime { + # One extra thread for bwa-postalt + samtools is not needed. 
+ # These only use 5-10% of compute power and not always simultaneously. + cpu: threads + memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + read1: {description: "The first-end fastq file.", category: "required"} + read2: {description: "The second-end fastq file.", category: "common"} + bwaIndex: {description: "The BWA index, including (optionally) a .alt file.", category: "required"} + usePostalt: {description: "Whether to use the postalt script from bwa kit."} + outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"} + readgroup: {description: "A readgroup identifier.", category: "common"} + sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} + threads: {description: "The number of threads to use for alignment.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + + # outputs + outputBam: "The produced BAM file." 
+ } +} + +struct BwaIndex { + File fastaFile + Array[File] indexFiles +} From e968433fdc7d7f26986ddd1ba264f80dd7579d37 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jul 2020 15:34:30 +0200 Subject: [PATCH 234/902] Update image --- bwa-mem2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index df3801b4..5ac6958e 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -37,7 +37,7 @@ task Mem { Int? memoryGb Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10 - String dockerImage = "biowdl/bwamem2-kit:2.0-dev" + String dockerImage = "biowdl/bwamem2-kit:2.0-dev2" # TODO: Update to biocontainer. } # Samtools sort may block the pipe while it is writing data to disk. From d27eea90b9aa3b3683de5522f8f5cb541ec86211 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 27 Jul 2020 10:21:55 +0200 Subject: [PATCH 235/902] update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 55fb1e8a..d3d719f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ A bwa-mem2 task was created with the same interface (including usePostalt) + as the bwa mem task. ++ bwa mem and bwa kit are now one task. The usePostalt boolean can be used to + switch the postalt script on and off. + bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The number of threads is now related to the number of threads on the aligner. 
Using more threads reduces the chance of the samtools sort pipe getting From 18fb322e8b24dee1292f56b6245dc26325eb5ffd Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 27 Jul 2020 15:59:21 +0200 Subject: [PATCH 236/902] use mulled biocontainer --- bwa-mem2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index 5ac6958e..d3290d0b 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -37,7 +37,7 @@ task Mem { Int? memoryGb Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10 - String dockerImage = "biowdl/bwamem2-kit:2.0-dev2" # TODO: Update to biocontainer. + String dockerImage = "quay.io/biocontainers/mulled-v2-6a15c99309c82b345497d24489bee67bbb76c2f6:1c9c3227b9bf825a8dc9726a25701aa23c0b1f12-0" } # Samtools sort may block the pipe while it is writing data to disk. From 6eb33c3dab050b2b1d8b34183caeaf8a8026407b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 28 Jul 2020 07:51:04 +0200 Subject: [PATCH 237/902] Set compression level to 1 and higher time estimate for mergevcf task --- picard.wdl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/picard.wdl b/picard.wdl index adb55b4b..1afa5ea7 100644 --- a/picard.wdl +++ b/picard.wdl @@ -553,8 +553,14 @@ task MergeVCFs { String memory = "5G" String javaXmx = "4G" - Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) + Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + Int compressionLevel = 1 + Boolean useJdkInflater = true # Slightly faster than the intel one. + # Better results for compression level 1 (much smaller). Higher compression levels similar to intel deflater. + # NOTE: this might change in the future when the intel deflater is updated! 
+ Boolean useJdkDeflater = true + } # Using MergeVcfs instead of GatherVcfs so we can create indices @@ -566,7 +572,10 @@ task MergeVCFs { picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ MergeVcfs \ INPUT=~{sep=' INPUT=' inputVCFs} \ - OUTPUT=~{outputVcfPath} + OUTPUT=~{outputVcfPath} \ + COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} } output { @@ -592,6 +601,9 @@ task MergeVCFs { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. 
False uses the optimized intel deflater.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written", category: "advanced"} } } From 9a9bf3f5b1bcef7669b05fcc132caadf411e1140 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 28 Jul 2020 15:51:43 +0200 Subject: [PATCH 238/902] add gridss task --- gridss.wdl | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 gridss.wdl diff --git a/gridss.wdl b/gridss.wdl new file mode 100644 index 00000000..37ac83b7 --- /dev/null +++ b/gridss.wdl @@ -0,0 +1,65 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import "bwa.wdl" as bwa + +task GRIDSS { + input { + File tumorBam + File tumorBai + String tumorLabel + File? normalBam + File? normalBai + String? 
normalLabel + BwaIndex reference + String outputPrefix = "gridss" + + Int threads = 1 + String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" + } + + command { + gridss \ + --reference ~{reference.fastaFile} \ + --output ~{outputPrefix}.vcf.gz \ + --assembly ~{outputPrefix}_assembly.bam \ + ~{"-t " + threads} \ + --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ + ~{normalBam} \ + ~{tumorBam} + tabix -p vcf ~{outputPrefix}.vcf.gz + samtools index ~{outputPrefix}_assembly.bam ~{outputPrefix}_assembly.bai + } + + output { + File vcf = outputPrefix + ".vcf.gz" + File vcfIndex = outputPrefix + ".vcf.gz.tbi" + File assembly = outputPrefix + "_assembly.bam" + File assemblyIndex = outputPrefix + "_assembly.bai" + } + + runtime { + cpu: threads + memory: "32G" + docker: dockerImage + } +} \ No newline at end of file From 20068636fe79595050396d2bdc665fde8126bf33 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jul 2020 10:17:58 +0200 Subject: [PATCH 239/902] slightly tune memory requirements --- bwa-mem2.wdl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index d3290d0b..6ea4578d 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -21,7 +21,6 @@ version 1.0 # SOFTWARE. task Mem { - # NOTE: THIS IS A COPY OF THE BWA TASK WITH ONLY bwa CHANGED TO bwa-mem2 AND A DIFFERENT DOCKER IMAGE. input { File read1 File? read2 @@ -45,8 +44,13 @@ task Mem { # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) - # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads + # BWA-mem2's index files contain 2 BWT indexes of which only one is used. 
.2bit64 is used by default and + # .8bit32 is used for avx2. + # The larger one of these is the 8bit32 index. Since we do not know beforehand which one is used we need to accomodate for that. + # Using only the 8bit32 index uses 57,5% of the index files. Since bwa-mem2 uses slightly more memory than the index + # We put it at 62% as a safety factor. That means the memory usage for bwa-mem will be 53G for a human genome. Resulting in 60G total + # on 8 cores with samtools with 3 sort threads. + Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 0.62) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. From ed49efcbda3f90819dbc8561be89690268f23dd9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jul 2020 13:17:18 +0200 Subject: [PATCH 240/902] Update default cutadapt image --- cutadapt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index d125af43..7faeaff1 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -81,7 +81,7 @@ task Cutadapt { Int cores = 4 String memory = "~{300 + 100 * cores}M" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) - String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37h516909a_0" + String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" } String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) From be890eb56a43ea86d32ead9c5c9b85d134d01166 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jul 2020 13:22:52 +0200 Subject: [PATCH 241/902] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 55fb1e8a..255c2186 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. 
version 4.0.0-develop --------------------------- ++ Picard MergeVcf now uses compression level 1 by default. + bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The number of threads is now related to the number of threads on the aligner. Using more threads reduces the chance of the samtools sort pipe getting From 94128f4fe9fee9bfc88b7c96c07768141c197fa7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 29 Jul 2020 13:40:05 +0200 Subject: [PATCH 242/902] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 255c2186..67ea94a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Added a task for GRIDSS. + Picard MergeVcf now uses compression level 1 by default. + bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The number of threads is now related to the number of threads on the aligner. From 247561f6bfe261744902980621ad133f2ba8d971 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 29 Jul 2020 14:30:09 +0200 Subject: [PATCH 243/902] add parameter_meta to gridss --- gridss.wdl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 37ac83b7..14bc441c 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -38,6 +38,8 @@ task GRIDSS { } command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" gridss \ --reference ~{reference.fastaFile} \ --output ~{outputPrefix}.vcf.gz \ @@ -62,4 +64,18 @@ task GRIDSS { memory: "32G" docker: dockerImage } + + parameter_meta { + tumorBam: {description: "The input BAM file. 
This should be the tumor/case sample in case of a paired analysis.", category: "required"} + tumorBai: {description: "The index for tumorBam.", category: "required"} + tumorLabel: {description: "The name of the (tumor) sample.", category: "required"} + normalBam: {description: "The BAM file for the normal/control sample.", category: "advanced"} + normalBai: {description: "The index for normalBam.", category: "advanced"} + normalLabel: {description: "The name of the normal sample.", category: "advanced"} + BwaIndex reference: {description: "A BWA index, this should also include the fasta index file (.fai).", category: "required"} + outputPrefix: {description: "The prefix for the output files. This may include parent directories.", category: "common"} + + threads: {description: "The number of the threads to use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } } \ No newline at end of file From 82f0cc79f1d2d49a5d34c27ea743f1be7655d7f5 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 29 Jul 2020 14:38:13 +0200 Subject: [PATCH 244/902] fix parameter_meta --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 14bc441c..3d4b7d73 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -72,7 +72,7 @@ task GRIDSS { normalBam: {description: "The BAM file for the normal/control sample.", category: "advanced"} normalBai: {description: "The index for normalBam.", category: "advanced"} normalLabel: {description: "The name of the normal sample.", category: "advanced"} - BwaIndex reference: {description: "A BWA index, this should also include the fasta index file (.fai).", category: "required"} + reference: {description: "A BWA index, this should also include the fasta index file (.fai).", category: "required"} outputPrefix: {description: "The prefix for the output files. 
This may include parent directories.", category: "common"} threads: {description: "The number of the threads to use.", category: "advanced"} From 0ecfa670f78336f0fc876ba2a44f6601971f4ca1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 5 Aug 2020 10:18:11 +0200 Subject: [PATCH 245/902] update scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 325a129c..c0b48b0a 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 325a129c14de56b2055ee0e9e0da7dc74df5fec4 +Subproject commit c0b48b0a916913d1e6751d7744d1cec37559a81f From d1e2d6e56131432ea941722aa1b6ac3527d2b02d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 5 Aug 2020 10:37:36 +0200 Subject: [PATCH 246/902] set version in changelog to stable version --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 255c2186..96f4559c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 4.0.0-develop +version 4.0.0 --------------------------- + Picard MergeVcf now uses compression level 1 by default. + bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. 
The From 5d5a335ae7791d360af366db3ce461bc6c07ca7e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 5 Aug 2020 10:40:12 +0200 Subject: [PATCH 247/902] setting next version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 944880fa..ee74734a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.2.0 +4.1.0 From d1922724faf06dac8e835c395fc37e5d5e64f515 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Thu, 6 Aug 2020 13:21:07 +0200 Subject: [PATCH 248/902] add missing category for outputType in bcftools view --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 2677899b..8875903b 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -58,7 +58,7 @@ task View { parameter_meta { inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf"} + outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 8591feb0815fd44472761359244c4ee6c6d45752 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 7 Aug 2020 13:42:20 +0200 Subject: [PATCH 249/902] Add pacbio bam2fastx tool. --- CHANGELOG.md | 4 ++ bam2fastx.wdl | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 bam2fastx.wdl diff --git a/CHANGELOG.md b/CHANGELOG.md index 96f4559c..272499c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ This document is user facing. 
Please word the changes in such a way that users understand how the changes affect the new version. --> +version 5.0.0-dev +--------------------------- ++ Add wdl file for pacbio's bam2fastx tool. + version 4.0.0 --------------------------- + Picard MergeVcf now uses compression level 1 by default. diff --git a/bam2fastx.wdl b/bam2fastx.wdl new file mode 100644 index 00000000..09c56897 --- /dev/null +++ b/bam2fastx.wdl @@ -0,0 +1,129 @@ +version 1.0 + +# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Bam2Fasta { + input { + File inputFile + String outputPrefix + Int compressionLevel = 1 + Boolean uncompressedOutput = false + Boolean splitByBarcode = false + + String? 
seqIdPrefix + + String memory = "2G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.0--he1c1bb9_8" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + bam2fasta \ + --output ~{outputPrefix} \ + -c ~{compressionLevel} + ~{true="-u" false="" uncompressedOutput} \ + ~{true="--split-barcodes" false="" splitByBarcode} \ + ~{"--seqid-prefix " + seqIdPrefix} \ + ~{inputFile} + } + + output { + File fastaFile = outputPrefix + ".fasta.gz" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input pacbio bam file.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} + uncompressedOutput: {description: "Do not compress. In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} + splitByBarcode: {description: "Split output into multiple fasta files, by barcode pairs.", category: "advanced"} + seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + fastaFile: {description: "The fasta output file."} + } +} + +task Bam2Fastq { + input { + File inputFile + String outputPrefix + Int compressionLevel = 1 + Boolean uncompressedOutput = false + Boolean splitByBarcode = false + + String? 
seqIdPrefix + + String memory = "2G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.0--he1c1bb9_8" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + bam2fastq \ + --output ~{outputPrefix} \ + -c ~{compressionLevel} + ~{true="-u" false="" uncompressedOutput} \ + ~{true="--split-barcodes" false="" splitByBarcode} \ + ~{"--seqid-prefix " + seqIdPrefix} \ + ~{inputFile} + } + + output { + File fastaFile = outputPrefix + ".fastq.gz" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input pacbio bam file.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} + uncompressedOutput: {description: "Do not compress. In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} + splitByBarcode: {description: "Split output into multiple fastq files, by barcode pairs.", category: "advanced"} + seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + fastaFile: {description: "The fastq output file."} + } +} From 9ad9425766843e2706ff440457d6ec1d8b21916b Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 7 Aug 2020 13:44:02 +0200 Subject: [PATCH 250/902] Correct output naming. 
--- bam2fastx.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 09c56897..21f1c604 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -102,7 +102,7 @@ task Bam2Fastq { } output { - File fastaFile = outputPrefix + ".fastq.gz" + File fastqFile = outputPrefix + ".fastq.gz" } runtime { @@ -124,6 +124,6 @@ task Bam2Fastq { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - fastaFile: {description: "The fastq output file."} + fastqFile: {description: "The fastq output file."} } } From aea639c83bc4b306df01986f55f4e774208e8a8e Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 10 Aug 2020 11:27:23 +0200 Subject: [PATCH 251/902] Add index input to the tasks. --- bam2fastx.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 21f1c604..27ed15cc 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -23,6 +23,7 @@ version 1.0 task Bam2Fasta { input { File inputFile + File bamIndex String outputPrefix Int compressionLevel = 1 Boolean uncompressedOutput = false @@ -60,6 +61,7 @@ task Bam2Fasta { parameter_meta { # inputs inputFile: {description: "The input pacbio bam file.", category: "required"} + bamIndex: {description: "The .pbi index for the input file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} uncompressedOutput: {description: "Do not compress. 
In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} @@ -77,6 +79,7 @@ task Bam2Fasta { task Bam2Fastq { input { File inputFile + File bamIndex String outputPrefix Int compressionLevel = 1 Boolean uncompressedOutput = false @@ -114,6 +117,7 @@ task Bam2Fastq { parameter_meta { # inputs inputFile: {description: "The input pacbio bam file.", category: "required"} + bamIndex: {description: "The .pbi index for the input file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} uncompressedOutput: {description: "Do not compress. In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} From 628b9169e7791eaad69b3c58f3f0b324a529be12 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 10 Aug 2020 13:25:23 +0200 Subject: [PATCH 252/902] Add missing ". --- bam2fastx.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 27ed15cc..ccea6edb 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -39,9 +39,10 @@ task Bam2Fasta { command { set -e mkdir -p "$(dirname ~{outputPrefix})" + chmod 755 ~{inputFile} bam2fasta \ --output ~{outputPrefix} \ - -c ~{compressionLevel} + -c ~{compressionLevel} \ ~{true="-u" false="" uncompressedOutput} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ @@ -95,9 +96,10 @@ task Bam2Fastq { command { set -e mkdir -p "$(dirname ~{outputPrefix})" + chmod 755 ~{inputFile} bam2fastq \ --output ~{outputPrefix} \ - -c ~{compressionLevel} + -c ~{compressionLevel} \ ~{true="-u" false="" uncompressedOutput} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ From 4da76be86e4cfe93a63ab0700468c1be9f572683 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 10 Aug 2020 14:05:35 +0200 Subject: [PATCH 253/902] 
Remove left-over chmod. --- bam2fastx.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index ccea6edb..f9699d3b 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -39,7 +39,6 @@ task Bam2Fasta { command { set -e mkdir -p "$(dirname ~{outputPrefix})" - chmod 755 ~{inputFile} bam2fasta \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ @@ -96,7 +95,6 @@ task Bam2Fastq { command { set -e mkdir -p "$(dirname ~{outputPrefix})" - chmod 755 ~{inputFile} bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ From adac77e53089d7875c83ad16bb7271621c30abcb Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 11 Aug 2020 10:30:53 +0200 Subject: [PATCH 254/902] Remove uncompressed output options. --- bam2fastx.wdl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index f9699d3b..5e5fb50a 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -26,7 +26,6 @@ task Bam2Fasta { File bamIndex String outputPrefix Int compressionLevel = 1 - Boolean uncompressedOutput = false Boolean splitByBarcode = false String? seqIdPrefix @@ -42,7 +41,6 @@ task Bam2Fasta { bam2fasta \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ - ~{true="-u" false="" uncompressedOutput} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ ~{inputFile} @@ -64,7 +62,6 @@ task Bam2Fasta { bamIndex: {description: "The .pbi index for the input file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} - uncompressedOutput: {description: "Do not compress. 
In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} splitByBarcode: {description: "Split output into multiple fasta files, by barcode pairs.", category: "advanced"} seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} @@ -82,7 +79,6 @@ task Bam2Fastq { File bamIndex String outputPrefix Int compressionLevel = 1 - Boolean uncompressedOutput = false Boolean splitByBarcode = false String? seqIdPrefix @@ -98,7 +94,6 @@ task Bam2Fastq { bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ - ~{true="-u" false="" uncompressedOutput} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ ~{inputFile} @@ -120,7 +115,6 @@ task Bam2Fastq { bamIndex: {description: "The .pbi index for the input file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} - uncompressedOutput: {description: "Do not compress. In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} splitByBarcode: {description: "Split output into multiple fastq files, by barcode pairs.", category: "advanced"} seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} From 10236b504fe1e272690e7976f4c281d0cfa13027 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 11 Aug 2020 13:23:33 +0200 Subject: [PATCH 255/902] update CHANGELOG --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a0964883..8a882f09 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,13 +9,13 @@ This document is user facing. 
Please word the changes in such a way that users understand how the changes affect the new version. --> -version 5.0.0-dev +version 4.1.0-dev --------------------------- ++ Added a task for GRIDSS. + Add wdl file for pacbio's bam2fastx tool. version 4.0.0 --------------------------- -+ Added a task for GRIDSS. + Picard MergeVcf now uses compression level 1 by default. + bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The number of threads is now related to the number of threads on the aligner. From 710cb79676d11663c8d951373265c9c80325aee3 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 11 Aug 2020 14:14:55 +0200 Subject: [PATCH 256/902] Fix index localization. --- CHANGELOG.md | 2 ++ bam2fastx.wdl | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 272499c4..af613151 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Add copy command to bam2fastx tasks to make sure bam file and its index are + always in the same directory. + Add wdl file for pacbio's bam2fastx tool. version 4.0.0 diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 5e5fb50a..6a09202f 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -24,6 +24,7 @@ task Bam2Fasta { input { File inputFile File bamIndex + String basenameInputFile = basename(inputFile) String outputPrefix Int compressionLevel = 1 Boolean splitByBarcode = false @@ -38,12 +39,16 @@ task Bam2Fasta { command { set -e mkdir -p "$(dirname ~{outputPrefix})" + # The bam file and its index need to be in the same directory. + # Cromwell will put them in separate iputs folders. 
+ cp ~{inputFile} ./ + cp ~{bamIndex} ./ bam2fasta \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{inputFile} + ./~{basenameInputFile} } output { @@ -76,6 +81,7 @@ task Bam2Fasta { task Bam2Fastq { input { File inputFile + String basenameInputFile = basename(inputFile) File bamIndex String outputPrefix Int compressionLevel = 1 @@ -91,12 +97,16 @@ task Bam2Fastq { command { set -e mkdir -p "$(dirname ~{outputPrefix})" + # The bam file and its index need to be in the same directory. + # Cromwell will put them in separate iputs folders. + cp ~{inputFile} ./ + cp ~{bamIndex} ./ bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{inputFile} + ./~{basenameInputFile} } output { From 48351b26c1ab7caad71432b84a0fcfa51c9f4388 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Aug 2020 14:25:28 +0200 Subject: [PATCH 257/902] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4446daa8..ab8d8867 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 4.1.0-dev +version 5.0.0-dev --------------------------- + A bwa-mem2 task was created with the same interface (including usePostalt) as the bwa mem task. From 01df29baef579cce73a32cd109d5405d45c197bd Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 11 Aug 2020 15:18:17 +0200 Subject: [PATCH 258/902] Change lima outputs. 
--- CHANGELOG.md | 3 +-- bam2fastx.wdl | 14 ++------------ lima.wdl | 9 ++++++--- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b839b35b..57bbfecb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,7 @@ that users understand how the changes affect the new version. version 4.1.0-dev --------------------------- -+ Add copy command to bam2fastx tasks to make sure bam file and its index are - always in the same directory. ++ Remove globs from lima outputs. + Added a task for GRIDSS. + Add wdl file for pacbio's bam2fastx tool. diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 6a09202f..5e5fb50a 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -24,7 +24,6 @@ task Bam2Fasta { input { File inputFile File bamIndex - String basenameInputFile = basename(inputFile) String outputPrefix Int compressionLevel = 1 Boolean splitByBarcode = false @@ -39,16 +38,12 @@ task Bam2Fasta { command { set -e mkdir -p "$(dirname ~{outputPrefix})" - # The bam file and its index need to be in the same directory. - # Cromwell will put them in separate iputs folders. - cp ~{inputFile} ./ - cp ~{bamIndex} ./ bam2fasta \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ./~{basenameInputFile} + ~{inputFile} } output { @@ -81,7 +76,6 @@ task Bam2Fasta { task Bam2Fastq { input { File inputFile - String basenameInputFile = basename(inputFile) File bamIndex String outputPrefix Int compressionLevel = 1 @@ -97,16 +91,12 @@ task Bam2Fastq { command { set -e mkdir -p "$(dirname ~{outputPrefix})" - # The bam file and its index need to be in the same directory. - # Cromwell will put them in separate iputs folders. 
- cp ~{inputFile} ./ - cp ~{bamIndex} ./ bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ./~{basenameInputFile} + ~{inputFile} } output { diff --git a/lima.wdl b/lima.wdl index 2e8a7085..1a40b1c8 100644 --- a/lima.wdl +++ b/lima.wdl @@ -95,12 +95,15 @@ task Lima { cp "~{basename(outputPrefix)}.fl.lima.counts" "~{outputPrefix}.fl.lima.counts" cp "~{basename(outputPrefix)}.fl.lima.report" "~{outputPrefix}.fl.lima.report" cp "~{basename(outputPrefix)}.fl.lima.summary" "~{outputPrefix}.fl.lima.summary" + find . -path "*.bam" > bamFiles.txt + find . -path "*.bam.pbi" > bamIndexes.txt + find . -path "*.subreadset.xml" > subreadsets.txt } output { - Array[File] limaBam = glob("*.bam") - Array[File] limaBamIndex = glob("*.bam.pbi") - Array[File] limaXml = glob("*.subreadset.xml") + Array[File] limaBam = read_lines("bamFiles.txt") + Array[File] limaBamIndex = read_lines("bamIndexes.txt") + Array[File] limaXml = read_lines("subreadsets.txt") File limaStderr = outputPrefix + ".fl.stderr.log" File limaJson = outputPrefix + ".fl.json" File limaCounts = outputPrefix + ".fl.lima.counts" From af73c53935206dd54b37079e1c8d6a5b053c4a46 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 11 Aug 2020 17:50:15 +0200 Subject: [PATCH 259/902] Change inputs to arrays. --- CHANGELOG.md | 3 ++- bam2fastx.wdl | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 57bbfecb..eb2ef271 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,8 @@ that users understand how the changes affect the new version. version 4.1.0-dev --------------------------- -+ Remove globs from lima outputs. ++ Bam2fastx: Input bam and index are now arrays. ++ Lima: Remove globs from outputs. + Added a task for GRIDSS. + Add wdl file for pacbio's bam2fastx tool. 
diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 5e5fb50a..a8f1342c 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -22,8 +22,8 @@ version 1.0 task Bam2Fasta { input { - File inputFile - File bamIndex + Array[File]+ inputFile + Array[File]+ bamIndex String outputPrefix Int compressionLevel = 1 Boolean splitByBarcode = false @@ -43,7 +43,7 @@ task Bam2Fasta { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{inputFile} + ~{sep=" " inputFile} } output { @@ -58,8 +58,8 @@ task Bam2Fasta { parameter_meta { # inputs - inputFile: {description: "The input pacbio bam file.", category: "required"} - bamIndex: {description: "The .pbi index for the input file.", category: "required"} + inputFile: {description: "The input pacbio bam file(s).", category: "required"} + bamIndex: {description: "The .pbi index for the input file(s).", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} splitByBarcode: {description: "Split output into multiple fasta files, by barcode pairs.", category: "advanced"} @@ -75,8 +75,8 @@ task Bam2Fasta { task Bam2Fastq { input { - File inputFile - File bamIndex + Array[File]+ inputFile + Array[File]+ bamIndex String outputPrefix Int compressionLevel = 1 Boolean splitByBarcode = false @@ -96,7 +96,7 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{inputFile} + ~{sep=" " inputFile} } output { @@ -111,8 +111,8 @@ task Bam2Fastq { parameter_meta { # inputs - inputFile: {description: "The input pacbio bam file.", category: "required"} - bamIndex: {description: "The .pbi index for the input file.", category: "required"} + inputFile: {description: "The input pacbio bam file(s).", category: "required"} + bamIndex: {description: "The .pbi index for the 
input file(s).", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} splitByBarcode: {description: "Split output into multiple fastq files, by barcode pairs.", category: "advanced"} From a4af699b14f325e1729e307a21058cb25da0d251 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 13 Aug 2020 15:06:02 +0200 Subject: [PATCH 260/902] update gridss: add --jvmheap parameter --- gridss.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 3d4b7d73..4ba4bc17 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -33,6 +33,7 @@ task GRIDSS { BwaIndex reference String outputPrefix = "gridss" + String jvmheapsize = "25G" Int threads = 1 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } @@ -45,6 +46,7 @@ task GRIDSS { --output ~{outputPrefix}.vcf.gz \ --assembly ~{outputPrefix}_assembly.bam \ ~{"-t " + threads} \ + ~{"--jvmheap " + jvmheapsize} \ --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ ~{normalBam} \ ~{tumorBam} From 9bfe4ebf231bd307dc546dff34c8b96823058718 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 13 Aug 2020 15:09:40 +0200 Subject: [PATCH 261/902] update task/CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a882f09..2360a877 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 4.1.0-dev --------------------------- ++ Updated task gridss.wdl: add --jvmheap parameter + Added a task for GRIDSS. + Add wdl file for pacbio's bam2fastx tool. From ac3ee59598026cb22cf40325dbf32b0bc5e988fb Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 13 Aug 2020 15:17:53 +0200 Subject: [PATCH 262/902] Rename input files. 
--- bam2fastx.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index a8f1342c..42240cd4 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -22,7 +22,7 @@ version 1.0 task Bam2Fasta { input { - Array[File]+ inputFile + Array[File]+ bam Array[File]+ bamIndex String outputPrefix Int compressionLevel = 1 @@ -43,7 +43,7 @@ task Bam2Fasta { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{sep=" " inputFile} + ~{sep=" " bam} } output { @@ -58,7 +58,7 @@ task Bam2Fasta { parameter_meta { # inputs - inputFile: {description: "The input pacbio bam file(s).", category: "required"} + bam: {description: "The input pacbio bam file(s).", category: "required"} bamIndex: {description: "The .pbi index for the input file(s).", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} @@ -75,7 +75,7 @@ task Bam2Fasta { task Bam2Fastq { input { - Array[File]+ inputFile + Array[File]+ bam Array[File]+ bamIndex String outputPrefix Int compressionLevel = 1 @@ -96,7 +96,7 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{sep=" " inputFile} + ~{sep=" " bam} } output { @@ -111,7 +111,7 @@ task Bam2Fastq { parameter_meta { # inputs - inputFile: {description: "The input pacbio bam file(s).", category: "required"} + bam: {description: "The input pacbio bam file(s).", category: "required"} bamIndex: {description: "The .pbi index for the input file(s).", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} From 8b378196020a0a4151dbb06d2452e2e05a3c12e5 Mon Sep 17 00:00:00 2001 From: 
cagaser Date: Thu, 13 Aug 2020 16:01:16 +0200 Subject: [PATCH 263/902] update gridss.wdl --- gridss.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 4ba4bc17..04ea2e82 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -33,7 +33,7 @@ task GRIDSS { BwaIndex reference String outputPrefix = "gridss" - String jvmheapsize = "25G" + Int jvmHeapSizeGb = 1 Int threads = 1 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } @@ -46,7 +46,7 @@ task GRIDSS { --output ~{outputPrefix}.vcf.gz \ --assembly ~{outputPrefix}_assembly.bam \ ~{"-t " + threads} \ - ~{"--jvmheap " + jvmheapsize} \ + ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ ~{normalBam} \ ~{tumorBam} @@ -63,7 +63,7 @@ task GRIDSS { runtime { cpu: threads - memory: "32G" + memory: "~{jvmHeapSizeGb}G" docker: dockerImage } From b654fee3d284e55e1f73f21621ee01e18fa731a8 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 13 Aug 2020 16:21:30 +0200 Subject: [PATCH 264/902] change default jvmHeapSizeGb from 1G to 30G --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 04ea2e82..3b7859b6 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -33,7 +33,7 @@ task GRIDSS { BwaIndex reference String outputPrefix = "gridss" - Int jvmHeapSizeGb = 1 + Int jvmHeapSizeGb = 30 Int threads = 1 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } @@ -63,7 +63,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb}G" + memory: "~{jvmHeapSizeGb + 1}G" docker: dockerImage } From e941b853a9ff8e194c8b1af2dc28dffddb58d8be Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 08:56:34 +0200 Subject: [PATCH 265/902] add parameter_meta for SVcalling.gridss.jvmHeapSizeGb --- gridss.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/gridss.wdl b/gridss.wdl index 3b7859b6..3649cb1b 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -78,6 
+78,7 @@ task GRIDSS { outputPrefix: {description: "The prefix for the output files. This may include parent directories.", category: "common"} threads: {description: "The number of the threads to use.", category: "advanced"} + javaXmxMb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } \ No newline at end of file From 1e8155c26a770e2aab4b46fcf74f5c98b4f7945d Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 09:04:25 +0200 Subject: [PATCH 266/902] small fix --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 3649cb1b..44b9e9f1 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -78,7 +78,7 @@ task GRIDSS { outputPrefix: {description: "The prefix for the output files. This may include parent directories.", category: "common"} threads: {description: "The number of the threads to use.", category: "advanced"} - javaXmxMb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} + jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } \ No newline at end of file From 31b21e1e197b5c646b2ad202cd4fa56cc54816e8 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 16:10:47 +0200 Subject: [PATCH 267/902] add bcftools annotate --- bcftools.wdl | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 8875903b..33685c33 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -22,6 +22,106 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +task Annotate { + input { + File? annsFile + String? collapse + Array[String] columns = [] + String? exclude + Boolean force = false + File? headerLines + String? newId + String? include + Boolean keepSites = false + String? markSites + Boolean noVersion = false + String outputType = "z" + String? regions + File? regionsFile + File? renameChrs + Array[String] samples = [] + File? 
samplesFile + Boolean singleOverlaps = false + Array[String] removeAnns = [] + File inputFile + String outputPath = "output.vcf.gz" + + Int threads = 0 + String memory = "256M" + Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + bcftools annotate \ + -o ~{outputPath} \ + -O ~{outputType} \ + ~{"--annotations " + annsFile} \ + ~{"--collapse " + collapse} \ + ~{true="--columns" false="" length(columns) > 0} ~{sep="," columns} \ + ~{"--exclude " + exclude} \ + ~{true="--force" false="" force} \ + ~{"--header-lines " + headerLines} \ + ~{"--set-id " + newId} \ + ~{"--include " + include} \ + ~{true="--keep-sites" false="" keepSites} \ + ~{"--mark-sites " + markSites} \ + ~{true="--no-version" false="" noVersion} \ + ~{"--regions " + regions} \ + ~{"--regions-file " + regionsFile} \ + ~{"--rename-chrs " + renameChrs} \ + ~{true="--samples" false="" length(samples) > 0} ~{sep="," samples} \ + ~{"--samples-file " + samplesFile} \ + ~{true="--single-overlaps" false="" singleOverlaps} \ + ~{true="--remove" false="" length(removeAnns) > 0} ~{sep="," removeAnns} \ + ~{inputFile} + bcftools index --tbi ~{outputPath} + + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} + annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "advanced"} + collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} + columns: {description: "Comma-separated list of columns or tags to carry 
over from the annotation file (see man page for details).", category: "advanced"} + exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} + force: {description: "Continue even when parsing errors, such as undefined tags, are encountered.", category: "advanced"} + headerLines: {description: "Lines to append to the VCF header (see man page for details).", category: "advanced"} + newId: {description: "Assign ID on the fly (e.g. --set-id +'%CHROM\_%POS').", category: "advanced"} + include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} + keepSites: {description: "Keep sites which do not pass -i and -e expressions instead of discarding them.", category: "advanced"} + markSites: {description: "Annotate sites which are present ('+') or absent ('-') in the -a file with a new INFO/TAG flag.", category: "advanced"} + noVersion: {description: "Do not append version and command line information to the output VCF header.", category: "advanced"} + regions: {description: "Restrict to comma-separated list of regions.", category: "advanced"} + regionsFile: {description: "Restrict to regions listed in a file.", category: "advanced"} + renameChrs: {description: "rename chromosomes according to the map in file (see man page for details).", category: "advanced"} + samples: {description: "List of samples for sample stats, \"-\" to include all samples.", category: "advanced"} + samplesFile: {description: "File of samples to include.", category: "advanced"} + singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} + removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} + inputFile: {description: "A vcf or bcf file.", category: "required"} + + threads: {description: "Number of extra decompression threads [0].", category: "advanced"} + 
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + } +} + task View { input { File inputFile From df6fe2df5e1276a39eaf6981f86b93d49cbbddda Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 16:12:38 +0200 Subject: [PATCH 268/902] small fix: change vcf to bcf --- delly.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/delly.wdl b/delly.wdl index efa1bf60..f708f494 100644 --- a/delly.wdl +++ b/delly.wdl @@ -28,7 +28,7 @@ task CallSV { File bamIndex File referenceFasta File referenceFastaFai - String outputPath = "./delly/delly.vcf" + String outputPath = "./delly/delly.bcf" String memory = "15G" Int timeMinutes = 300 From 1241b96fce92f28fa747cde02081e00edb0aa506 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 17:12:04 +0200 Subject: [PATCH 269/902] add bcftools sort --- bcftools.wdl | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 33685c33..d72efde3 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -122,6 +122,44 @@ task Annotate { } } +task Sort { + input { + File inputFile + String outputPath = "output.vcf.gz" + String memory = "256M" + Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + String outputType = "z" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + bcftools sort \ + -o ~{outputPath} \ + -O ~{outputType} \ + ~{inputFile} + bcftools index --tbi ~{outputPath} + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputFile: {description: "A vcf or bcf file.", category: "required"} 
+ outputPath: {description: "The location the output VCF file should be written.", category: "common"} + outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } + + +} + task View { input { File inputFile From f07a59aede3deb6e2001e0907ce3073079a20d63 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 17:16:17 +0200 Subject: [PATCH 270/902] add output {} --- bcftools.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index d72efde3..520bcf15 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -142,6 +142,11 @@ task Sort { bcftools index --tbi ~{outputPath} } + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + runtime { memory: memory time_minutes: timeMinutes From 4664f90c91fd801a7cb6322cf69333fd44dfcd92 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 17:45:12 +0200 Subject: [PATCH 271/902] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1733c93f..85beb2eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ bcftools: add sorting and annotation + Bam2fastx: Input bam and index are now arrays. + Lima: Remove globs from outputs. + Updated task gridss.wdl: add --jvmheap parameter From 3327f388f3ac184c1c0bc37dd2e920dc2e8e71fb Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 17 Aug 2020 11:29:20 +0200 Subject: [PATCH 272/902] Update submodules. 
--- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index c0b48b0a..0cca0f40 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit c0b48b0a916913d1e6751d7744d1cec37559a81f +Subproject commit 0cca0f40a8e9121e8dcc9e76838f85835a0d8e94 From e554f35a07e4f6427e1d8ad1cb7ddcaf3fc50ce0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 20 Aug 2020 15:38:25 +0200 Subject: [PATCH 273/902] add sage task --- sage.wdl | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 sage.wdl diff --git a/sage.wdl b/sage.wdl new file mode 100644 index 00000000..dbc101dc --- /dev/null +++ b/sage.wdl @@ -0,0 +1,92 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Sage { + input { + String tumorName + File tumorBam + File tumorBai + String? normalName + File? normalBam + File? 
normalBai + String assembly + File referenceFasta + File hotspotVcf + File panelBed + File highConfidenceBed + + Int timeMinutes = 60 #FIXME I've no idea how long this takes... + Int threads = 2 + String javaXmx = "32G" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" + } + + command { + SAGE \ + -Xmx~{javaXmx} \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + ~{"-reference " + normalName} \ + ~{"-reference_bam " + normalBam} \ + -assembly ~{assembly} \ + -ref_genome ~{referenceFasta} \ + -hotspots ~{hotspotVcf} \ + -panel_bed ~{panelBed} \ + -high_confidence_bed ~{highConfidenceBed} \ + -threads ~{threads} \ + + -out ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + time_minutes: timeMinutes + cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} + tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} + normalName: {description: "The name of the normal/reference sample.", category: "common"} + normalBam: {description: "The BAM file for the normal sample.", category: "common"} + normalBam: {description: "The BAM file for the normal sample.", category: "common"} + assembly: {description: "The assembly of the reference genomes, either hg19 or hg38.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + hotspotVcf: {description: "A VCF file with hotspot variant sites.", category: "required"} + panelBed: {description: "A bed file describing a panel of cancer related genes.", category: "required"} + highConfidenceBed: {description: "A bed file describing high confidence regions.", category: "required"} + + threads: {description: "The number of threads to be used.", category: "advanced"} + memory: {description: "The amount of memory 
this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} \ No newline at end of file From a6c3487834da5ce6b6a40ce2e966e9d899abb240 Mon Sep 17 00:00:00 2001 From: Redmar Date: Mon, 24 Aug 2020 09:41:30 +0200 Subject: [PATCH 274/902] Add option to ignore masked reference --- vt.wdl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/vt.wdl b/vt.wdl index d4c134b9..8a9f9de8 100644 --- a/vt.wdl +++ b/vt.wdl @@ -26,6 +26,7 @@ task Normalize { File inputVCFIndex File referenceFasta File referenceFastaFai + Boolean ignoreMaskedRef = false String outputPath = "./vt/normalized_decomposed.vcf" String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" String memory = "4G" @@ -33,9 +34,12 @@ task Normalize { } command { - set -e + set -eo pipefail mkdir -p "$(dirname ~{outputPath})" - vt normalize ~{inputVCF} -r ~{referenceFasta} | vt decompose -s - -o ~{outputPath} + vt normalize ~{inputVCF} \ + -r ~{referenceFasta} \ + ~{true="-m " false="" ignoreMaskedRef} \ + | vt decompose -s - -o ~{outputPath} } output { @@ -55,6 +59,7 @@ task Normalize { outputPath: {description: "The location the output VCF file should be written.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs", category: "advanced" memory: 
{description: "The memory required to run the programs", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 47651a09cf7d3cd0fb45bdc20d5ef0227a3bbcd3 Mon Sep 17 00:00:00 2001 From: Redmar Date: Mon, 24 Aug 2020 09:42:47 +0200 Subject: [PATCH 275/902] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 85beb2eb..0d1805ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ vt: Add option to ignore masked reference. + bcftools: add sorting and annotation + Bam2fastx: Input bam and index are now arrays. + Lima: Remove globs from outputs. From fbad1676097484b301fed9e55b36d39dcd7a7524 Mon Sep 17 00:00:00 2001 From: Redmar Date: Mon, 24 Aug 2020 09:55:52 +0200 Subject: [PATCH 276/902] Add closing bracket --- vt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vt.wdl b/vt.wdl index 8a9f9de8..99cc1318 100644 --- a/vt.wdl +++ b/vt.wdl @@ -59,7 +59,7 @@ task Normalize { outputPath: {description: "The location the output VCF file should be written.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs", category: "advanced" + ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs", category: "advanced"} memory: {description: "The memory required to run 
the programs", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From f335ac9b5d0d061fce172ebd843d76e46e3e1ed1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 26 Aug 2020 12:56:51 +0200 Subject: [PATCH 277/902] adjust sage --- sage.wdl | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/sage.wdl b/sage.wdl index dbc101dc..ba0a6137 100644 --- a/sage.wdl +++ b/sage.wdl @@ -28,32 +28,27 @@ task Sage { String? normalName File? normalBam File? normalBai - String assembly File referenceFasta - File hotspotVcf - File panelBed - File highConfidenceBed + File referenceFastaDict + File referenceFastaFai + File knownHotspots + File codingRegsions Int timeMinutes = 60 #FIXME I've no idea how long this takes... 
- Int threads = 2 String javaXmx = "32G" + String memory = "33G" String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" } command { - SAGE \ - -Xmx~{javaXmx} \ + SAGE -Xmx~{javaXmx} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ ~{"-reference " + normalName} \ ~{"-reference_bam " + normalBam} \ - -assembly ~{assembly} \ -ref_genome ~{referenceFasta} \ - -hotspots ~{hotspotVcf} \ - -panel_bed ~{panelBed} \ - -high_confidence_bed ~{highConfidenceBed} \ - -threads ~{threads} \ - + -known_hotspots ~{knownHotspots} \ + -coding_regions ~{codingRegsions} \ -out ~{outputPath} } @@ -74,12 +69,13 @@ task Sage { tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} normalName: {description: "The name of the normal/reference sample.", category: "common"} normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBam: {description: "The BAM file for the normal sample.", category: "common"} - assembly: {description: "The assembly of the reference genomes, either hg19 or hg38.", category: "required"} + normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} referenceFasta: {description: "The reference fasta file.", category: "required"} - hotspotVcf: {description: "A VCF file with hotspot variant sites.", category: "required"} - panelBed: {description: "A bed file describing a panel of cancer related genes.", category: "required"} - highConfidenceBed: {description: "A bed file describing high confidence regions.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + knownHotspots: {description: "A TSV file with hotspot variant sites.", category: "required"} + codingRegsions: {description: "A bed file describing coding regions to search for inframe 
indels.", category: "required"} threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 2b8e422685de9ea6f63831d8780231a058c1b0cb Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 1 Sep 2020 15:08:25 +0200 Subject: [PATCH 278/902] add sagev2 --- sage.wdl | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 2 deletions(-) diff --git a/sage.wdl b/sage.wdl index ba0a6137..ed3d0866 100644 --- a/sage.wdl +++ b/sage.wdl @@ -20,7 +20,7 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -task Sage { +task SageHotspot { input { String tumorName File tumorBam @@ -33,6 +33,7 @@ task Sage { File referenceFastaFai File knownHotspots File codingRegsions + String outputPath = "./sage_hotspot.vcf.gz" Int timeMinutes = 60 #FIXME I've no idea how long this takes... String javaXmx = "32G" @@ -58,7 +59,6 @@ task Sage { runtime { time_minutes: timeMinutes - cpu: threads docker: dockerImage memory: memory } @@ -77,6 +77,82 @@ task Sage { knownHotspots: {description: "A TSV file with hotspot variant sites.", category: "required"} codingRegsions: {description: "A bed file describing coding regions to search for inframe indels.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task Sage { + input { + String tumorName + File tumorBam + String? normalName + File? 
normalBam + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File hotspots + File panelBed + File highConfidenceBed + String assembly = "hg38" + String outputPath = "./sage.vcf.gz" + + Int timeMinutes = 60 #FIXME I've no idea how long this takes... + String javaXmx = "32G" + String memory = "33G" + Int threads = 2 + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" + } + + command { + java -Xmx~{javaXmx} \ + -cp /usr/local/share/hmftools-sage-2.2-0/sage.jar \ + com.hartwig.hmftools.sage.SageApplication \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + ~{"-reference " + normalName} \ + ~{"-reference_bam " + normalBam} \ + -ref_genome ~{referenceFasta} \ + -hotspots ~{hotspots} \ + -panel_bed ~{panelBed} \ + -high_confidence_bed ~{highConfidenceBed} \ + -assembly ~{assembly} \ + -threads ~{threads} \ + -out ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + time_minutes: timeMinutes + cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} + tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} + normalName: {description: "The name of the normal/reference sample.", category: "common"} + normalBam: {description: "The BAM file for the normal sample.", category: "common"} + normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + hotspots: {description: "A VCF file containg hotspot variant sites.", category: "required"} + 
panelBed: {description: "A bed file containing a panel of genes of intrest.", category: "required"} + highConfidenceBed: {description: "A bed file containing high confidence regions.", category: "required"} + assembly: {description: "The genome assembly used, either \"hg19\" or \"hg38\".", category: "common"} + outputPath: {description: "The path to write the output VCF to.", category: "common"} + threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", From 4b249fde4a8e5558039553e4c2e7fa78a5251e6d Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 4 Sep 2020 15:37:32 +0200 Subject: [PATCH 279/902] replace binary digits to boolean --- survivor.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/survivor.wdl b/survivor.wdl index e5ac7b5b..b9583009 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -27,9 +27,9 @@ task Merge { Array[File] filePaths Int breakpointDistance = 1000 Int suppVecs = 2 - Int svType = 1 - Int strandType = 1 - Int distanceBySvSize = 0 + Boolean svType = true + Boolean strandType = true + Boolean distanceBySvSize = false Int minSize = 30 String outputPath = "./survivor/merged.vcf" String memory = "24G" @@ -45,9 +45,9 @@ task Merge { fileList \ ~{breakpointDistance} \ ~{suppVecs} \ - ~{svType} \ - ~{strandType} \ - ~{distanceBySvSize} \ + ~{true=1 false=0 svType} \ + ~{true=1 false=0 strandType} \ + ~{true=1 false=0 distanceBySvSize} \ ~{minSize} \ ~{outputPath} } From f12093281cb37c0521098e8377fc7ef83bc2c618 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 4 Sep 2020 15:41:04 +0200 Subject: [PATCH 280/902] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d1805ed..121c8768 100644 --- a/CHANGELOG.md +++ 
b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ survivor: replace integer boolean type to logical true or false value. + vt: Add option to ignore masked reference. + bcftools: add sorting and annotation + Bam2fastx: Input bam and index are now arrays. From 7bc3c58d309fcb20d9769180f471d79432d2e350 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 7 Sep 2020 17:26:42 +0200 Subject: [PATCH 281/902] make bcftools indexing optional --- bcftools.wdl | 102 +++++++++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 48 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 520bcf15..5d5a1ea6 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -52,6 +52,8 @@ task Annotate { String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } + Boolean indexing = if outputType == "z" then true else false + command { set -e mkdir -p "$(dirname ~{outputPath})" @@ -77,13 +79,14 @@ task Annotate { ~{true="--single-overlaps" false="" singleOverlaps} \ ~{true="--remove" false="" length(removeAnns) > 0} ~{sep="," removeAnns} \ ~{inputFile} - bcftools index --tbi ~{outputPath} + + ~{if indexing then 'bcftools index --tbi ~{outputPath}' else ''} } output { File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" + File? outputVcfIndex = outputPath + ".tbi" } runtime { @@ -132,6 +135,8 @@ task Sort { String outputType = "z" } + Boolean indexing = if outputType == "z" then true else false + command { set -e mkdir -p "$(dirname ~{outputPath})" @@ -139,12 +144,13 @@ task Sort { -o ~{outputPath} \ -O ~{outputType} \ ~{inputFile} - bcftools index --tbi ~{outputPath} + + ~{if indexing then 'bcftools index --tbi ~{outputPath}' else ''} } output { File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" + File? 
outputVcfIndex = outputPath + ".tbi" } runtime { @@ -165,50 +171,6 @@ task Sort { } -task View { - input { - File inputFile - String outputPath = "output.vcf.gz" - String memory = "256M" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) - String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" - String outputType = "z" - Int compressionLevel = 1 - } - - command { - set -e - mkdir -p "$(dirname ~{outputPath})" - bcftools view \ - -o ~{outputPath} \ - -O ~{outputType} \ - -l ~{compressionLevel} \ - ~{inputFile} - bcftools index --tbi ~{outputPath} - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - } - - runtime { - memory: memory - time_minutes: timeMinutes - docker: dockerImage - } - - parameter_meta { - inputFile: {description: "A vcf or bcf file.", category: "required"} - outputPath: {description: "The location the output VCF file should be written.", category: "common"} - outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - } -} - task Stats { input { File inputVcf @@ -313,3 +275,47 @@ task Stats { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} } } + +task View { + input { + File inputFile + String outputPath = "output.vcf" + Int compressionLevel = 0 + String memory = "256M" + Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + } + + String outputType = if compressionLevel > 0 then "z" else "v" + Boolean indexing = if compressionLevel > 0 then true else false + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + bcftools view \ + -o ~{outputPath} \ + -l ~{compressionLevel} \ + -O ~{outputType} \ + ~{inputFile} + + ~{if indexing then 'bcftools index --tbi ~{outputPath}' else ''} + } + output { + File outputVcf = outputPath + File? outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputFile: {description: "A vcf or bcf file.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} From 5781179d1b806467b8ffc8d5a39e41d6e7c58a5c Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 7 Sep 2020 17:54:35 +0200 Subject: [PATCH 282/902] made output extension depends on compression level --- bcftools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/bcftools.wdl b/bcftools.wdl index 5d5a1ea6..10db8b98 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -288,6 +288,7 @@ task View { String outputType = if compressionLevel > 0 then "z" else "v" Boolean indexing = if compressionLevel > 0 then true else false + String outputFilePath = if compressionLevel > 0 then outputPath + ".gz" else outputPath command { set -e From 33cdf52e284dd503054f2668b178662e2f7ff152 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 9 Sep 2020 13:34:35 +0200 Subject: [PATCH 283/902] update collect-columns to 1.0.0 --- collect-columns.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/collect-columns.wdl b/collect-columns.wdl index e4e3a948..fe41c5e8 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -29,13 +29,14 @@ task CollectColumns { Int? separator Array[String]? sampleNames Boolean header = false + Boolean sumOnDuplicateId = false Array[String]? additionalAttributes File? referenceGtf String? 
featureAttribute Int memoryGb = 4 + ceil(0.5 * length(inputTables)) Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/collect-columns:0.2.0--py_1" + String dockerImage = "quay.io/biocontainers/collect-columns:1.0.0--py_0" } command { @@ -49,6 +50,7 @@ task CollectColumns { ~{"-s " + separator} \ ~{true="-n" false="" defined(sampleNames)} ~{sep=" " sampleNames} \ ~{true="-H" false="" header} \ + ~{true="-S" false="" sumOnDuplicateId} \ ~{true="-a" false="" defined(additionalAttributes)} ~{sep=" " additionalAttributes} \ ~{"-g " + referenceGtf} \ ~{"-F " + featureAttribute} @@ -72,6 +74,7 @@ task CollectColumns { separator: {description: "Equivalent to the -s option of collect-columns.", category: "advanced"} sampleNames: {description: "Equivalent to the -n option of collect-columns.", category: "advanced"} header: {description: "Equivalent to the -H flag of collect-columns.", category: "advanced"} + sumOnDuplicateId: {description: "Equivalent to the -S flag of collect-columns.", category: "advanced"} additionalAttributes: {description: "Equivalent to the -a option of collect-columns.", category: "advanced"} referenceGtf: {description: "Equivalent to the -g option of collect-columns.", category: "advanced"} featureAttribute: {description: "Equivalent to the -F option of collect-columns.", category: "advanced"} From 452b5810a358eeb915e6c5ba98525e210262811d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 9 Sep 2020 13:40:51 +0200 Subject: [PATCH 284/902] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 121c8768..a9329bf5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ collect-columns: updated docker image to version 1.0.0 and added the + `sumOnDuplicateId` input (defaults to false). + survivor: replace integer boolean type to logical true or false value. 
+ vt: Add option to ignore masked reference. + bcftools: add sorting and annotation From a651adc575a7ca8707447958a84950d9378b5ee4 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 10 Sep 2020 11:53:38 +0200 Subject: [PATCH 285/902] add paramter meta compressionLevel --- bcftools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/bcftools.wdl b/bcftools.wdl index 10db8b98..affa805a 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -314,6 +314,7 @@ task View { parameter_meta { inputFile: {description: "A vcf or bcf file.", category: "required"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 1643ff2c165b27ca8cacf66899c30ccad5e0f3b3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Sep 2020 16:29:07 +0200 Subject: [PATCH 286/902] update sage --- sage.wdl | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/sage.wdl b/sage.wdl index ba0a6137..cdce4680 100644 --- a/sage.wdl +++ b/sage.wdl @@ -31,33 +31,45 @@ task Sage { File referenceFasta File referenceFastaDict File referenceFastaFai - File knownHotspots - File codingRegsions + File hotspots + File panelBed + File highConfidenceBed + Boolean hg38 = false + String outputPath - Int timeMinutes = 60 #FIXME I've no idea how long this takes... 
+ Int threads = 2 String javaXmx = "32G" String memory = "33G" - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--2" } command { - SAGE -Xmx~{javaXmx} \ + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-sage-2.2-2/sage.jar \ + com.hartwig.hmftools.sage.SageApplication \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ ~{"-reference " + normalName} \ ~{"-reference_bam " + normalBam} \ -ref_genome ~{referenceFasta} \ - -known_hotspots ~{knownHotspots} \ - -coding_regions ~{codingRegsions} \ + -hotspots ~{hotspots} \ + -panel_bed ~{panelBed} \ + -high_confidence_bed ~{highConfidenceBed} \ + -assembly ~{true="hg38" false="hg19" hg38} \ + -threads ~{threads} \ -out ~{outputPath} } output { File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + # There is some plots as well, but in the current container the labels in the plots are just series of `□`s. + # This seems to be a systemic issue with R generated plots in biocontainers... 
} runtime { - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey cpu: threads docker: dockerImage memory: memory @@ -74,8 +86,9 @@ task Sage { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - knownHotspots: {description: "A TSV file with hotspot variant sites.", category: "required"} - codingRegsions: {description: "A bed file describing coding regions to search for inframe indels.", category: "required"} + hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + panelBed: {description: "A bed file describing coding regions to search for in frame indels.", category: "required"} + highConfidenceBed: {description: "A bed files describing high confidence mapping regions.", category: "required"} threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 5f61dd78277dd0d9b408ce866c9e9548b6f152a3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Sep 2020 16:33:31 +0200 Subject: [PATCH 287/902] fix sage... --- sage.wdl | 89 +------------------------------------------------------- 1 file changed, 1 insertion(+), 88 deletions(-) diff --git a/sage.wdl b/sage.wdl index 251630ce..f6e8588b 100644 --- a/sage.wdl +++ b/sage.wdl @@ -20,7 +20,7 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-task SageHotspot { +task Sage { input { String tumorName File tumorBam @@ -31,17 +31,11 @@ task SageHotspot { File referenceFasta File referenceFastaDict File referenceFastaFai -<<<<<<< HEAD File hotspots File panelBed File highConfidenceBed Boolean hg38 = false - String outputPath -======= - File knownHotspots - File codingRegsions String outputPath = "./sage_hotspot.vcf.gz" ->>>>>>> 2b8e422685de9ea6f63831d8780231a058c1b0cb Int threads = 2 String javaXmx = "32G" @@ -75,12 +69,8 @@ task SageHotspot { } runtime { -<<<<<<< HEAD time_minutes: timeMinutes # !UnknownRuntimeKey cpu: threads -======= - time_minutes: timeMinutes ->>>>>>> 2b8e422685de9ea6f63831d8780231a058c1b0cb docker: dockerImage memory: memory } @@ -108,80 +98,3 @@ task SageHotspot { category: "advanced"} } } - -task Sage { - input { - String tumorName - File tumorBam - String? normalName - File? normalBam - File referenceFasta - File referenceFastaFai - File referenceFastaDict - File hotspots - File panelBed - File highConfidenceBed - String assembly = "hg38" - String outputPath = "./sage.vcf.gz" - - Int timeMinutes = 60 #FIXME I've no idea how long this takes... 
- String javaXmx = "32G" - String memory = "33G" - Int threads = 2 - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" - } - - command { - java -Xmx~{javaXmx} \ - -cp /usr/local/share/hmftools-sage-2.2-0/sage.jar \ - com.hartwig.hmftools.sage.SageApplication \ - -tumor ~{tumorName} \ - -tumor_bam ~{tumorBam} \ - ~{"-reference " + normalName} \ - ~{"-reference_bam " + normalBam} \ - -ref_genome ~{referenceFasta} \ - -hotspots ~{hotspots} \ - -panel_bed ~{panelBed} \ - -high_confidence_bed ~{highConfidenceBed} \ - -assembly ~{assembly} \ - -threads ~{threads} \ - -out ~{outputPath} - } - - output { - File outputVcf = outputPath - } - - runtime { - time_minutes: timeMinutes - cpu: threads - docker: dockerImage - memory: memory - } - - parameter_meta { - tumorName: {description: "The name of the tumor sample.", category: "required"} - tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} - tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} - normalName: {description: "The name of the normal/reference sample.", category: "common"} - normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - hotspots: {description: "A VCF file containg hotspot variant sites.", category: "required"} - panelBed: {description: "A bed file containing a panel of genes of intrest.", category: "required"} - highConfidenceBed: {description: "A bed file containing high confidence regions.", category: "required"} - assembly: {description: "The genome assembly used, either 
\"hg19\" or \"hg38\".", category: "common"} - outputPath: {description: "The path to write the output VCF to.", category: "common"} - - threads: {description: "The number of threads to be used.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} \ No newline at end of file From bea730a027a6a3c27675af6e4c85bf72a9aad841 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 11 Sep 2020 10:49:46 +0200 Subject: [PATCH 288/902] change default outputPath of sage --- sage.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sage.wdl b/sage.wdl index f6e8588b..71378bc7 100644 --- a/sage.wdl +++ b/sage.wdl @@ -35,7 +35,7 @@ task Sage { File panelBed File highConfidenceBed Boolean hg38 = false - String outputPath = "./sage_hotspot.vcf.gz" + String outputPath = "./sage.vcf.gz" Int threads = 2 String javaXmx = "32G" From c6d2c3ccc41031e7759655fa274ad0323362b418 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 11 Sep 2020 12:02:12 +0200 Subject: [PATCH 289/902] change bai to bamIndex in sage --- sage.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sage.wdl b/sage.wdl index 71378bc7..79458cc1 100644 --- a/sage.wdl +++ b/sage.wdl @@ -24,10 +24,10 @@ task Sage { input { String tumorName File tumorBam - File tumorBai + File tumorBamIndex String? normalName File? normalBam - File? normalBai + File? 
normalBamIndex File referenceFasta File referenceFastaDict File referenceFastaFai From fb14c451e290628e6666181844c47c8716510565 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 11 Sep 2020 12:14:32 +0200 Subject: [PATCH 290/902] fix paramter_meta --- sage.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sage.wdl b/sage.wdl index 79458cc1..7c04aa99 100644 --- a/sage.wdl +++ b/sage.wdl @@ -78,7 +78,7 @@ task Sage { parameter_meta { tumorName: {description: "The name of the tumor sample.", category: "required"} tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} - tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} + tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} normalName: {description: "The name of the normal/reference sample.", category: "common"} normalBam: {description: "The BAM file for the normal sample.", category: "common"} normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} From ddf76915f2fdb19774c782a957c5403f307933a7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 11 Sep 2020 13:42:00 +0200 Subject: [PATCH 291/902] fix paramter_meta --- sage.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sage.wdl b/sage.wdl index 7c04aa99..ab42bee8 100644 --- a/sage.wdl +++ b/sage.wdl @@ -81,7 +81,7 @@ task Sage { tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} normalName: {description: "The name of the normal/reference sample.", category: "common"} normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} + normalBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} referenceFasta: {description: "The reference fasta file.", 
category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} From ece83524abd6676c9666cf8027d27cdca77a7279 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Sep 2020 09:57:18 +0200 Subject: [PATCH 292/902] Add timeMinutes to Classify. --- CHANGELOG.md | 2 ++ centrifuge.wdl | 70 +++----------------------------------------------- 2 files changed, 5 insertions(+), 67 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9329bf5..4c22ef8b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Centrifuge: Add `timeMinutes` to `Classify` task and remove broken & + unnecessary downloading tasks. + collect-columns: updated docker image to version 1.0.0 and added the `sumOnDuplicateId` input (defaults to false). + survivor: replace integer boolean type to logical true or false value. diff --git a/centrifuge.wdl b/centrifuge.wdl index ee305325..bc2ea462 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -110,6 +110,7 @@ task Classify { Int threads = 4 String memory = "16G" + Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -150,6 +151,7 @@ task Classify { runtime { cpu: threads memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -169,6 +171,7 @@ task Classify { excludeTaxIDs: {description: "A comma-separated list of taxonomic IDs that will be excluded in classification procedure.", category: "common"} threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -233,73 +236,6 @@ task Inspect { } } -task Download { - input { - String libraryPath - Array[String]? domain - String executable = "centrifuge-download" - String? preCommand - String? seqTaxMapPath - String database = "refseq" - String? assemblyLevel - String? refseqCategory - Array[String]? taxIds - Boolean filterUnplaced = false - Boolean maskLowComplexRegions = false - Boolean downloadRnaSeqs = false - Boolean modifyHeader = false - Boolean downloadGiMap = false - } - - # This will use centrifuge-download to download. - # The bash statement at the beginning is to make sure - # the directory for the SeqTaxMapPath exists. - command { - set -e -o pipefail - ~{preCommand} - ~{"mkdir -p $(dirname " + seqTaxMapPath + ")"} - ~{executable} \ - -o ~{libraryPath} \ - ~{true='-d ' false='' defined(domain)}~{sep=',' domain} \ - ~{'-a "' + assemblyLevel + '"'} \ - ~{"-c " + refseqCategory} \ - ~{true='-t' false='' defined(taxIds)} '~{sep=',' taxIds}' \ - ~{true='-r' false='' downloadRnaSeqs} \ - ~{true='-u' false='' filterUnplaced} \ - ~{true='-m' false='' maskLowComplexRegions} \ - ~{true='-l' false='' modifyHeader} \ - ~{true='-g' false='' downloadGiMap} \ - ~{database} ~{">> " + seqTaxMapPath} - } - - output { - File seqTaxMap = "~{seqTaxMapPath}" - File library = libraryPath - Array[File] fastaFiles = glob(libraryPath + "/*/*.fna") - } - } - -task DownloadTaxonomy { - input { - String taxonomyDir - String executable = "centrifuge-download" - String? 
preCommand - } - - command { - set -e -o pipefail - ~{preCommand} - ~{executable} \ - -o ~{taxonomyDir} \ - taxonomy - } - - output { - File taxonomyTree = taxonomyDir + "/nodes.dmp" - File nameTable = taxonomyDir + "/names.dmp" - } - } - task KReport { input { File classification From 70747bdf89e05b3ab05cfebd75f5d13dff75741b Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Sep 2020 10:03:12 +0200 Subject: [PATCH 293/902] Update CHANGELOG. --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c22ef8b..933081d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ Centrifuge: Add `timeMinutes` to `Classify` task and remove broken & - unnecessary downloading tasks. ++ Centrifuge: Add `timeMinutes` to `Classify` task and remove unnecessary + downloading tasks (alternative is refseqtools). + collect-columns: updated docker image to version 1.0.0 and added the `sumOnDuplicateId` input (defaults to false). + survivor: replace integer boolean type to logical true or false value. From 68120ed6530bf60cc114cffdeeed143d8b132c8e Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Sep 2020 15:19:44 +0200 Subject: [PATCH 294/902] Add NanoQC and NanoPlot. 
--- nanopack.wdl | 175 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 nanopack.wdl diff --git a/nanopack.wdl b/nanopack.wdl new file mode 100644 index 00000000..59193f96 --- /dev/null +++ b/nanopack.wdl @@ -0,0 +1,175 @@ +version 1.0 + +# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task NanoPlot { + input { + File inputFile + String inputFileType + String outputDir + String outputPrefix + String outputPath = outputDir + outputPrefix + Boolean outputTsvStats = true + Boolean dropOutliers = false + Boolean logLengths = false + String format = "png" + Boolean showN50 = true + String title = basename(outputPrefix) + + Int? maxLength + Int? minLength + Int? minQual + String? 
readType + + Int threads = 2 + String memory = "2G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biocontainers/nanoplot:1.32.0--py_0" + } + + Map[String, String] fileTypeOptions = {"fastq": "--fastq ", "fasta": "--fasta ", "fastq_rich": "--fastq_rich ", "fastq_minimal": "--fastq_minimal ", "summary": "--summary ", "bam": "--bam ", "ubam": "--ubam ", "cram": "--cram ", "pickle": "--pickle ", "feather": "--feather "} + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + NanoPlot \ + --threads ~{threads} \ + --outdir ~{outputDir} \ + --prefix ~{outputPrefix} \ + ~{true="--tsv_stats" false="" outputTsvStats} \ + ~{true="--drop_outliers" false="" dropOutliers} \ + ~{true="--loglength" false="" logLengths} \ + --format ~{format} \ + ~{true="--N50" false="--no-N50" showN50} \ + ~{fileTypeOptions[inputFileType] + inputFile} \ + ~{"--maxlength " + maxLength} \ + ~{"--minlength " + minLength} \ + ~{"--minqual " + minQual} \ + ~{"--readtype " + readType} + } + + output { + File dynamicHistogram = outputDir + outputPrefix + "Dynamic_Histogram_Read_length.html" + File readLengthHistogram = outputDir + outputPrefix + "HistogramReadlength.png" + File lengthVsQualityScatterPlotDot = outputDir + outputPrefix + "LengthvsQualityScatterPlot_dot.png" + File lengthVsQualityScatterPlotKde = outputDir + outputPrefix + "LengthvsQualityScatterPlot_kde.png" + File logScaleReadLengthHistogram = outputDir + outputPrefix + "LogTransformed_HistogramReadlength.png" + File report = outputDir + outputPrefix + "NanoPlot-report.html" + File weightedHistogram = outputDir + outputPrefix + "Weighted_HistogramReadlength.png" + File weightedLogScaleHistogram = outputDir + outputPrefix + "Weighted_LogTransformed_HistogramReadlength.png" + File yieldByLength = outputDir + outputPrefix + "Yield_By_Length.png" + File? 
stats = outputDir + outputPrefix + "NanoStats.txt" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input file.", category: "required"} + inputFileType: {description: "The format of the read file.", category: "required"} + outputDir: {description: "Output directory path.", category: "required"} + outputPrefix: {description: "Output file prefix.", category: "required"} + outputTsvStats: {description: "Output the stats file as a properly formatted TSV.", category: "common"} + dropOutliers: {description: "Drop outlier reads with extreme long length.", category: "advanced"} + logLengths: {description: "Additionally show logarithmic scaling of lengths in plots.", category: "advanced"} + format: {description: "Specify the output format of the plots.", category: "required"} + showN50: {description: "Show the N50 mark in the read length histogram.", category: "common"} + title: {description: "Add a title to all plots, requires quoting if using spaces.", category: "common"} + maxLength: {description: "Hide reads longer than length specified.", category: "advanced"} + minLength: {description: "Hide reads shorter than length specified.", category: "advanced"} + minQual: {description: "Drop reads with an average quality lower than specified.", category: "advanced"} + readType: {description: "Which read type to extract information about from summary. Options are 1D, 2D, 1D2", category: "advanced"} + threads: {description: "The number of threads to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + dynamicHistogram: {description: ""} + readLengthHistogram: {description: ""} + lengthVsQualityScatterPlotDot: {description: ""} + lengthVsQualityScatterPlotKde: {description: ""} + logScaleReadLengthHistogram: {description: ""} + report: {description: ""} + weightedHistogram: {description: ""} + weightedLogScaleHistogram: {description: ""} + yieldByLength: {description: ""} + stats: {description: ""} + } +} + +task NanoQc { + input { + File inputFile + String outputDir + Boolean directRna = false + + Int? minLength + + Int threads = 2 + String memory = "2G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biocontainers/nanoqc:0.9.4--py_0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputDir})" + nanoQC \ + --outdir ~{outputDir} \ + ~{true="--rna" false="" directRna} \ + ~{"--minlen " + minLength} \ + ~{inputFile} + } + + output { + File report = outputDir + "nanoQC.html" + File log = outputDir + "NanoQC.log" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input file.", category: "required"} + outputDir: {description: "Output directory path.", category: "required"} + directRna: {description: "Fastq is from direct RNA-seq and contains U nucleotides.", category: "common"} + minLength: {description: "Filters the reads on a minimal length of the given range. Also plots the given length/2 of the begin and end of the reads.", category: "advanced"} + threads: {description: "The number of threads to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + report: {description: ""} + log: {description: ""} + } +} From 00b947f945b5da4f44812d9ea6a41347b1dc2ba7 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Sep 2020 16:56:58 +0200 Subject: [PATCH 295/902] Update changelog. --- CHANGELOG.md | 1 + nanopack.wdl | 7 ++----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 85beb2eb..7b4079cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Add NanoPlot and NanoQC tasks. + bcftools: add sorting and annotation + Bam2fastx: Input bam and index are now arrays. + Lima: Remove globs from outputs. diff --git a/nanopack.wdl b/nanopack.wdl index 59193f96..661f99de 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -59,11 +59,11 @@ task NanoPlot { ~{true="--loglength" false="" logLengths} \ --format ~{format} \ ~{true="--N50" false="--no-N50" showN50} \ - ~{fileTypeOptions[inputFileType] + inputFile} \ ~{"--maxlength " + maxLength} \ ~{"--minlength " + minLength} \ ~{"--minqual " + minQual} \ - ~{"--readtype " + readType} + ~{"--readtype " + readType} \ + ~{fileTypeOptions[inputFileType] + inputFile} } output { @@ -129,7 +129,6 @@ task NanoQc { Int? minLength - Int threads = 2 String memory = "2G" Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/nanoqc:0.9.4--py_0" @@ -151,7 +150,6 @@ task NanoQc { } runtime { - cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -163,7 +161,6 @@ task NanoQc { outputDir: {description: "Output directory path.", category: "required"} directRna: {description: "Fastq is from direct RNA-seq and contains U nucleotides.", category: "common"} minLength: {description: "Filters the reads on a minimal length of the given range. 
Also plots the given length/2 of the begin and end of the reads.", category: "advanced"} - threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 39aa53feeacf6a3d9b96c5adc2eec9c85eb92bba Mon Sep 17 00:00:00 2001 From: Jasper Date: Mon, 14 Sep 2020 16:59:12 +0200 Subject: [PATCH 296/902] Update CHANGELOG.md --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2bd7cbf1..cf85eb0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,10 +16,10 @@ version 5.0.0-dev `sumOnDuplicateId` input (defaults to false). + survivor: replace integer boolean type to logical true or false value. + vt: Add option to ignore masked reference. -+ bcftools: add sorting and annotation ++ bcftools: add sorting and annotation. + Bam2fastx: Input bam and index are now arrays. + Lima: Remove globs from outputs. -+ Updated task gridss.wdl: add --jvmheap parameter ++ Updated task gridss.wdl: add --jvmheap parameter. + A bwa-mem2 task was created with the same interface (including usePostalt) as the bwa mem task. + bwa mem and bwa kit are now one task. The usePostalt boolean can be used to From 5b46df4bd5c4ecbd130de52e081b3e9258627188 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 16 Sep 2020 09:58:48 +0200 Subject: [PATCH 297/902] Complete parameter_meta. 
--- nanopack.wdl | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/nanopack.wdl b/nanopack.wdl index 661f99de..ba68af1b 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -108,16 +108,16 @@ task NanoPlot { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - dynamicHistogram: {description: ""} - readLengthHistogram: {description: ""} - lengthVsQualityScatterPlotDot: {description: ""} - lengthVsQualityScatterPlotKde: {description: ""} - logScaleReadLengthHistogram: {description: ""} - report: {description: ""} - weightedHistogram: {description: ""} - weightedLogScaleHistogram: {description: ""} - yieldByLength: {description: ""} - stats: {description: ""} + dynamicHistogram: {description: "Dynamic histogram of read length."} + readLengthHistogram: {description: "Histogram of read length."} + lengthVsQualityScatterPlotDot: {description: "Read lengths vs average read quality plot."} + lengthVsQualityScatterPlotKde: {description: "Read lengths vs average read quality plot."} + logScaleReadLengthHistogram: {description: "Histogram of read lengths after log transformation."} + report: {description: "Html summary report."} + weightedHistogram: {description: "Weighted histogram of read lengths."} + weightedLogScaleHistogram: {description: "Weighted histogram of read lengths after log transformation."} + yieldByLength: {description: "Cumulative yield plot."} + stats: {description: "NanoStats report."} } } @@ -166,7 +166,7 @@ task NanoQc { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - report: {description: ""} - log: {description: ""} + report: {description: "Html summary report."} + log: {description: "Progress report."} } } From af550dd024ff6fe5df365ebec58808f8517b2516 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 16 Sep 2020 12:11:06 +0200 Subject: [PATCH 298/902] Make some outputs optional. --- nanopack.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nanopack.wdl b/nanopack.wdl index ba68af1b..6860cf13 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -69,13 +69,13 @@ task NanoPlot { output { File dynamicHistogram = outputDir + outputPrefix + "Dynamic_Histogram_Read_length.html" File readLengthHistogram = outputDir + outputPrefix + "HistogramReadlength.png" - File lengthVsQualityScatterPlotDot = outputDir + outputPrefix + "LengthvsQualityScatterPlot_dot.png" - File lengthVsQualityScatterPlotKde = outputDir + outputPrefix + "LengthvsQualityScatterPlot_kde.png" File logScaleReadLengthHistogram = outputDir + outputPrefix + "LogTransformed_HistogramReadlength.png" File report = outputDir + outputPrefix + "NanoPlot-report.html" File weightedHistogram = outputDir + outputPrefix + "Weighted_HistogramReadlength.png" File weightedLogScaleHistogram = outputDir + outputPrefix + "Weighted_LogTransformed_HistogramReadlength.png" File yieldByLength = outputDir + outputPrefix + "Yield_By_Length.png" + File? lengthVsQualityScatterPlotDot = outputDir + outputPrefix + "LengthvsQualityScatterPlot_dot.png" + File? lengthVsQualityScatterPlotKde = outputDir + outputPrefix + "LengthvsQualityScatterPlot_kde.png" File? 
stats = outputDir + outputPrefix + "NanoStats.txt" } @@ -110,13 +110,13 @@ task NanoPlot { # outputs dynamicHistogram: {description: "Dynamic histogram of read length."} readLengthHistogram: {description: "Histogram of read length."} - lengthVsQualityScatterPlotDot: {description: "Read lengths vs average read quality plot."} - lengthVsQualityScatterPlotKde: {description: "Read lengths vs average read quality plot."} logScaleReadLengthHistogram: {description: "Histogram of read lengths after log transformation."} report: {description: "Html summary report."} weightedHistogram: {description: "Weighted histogram of read lengths."} weightedLogScaleHistogram: {description: "Weighted histogram of read lengths after log transformation."} yieldByLength: {description: "Cumulative yield plot."} + lengthVsQualityScatterPlotDot: {description: "Read lengths vs average read quality plot."} + lengthVsQualityScatterPlotKde: {description: "Read lengths vs average read quality plot."} stats: {description: "NanoStats report."} } } From 041721c1f49d981e18477ad208ecad3580fb9dbd Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 21 Sep 2020 16:10:57 +0200 Subject: [PATCH 299/902] Remove metrics file. --- CHANGELOG.md | 2 ++ centrifuge.wdl | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce42941e..b11e4223 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Centrifuge: Remove metrics file from classification (which causes the + summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 + Add NanoPlot and NanoQC tasks. + Centrifuge: Add `timeMinutes` to `Classify` task and remove unnecessary downloading tasks (alternative is refseqtools). 
diff --git a/centrifuge.wdl b/centrifuge.wdl index bc2ea462..1e7a0b45 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -128,7 +128,6 @@ task Classify { ~{inputFormatOptions[inputFormat]} \ ~{true="--phred64" false="--phred33" phred64} \ --min-hitlen ~{minHitLength} \ - ~{"--met-file " + outputPrefix + "_alignment_metrics.tsv"} \ --threads ~{threads} \ ~{"--trim5 " + trim5} \ ~{"--trim3 " + trim3} \ @@ -143,7 +142,6 @@ task Classify { >>> output { - File metrics = outputPrefix + "_alignment_metrics.tsv" File classification = outputPrefix + "_classification.tsv" File report = outputPrefix + "_output_report.tsv" } @@ -175,7 +173,6 @@ task Classify { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - metrics: {description: "File with centrifuge metrics."} classification: {description: "File with the classification results."} report: {description: "File with a classification summary."} } From fbbfc5bec27636e709de907c871efaab24d8f1c1 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 21 Sep 2020 16:13:57 +0200 Subject: [PATCH 300/902] Change indexing. --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b11e4223..142622e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- + Centrifuge: Remove metrics file from classification (which causes the - summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 + summary report to be empty). + https://github.com/DaehwanKimLab/centrifuge/issues/83 + Add NanoPlot and NanoQC tasks. + Centrifuge: Add `timeMinutes` to `Classify` task and remove unnecessary downloading tasks (alternative is refseqtools). 
From 66852ef0a1f5a08259a0f8eafc01d7a5d2bf1732 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 29 Sep 2020 16:39:10 +0200 Subject: [PATCH 301/902] add snpeff task --- snpeff.wdl | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 snpeff.wdl diff --git a/snpeff.wdl b/snpeff.wdl new file mode 100644 index 00000000..e1c0184f --- /dev/null +++ b/snpeff.wdl @@ -0,0 +1,73 @@ +version 1.0 + +task snpEff { + input { + File vcf + File vcfIndex + String genomeVersion + File datadirZip + String outputPath = "./snpeff.vcf" + Boolean hgvs = true + Boolean lof = true + Boolean noDownstream = false + Boolean noIntergenic = false + Boolean noShiftHgvs = false + Int? upDownStreamLen + + String memory = "50G" + String javaXmx = "49G" + Int timeMinutes = 60 #FIXME + String dockerImage = "quay.io/biocontainers/snpeff:5.0--0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + unzip ~{datadirZip} + snpEff -Xmx~{javaXmx}G -XX:ParallelGCThreads=1 \ + -v \ + ~{genomeVersion} \ + -noDownload \ + -dataDir $PWD/data \ + ~{vcf} \ + ~{true="-hgvs" false="-noHgvs" hgvs} \ + ~{true="-lof" false="-noLof" lof} \ + ~{true="-no-downstream" false="" noDownstream} \ + ~{true="-no-intergenic" false="" noIntergenic} \ + ~{true="-noShiftHgvs" false="" noShiftHgvs} \ + ~{"-upDownStreamLen " + upDownStreamLen} \ + > ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + vcf: {description: "A VCF file to analyse.", category: "required"} + vcfIndex: {description: "The index for the VCF file.", category: "required"} + genomeVersion: {description: "The version of the genome to be used. The database for this genome must be present in the datadirZip.", category: "required"} + datadirZip: {description: "A zip file containing the directory of databases. 
This zip file must contain a directory called `data`, with the database mentioned in the genomeVersion input as subdirectory.", + category: "required"} + outputPath: {description: "The path to write the output to.", category: "common"} + hgvs: {description: "Equivalent to `-hgvs` if true or `-noHgvs` if false.", category: "advanced"} + lof: {description: "Equivalent to `-lof` if true or `-noLof` if false.", category: "advanced"} + noDownstream: {description: "Equivalent to the `-no-downstream` flag.", category: "advanced"} + noIntergenic: {description: "Equivalent to the `-no-intergenic` flag.", category: "advanced"} + noShiftHgvs: {description: "Equivalent to the `-noShiftHgvs` flag.", category: "advanced"} + upDownStreamLen: {descriptoin: "Equivalent to the `-upDownStreamLen` option.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From 3ee13418733a762df9883266a73d14426bd26118 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 1 Oct 2020 09:47:52 +0200 Subject: [PATCH 302/902] typo --- snpeff.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snpeff.wdl b/snpeff.wdl index e1c0184f..95383b94 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -1,6 +1,6 @@ version 1.0 -task snpEff { +task SnpEff { input { File vcf File vcfIndex From 35bc2ba3fe927ed842464444506f191f4c268c84 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Fri, 2 Oct 2020 13:17:12 +0200 Subject: [PATCH 303/902] Add parameter meta for threads --- samtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index c155f026..dd771415 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -362,6 +362,7 @@ task Merge { # inputs bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} + threads: {description: "Number of threads to use.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From 81095b1bb400c28b3ad01cfb6ddef7b6a74907ed Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Fri, 2 Oct 2020 13:19:18 +0200 Subject: [PATCH 304/902] Update Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 142622e2..e2f266e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. 
version 5.0.0-dev --------------------------- ++ Samtools: Add parameter meta for Merge task + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 From 38333745daff01234eb36e178fb97ffb76c87d84 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Oct 2020 14:20:32 +0200 Subject: [PATCH 305/902] fix bcftools filter --- bcftools.wdl | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index affa805a..b1d6e5f0 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -125,6 +125,53 @@ task Annotate { } } +task Filter { + input { + File vcf + File vcfIndex + Array[String] include = [] + String outputPath = "./filtered.vcf.gz" + + String memory = "256M" + Int timeMinutes = 1 + ceil(size(vcf, "G")) + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + bcftools \ + filter \ + ~{true="-i" false="" length(include) > 0} ~{sep=" -i " include} \ + ~{vcf} \ + -O z \ + -o ~{outputPath} + bctools index --tbi ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + vcf: {description: "The VCF file to operate on.", category: "required"} + vcfIndex: {description: "The index for the VCF file.", category: "required"} + include: {description: "Equivalent to the `-i` option.", category: "common"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + } +} + task Sort { input { File inputFile From 66399ba333105934575da4ff97e43f6e35ef06d4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 5 Oct 2020 13:07:13 +0200 Subject: [PATCH 306/902] fix whitespace --- bcftools.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index b1d6e5f0..619c1733 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -141,11 +141,11 @@ task Filter { set -e mkdir -p "$(dirname ~{outputPath})" bcftools \ - filter \ + filter \ ~{true="-i" false="" length(include) > 0} ~{sep=" -i " include} \ - ~{vcf} \ - -O z \ - -o ~{outputPath} + ~{vcf} \ + -O z \ + -o ~{outputPath} bctools index --tbi ~{outputPath} } From 28bd67e696bfb2302920cc76245f3a6a86161948 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 6 Oct 2020 13:40:28 +0200 Subject: [PATCH 307/902] Update CHANGELOG.md Co-authored-by: Davy Cats --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2f266e9..7668cd2b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ Samtools: Add parameter meta for Merge task ++ Samtools: Add `threads` to parameter meta for Merge task + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). 
https://github.com/DaehwanKimLab/centrifuge/issues/83 From afe600065e0d94a80ba68bba5f23bed8a9f52293 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 6 Oct 2020 13:40:44 +0200 Subject: [PATCH 308/902] Update samtools.wdl Co-authored-by: Davy Cats --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index dd771415..24d95aa4 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -362,7 +362,7 @@ task Merge { # inputs bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} - threads: {description: "Number of threads to use.", category: "common"} + threads: {description: "Number of threads to use.", category: "advanced"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From 09372028e140528ccc255b73c87b48ad45a93a77 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 6 Oct 2020 16:20:29 +0200 Subject: [PATCH 309/902] fix bcftools filter --- bcftools.wdl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 619c1733..0be3be93 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -129,7 +129,9 @@ task Filter { input { File vcf File vcfIndex - Array[String] include = [] + String? include + String? exclude + String? 
softFilter String outputPath = "./filtered.vcf.gz" String memory = "256M" @@ -142,7 +144,9 @@ task Filter { mkdir -p "$(dirname ~{outputPath})" bcftools \ filter \ - ~{true="-i" false="" length(include) > 0} ~{sep=" -i " include} \ + ~{"-i " + include} \ + ~{"-e " + exclude} \ + ~{"-s " + softFilter} ~{vcf} \ -O z \ -o ~{outputPath} From 2fdabcca7e4bba7e1ba2a30d6e47dfb478e58e11 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 7 Oct 2020 13:54:41 +0200 Subject: [PATCH 310/902] Update parameter_meta. --- CHANGELOG.md | 1 + nanopack.wdl | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 142622e2..803c221b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ NanoPack: Add parameter_meta to NanoPlot task. + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 diff --git a/nanopack.wdl b/nanopack.wdl index 6860cf13..e4d15135 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -92,6 +92,7 @@ task NanoPlot { inputFileType: {description: "The format of the read file.", category: "required"} outputDir: {description: "Output directory path.", category: "required"} outputPrefix: {description: "Output file prefix.", category: "required"} + outputPath: {description: "Combination of the outputDir & outputPrefix strings.", category: "advanced"} outputTsvStats: {description: "Output the stats file as a properly formatted TSV.", category: "common"} dropOutliers: {description: "Drop outlier reads with extreme long length.", category: "advanced"} logLengths: {description: "Additionally show logarithmic scaling of lengths in plots.", category: "advanced"} From 6eaf21442d9352266f0ac3e108cf1dc084c1c9f4 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:07:39 +0200 Subject: [PATCH 311/902] Ensure that the index 
and bamfiles are in the same folder --- bam2fastx.wdl | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 42240cd4..18434755 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -91,12 +91,25 @@ task Bam2Fastq { command { set -e mkdir -p "$(dirname ~{outputPrefix})" + + # Localise the bam and pbi files so they are next to each other in the + # current folder + bamfiles="" + for bamfile in ~{sep=" " bam};do + ln $bamfile . + bamfiles=$bamfiles" $(basename $bamfile)" + done + + for bamindex in ~{sep=" " bamIndex}; do + ln $bamindex . + done + bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{sep=" " bam} + $bamfiles } output { From 0dd0afd61c43b625146adce4b4507ec85803381a Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:08:02 +0200 Subject: [PATCH 312/902] Add bam index file as required input for isoseq --- isoseq3.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/isoseq3.wdl b/isoseq3.wdl index 604a71d5..7894b382 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -26,6 +26,7 @@ task Refine { Boolean requirePolyA = false String logLevel = "WARN" File inputBamFile + File inputBamIndex File primerFile String outputDir String outputNamePrefix From 571544cbcbeeda14eadce3b7d633626fcb4f518e Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:08:23 +0200 Subject: [PATCH 313/902] Simplify lima output structure --- lima.wdl | 32 ++++++++++---------------------- samtools.wdl | 1 + 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/lima.wdl b/lima.wdl index 1a40b1c8..38cf2d6e 100644 --- a/lima.wdl +++ b/lima.wdl @@ -58,7 +58,6 @@ task Lima { command { set -e - mkdir -p "$(dirname ~{outputPrefix})" lima \ ~{libraryDesignOptions[libraryDesign]} \ ~{true="--score-full-pass" false="" scoreFullPass} \ @@ -83,32 +82,21 @@ task Lima { 
~{true="--peek-guess" false="" peekGuess} \ --log-level ~{logLevel} \ --num-threads ~{cores} \ - ~{"--log-file " + outputPrefix + ".fl.stderr.log"} \ + ~{"--log-file " + outputPrefix + ".stderr.log"} \ ~{inputBamFile} \ ~{barcodeFile} \ - ~{basename(outputPrefix) + ".fl.bam"} - - # copy commands below are needed because glob command does not find - # multiple bam/bam.pbi/subreadset.xml files when not located in working - # directory. - cp "~{basename(outputPrefix)}.fl.json" "~{outputPrefix}.fl.json" - cp "~{basename(outputPrefix)}.fl.lima.counts" "~{outputPrefix}.fl.lima.counts" - cp "~{basename(outputPrefix)}.fl.lima.report" "~{outputPrefix}.fl.lima.report" - cp "~{basename(outputPrefix)}.fl.lima.summary" "~{outputPrefix}.fl.lima.summary" - find . -path "*.bam" > bamFiles.txt - find . -path "*.bam.pbi" > bamIndexes.txt - find . -path "*.subreadset.xml" > subreadsets.txt + ~{outputPrefix + ".bam"} } output { - Array[File] limaBam = read_lines("bamFiles.txt") - Array[File] limaBamIndex = read_lines("bamIndexes.txt") - Array[File] limaXml = read_lines("subreadsets.txt") - File limaStderr = outputPrefix + ".fl.stderr.log" - File limaJson = outputPrefix + ".fl.json" - File limaCounts = outputPrefix + ".fl.lima.counts" - File limaReport = outputPrefix + ".fl.lima.report" - File limaSummary = outputPrefix + ".fl.lima.summary" + Array[File] limaBam = glob("*.bam") + Array[File] limaBamIndex = glob("*.bam.pbi") + Array[File] limaXml = glob("*.subreadset.xml") + File limaStderr = outputPrefix + ".stderr.log" + File limaJson = outputPrefix + ".json" + File limaCounts = outputPrefix + ".lima.counts" + File limaReport = outputPrefix + ".lima.report" + File limaSummary = outputPrefix + ".lima.summary" } runtime { diff --git a/samtools.wdl b/samtools.wdl index c155f026..dd771415 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -362,6 +362,7 @@ task Merge { # inputs bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location 
the merged BAM file should be written to.", category: "common"} + threads: {description: "Number of threads to use.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From 5ca9c5e22734456a7735ce383d695877e6cb9c08 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:09:11 +0200 Subject: [PATCH 314/902] Add task for indexing PacBio bam files --- pbbam.wdl | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 pbbam.wdl diff --git a/pbbam.wdl b/pbbam.wdl new file mode 100644 index 00000000..368ff4ed --- /dev/null +++ b/pbbam.wdl @@ -0,0 +1,69 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + +task Index { + input { + File bamFile + String? outputBamPath + String memory = "2G" + Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) + String dockerImage = "quay.io/biocontainers/pbbam:1.6.0--h5b7e6e0_0" + } + + # Select_first is needed, otherwise womtool validate fails. + String outputPath = select_first([outputBamPath, basename(bamFile)]) + String bamIndexPath = outputPath + ".pbi" + + command { + bash -c ' + set -e + # Make sure outputBamPath does not exist. + if [ ! -f ~{outputPath} ] + then + mkdir -p "$(dirname ~{outputPath})" + ln ~{bamFile} ~{outputPath} + fi + pbindex ~{outputPath} ~{bamIndexPath} + ' + } + + output { + File indexedBam = outputPath + File index = bamIndexPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + bamFile: {description: "The BAM file for which an index should be made.", category: "required"} + outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", + category: "common"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From d8848dc95d73402eb92483456a35eaac9040a83e Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:10:05 +0200 Subject: [PATCH 315/902] Make intervals optional for gatk GenotypeGVCFs --- gatk.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index e0209a0c..12416dda 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -820,7 +820,7 @@ task GenotypeGVCFs { input { File gvcfFile File gvcfFileIndex - Array[File]+ intervals + Array[File]? intervals String outputPath File referenceFasta File referenceFastaDict @@ -846,9 +846,9 @@ task GenotypeGVCFs { ~{"-D " + dbsnpVCF} \ ~{"--pedigree " + pedigree} \ ~{true="-G" false="" length(annotationGroups) > 0} ~{sep=" -G " annotationGroups} \ - --only-output-calls-starting-in-intervals \ -V ~{gvcfFile} \ - -L ~{sep=' -L ' intervals} + ~{true="--only-output-calls-starting-in-intervals" false="" defined(intervals)} \ + ~{true="-L" false="" defined(intervals)} ~{sep=' -L ' intervals} } output { @@ -866,7 +866,7 @@ task GenotypeGVCFs { parameter_meta { gvcfFile: {description: "The GVCF file to be genotyped.", category: "required"} gvcfFileIndex: {description: "The index of the input GVCF file.", category: "required"} - intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "required"} + intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "optional"} outputPath: {description: "The location to write the output VCF file to.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} From d786fcec2cf3b7ecbe0cdbccbe412cef382fac71 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:10:42 +0200 Subject: [PATCH 316/902] Increase runtime and add sample name for pbmm2 --- 
pbmm2.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index 84fbd2d0..31d4c667 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -30,7 +30,7 @@ task Mapping { Int cores = 4 String memory = "30G" - Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) + Int timeMinutes = 1 + ceil(size(queryFile, "G") * 2000 / cores) String dockerImage = "quay.io/biocontainers/pbmm2:1.3.0--h56fc30b_1" } @@ -41,6 +41,7 @@ task Mapping { -j ~{cores} \ ~{referenceMMI} \ ~{queryFile} \ + --sample ~{sample} \ ~{sample}.align.bam } From a2ae010f8efa3f9d03ea99b61038419956be98b3 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:17:10 +0200 Subject: [PATCH 317/902] Add HsMetrics and VariantcallingMetrics to picard --- picard.wdl | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/picard.wdl b/picard.wdl index 1afa5ea7..9603db8c 100644 --- a/picard.wdl +++ b/picard.wdl @@ -66,6 +66,70 @@ task BedToIntervalList { } } +task CollectHsMetrics { + input { + File inputBam + File inputBamIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai + File targets + File? baits + String basename + + + # Use the targets file as baits as a fallback, since often the baits + # for a certain capture kit are not available. + File baitsFile = select_first([baits, targets]) + File targetsFile = targets + + Int memoryMb = javaXmxMb + 512 + Int javaXmxMb = 3072 + # Additional * 2 because picard multiple metrics reads the reference fasta twice. 
+ Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + } + + command { + set -e + mkdir -p "$(dirname ~{basename})" + picard -Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1 \ + CollectHsMetrics \ + I=~{inputBam} \ + R=~{referenceFasta} \ + BAIT_INTERVALS=~{baitsFile} \ + TARGET_INTERVALS=~{targetsFile} \ + O="~{basename}.hs_metrics.txt" + } + + output { + File HsMetrics = basename + ".hs_metrics.txt" + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: "~{memoryMb}M" + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} + inputBamIndex: {description: "The index of the input BAM file.", category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} + memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task CollectMultipleMetrics { input { File inputBam @@ -315,6 +379,53 @@ task CollectTargetedPcrMetrics { } } +task CollectVariantCallingMetrics { + input { + File dbsnp + File dbsnpIndex + File inputVCF + File inputVCFIndex + String basename + + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 1440 + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + } + + command { + set -e + mkdir -p "$(dirname ~{basename})" + picard -Xmx~{javaXmx} \ + CollectVariantCallingMetrics -XX:ParallelGCThreads=1 \ + DBSNP=~{dbsnp} \ + INPUT=~{inputVCF} \ + OUTPUT=~{basename} + } + + output { + File details = basename + ".variant_calling_detail_metrics" + File summary = basename + ".variant_calling_summary_metrics" + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + # inputs + basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task CreateSequenceDictionary { input { File inputFile From e2fbf4a0275a9ae27de653513cd9c6f1b6340915 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:17:25 +0200 Subject: [PATCH 318/902] Add deepvariant tasks --- deepvariant.wdl | 91 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 deepvariant.wdl diff --git a/deepvariant.wdl b/deepvariant.wdl new file mode 100644 index 00000000..88bdb352 --- /dev/null +++ b/deepvariant.wdl @@ -0,0 +1,91 @@ +version 1.0 + +# Copyright (c) 2018 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task RunDeepVariant { + input { + File referenceFasta + File referenceFastaIndex + File inputBam + File inputBamIndex + String modelType + String outputVcf + File? customizedModel + Int? numShards + String? 
outputGVcf + File? regions + String? sampleName + Boolean? VCFStatsReport = true + + String memory = "3G" + Int timeMinutes = 5000 + String dockerImage = "google/deepvariant:1.0.0" + } + + command { + set -e + + /opt/deepvariant/bin/run_deepvariant \ + --ref ~{referenceFasta} \ + --reads ~{inputBam} \ + --model_type ~{modelType} \ + --output_vcf ~{outputVcf} \ + ~{"--output_gvcf " + outputGVcf} \ + ~{"--customized_model " + customizedModel} \ + ~{"--num_shards " + numShards} \ + ~{"--regions " + regions} \ + ~{"--sample_name " + sampleName} \ + ~{true="--vcf_stats_report" false="--novcf_stats_report" VCFStatsReport} + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + output { + File outputVCF = outputVcf + File outputVCFIndex = outputVCF + ".tbi" + File? outputGVCF = outputGVcf + File? outputGVCFIndex = outputGVcf + ".tbi" + Array[File] outputVCFStatsReport = glob("*.visual_report.html") + } + + parameter_meta { + referenceFasta: {description: "Genome reference to use", category: "required"} + referenceFastaIndex: {description: "Index for the genome reference file.", category: "required"} + inputBam: {description: "Aligned, sorted, indexed BAM file containing the reads we want to call.", category: "required"} + inputBamIndex: {description: "Index for the input bam file.", category: "required"} + modelType: {description: "Type of model to use for variant calling. Each model_type has an associated default model, which can be overridden by the --customized_model flag", category: "required"} + outputVcf: {description: "Path where we should write VCF file.", category: "required"} + customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. 
If not set, the default for each --model_type will be used", category: "advanced"} + numShards: {description: "Number of shards for make_examples step.", category: "common"} + outputGVcf: {description: "Path where we should write gVCF file.", category: "common"} + regions: {description: "List of regions we want to process, in BED/BEDPE format.", category: "advanced"} + sampleName: {description: "Sample name to use instead of the sample name from the input reads BAM (SM tag in the header).", category: "common"} + VCFStatsReport: {description: "Output a visual report (HTML) of statistics about the output VCF.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From 41024c35d01b0a954a0eaf6f4f69ab93ec02833b Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:17:33 +0200 Subject: [PATCH 319/902] Add whatshap tasks --- whatshap.wdl | 275 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 275 insertions(+) create mode 100644 whatshap.wdl diff --git a/whatshap.wdl b/whatshap.wdl new file mode 100644 index 00000000..2506aa10 --- /dev/null +++ b/whatshap.wdl @@ -0,0 +1,275 @@ +version 1.0 + +# Copyright (c) 2018 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: 
+# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +task Phase { + input { + String outputVCF + File? reference + File? referenceIndex + Boolean? no_reference + String? tag + File? output_read_list + String? algorithm + Boolean? merge_reads + String? internal_downsampling + String? mapping_quality + Boolean? indels + Boolean? ignore_read_groups + String? sample + String? chromosome + String? error_rate + String? maximum_error_rate + String? threshold + String? negative_threshold + Boolean? full_genotyping + Boolean? distrust_genotypes + Boolean? include_homozygous + String? default_gq + String? gl_regularize_r + File? changed_genotype_list + String? ped + File? recombination_list + String? recomb_rate + File? gen_map + Boolean? no_genetic_haplo_typing + Boolean? 
use_ped_samples + File vcf + File vcfIndex + File phaseInput + File phaseInputIndex + + String memory = "4G" + Int timeMinutes = 120 + # Whatshap 1.0, tabix 0.2.5 + String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" + } + + command { + whatshap phase \ + ~{vcf} \ + ~{phaseInput} \ + ~{if defined(outputVCF) then ("--output " + '"' + outputVCF + '"') else ""} \ + ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ + ~{true="--no-reference" false="" no_reference} \ + ~{if defined(tag) then ("--tag " + '"' + tag + '"') else ""} \ + ~{if defined(output_read_list) then ("--output-read-list " + '"' + output_read_list + '"') else ""} \ + ~{if defined(algorithm) then ("--algorithm " + '"' + algorithm + '"') else ""} \ + ~{true="--merge-reads" false="" merge_reads} \ + ~{if defined(internal_downsampling) then ("--internal-downsampling " + '"' + internal_downsampling + '"') else ""} \ + ~{if defined(mapping_quality) then ("--mapping-quality " + '"' + mapping_quality + '"') else ""} \ + ~{true="--indels" false="" indels} \ + ~{true="--ignore-read-groups" false="" ignore_read_groups} \ + ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ + ~{if defined(chromosome) then ("--chromosome " + '"' + chromosome + '"') else ""} \ + ~{if defined(error_rate) then ("--error-rate " + '"' + error_rate + '"') else ""} \ + ~{if defined(maximum_error_rate) then ("--maximum-error-rate " + '"' + maximum_error_rate + '"') else ""} \ + ~{if defined(threshold) then ("--threshold " + '"' + threshold + '"') else ""} \ + ~{if defined(negative_threshold) then ("--negative-threshold " + '"' + negative_threshold + '"') else ""} \ + ~{true="--full-genotyping" false="" full_genotyping} \ + ~{true="--distrust-genotypes" false="" distrust_genotypes} \ + ~{true="--include-homozygous" false="" include_homozygous} \ + ~{if defined(default_gq) then ("--default-gq " + '"' + 
default_gq + '"') else ""} \ + ~{if defined(gl_regularize_r) then ("--gl-regularizer " + '"' + gl_regularize_r + '"') else ""} \ + ~{if defined(changed_genotype_list) then ("--changed-genotype-list " + '"' + changed_genotype_list + '"') else ""} \ + ~{if defined(ped) then ("--ped " + '"' + ped + '"') else ""} \ + ~{if defined(recombination_list) then ("--recombination-list " + '"' + recombination_list + '"') else ""} \ + ~{if defined(recomb_rate) then ("--recombrate " + '"' + recomb_rate + '"') else ""} \ + ~{if defined(gen_map) then ("--genmap " + '"' + gen_map + '"') else ""} \ + ~{true="--no-genetic-haplotyping" false="" no_genetic_haplo_typing} \ + ~{true="--use-ped-samples" false="" use_ped_samples} && \ + tabix -p vcf ~{outputVCF} + } + + output { + File phasedVCF = outputVCF + File phasedVCFIndex = outputVCF + ".tbi" + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + outputVCF: {description: "Output VCF file. Add .gz to the file name to get compressed output. If omitted, use standard output.", category: "common"} + reference: {description: "Reference file. Provide this to detect alleles through re-alignment. 
If no index (.fai) exists, it will be created", category: "common"} + no_reference: {description: "Detect alleles without requiring a reference, at the expense of phasing quality (in particular for long reads)", category: "common"} + tag: {description: "Store phasing information with PS tag (standardized) or HP tag (used by GATK ReadBackedPhasing) (default: PS)", category: "common"} + output_read_list: {description: "Write reads that have been used for phasing to FILE.", category: "advanced"} + algorithm: {description: "Phasing algorithm to use (default: whatshap)", category: "advanced"} + merge_reads: {description: "Merge reads which are likely to come from the same haplotype (default: do not merge reads)", category: "common"} + internal_downsampling: {description: "Coverage reduction parameter in the internal core phasing algorithm. Higher values increase runtime *exponentially* while possibly improving phasing quality marginally. Avoid using this in the normal case! (default: 15)", category: "advanced"} + mapping_quality: {description: "Minimum mapping quality (default: 20)", category: "common"} + indels: {description: "Also phase indels (default: do not phase indels)", category: "common"} + ignore_read_groups: {description: "Ignore read groups in BAM/CRAM header and assume all reads come from the same sample.", category: "advanced"} + sample: {description: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times.", category: "common"} + chromosome: {description: "Name of chromosome to phase. If not given, all chromosomes in the input VCF are phased. 
Can be used multiple times.", category: "common"} + error_rate: {description: "The probability that a nucleotide is wrong in read merging model (default: 0.15).", category: "advanced"} + maximum_error_rate: {description: "The maximum error rate of any edge of the read merging graph before discarding it (default: 0.25).", category: "advanced"} + threshold: {description: "The threshold of the ratio between the probabilities that a pair of reads come from the same haplotype and different haplotypes in the read merging model (default: 1000000).", category: "advanced"} + negative_threshold: {description: "The threshold of the ratio between the probabilities that a pair of reads come from different haplotypes and the same haplotype in the read merging model (default: 1000).", category: "advanced"} + full_genotyping: {description: "Completely re-genotype all variants based on read data, ignores all genotype data that might be present in the VCF (EXPERIMENTAL FEATURE).", category: "experimental"} + distrust_genotypes: {description: "Allow switching variants from hetero- to homozygous in an optimal solution (see documentation).", category: "advanced"} + include_homozygous: {description: "Also work on homozygous variants, which might be turned to heterozygous", category: "advanced"} + default_gq: {description: "Default genotype quality used as cost of changing a genotype when no genotype likelihoods are available (default 30)", category: "advanced"} + gl_regularize_r: {description: "Constant (float) to be used to regularize genotype likelihoods read from input VCF (default None).", category: "advanced"} + changed_genotype_list: {description: "Write list of changed genotypes to FILE.", category: "advanced"} + ped: {description: "Use pedigree information in PED file to improve phasing (switches to PedMEC algorithm). Columns 2, 3, 4 must refer to child, mother, and father sample names as used in the VCF and BAM/CRAM. 
Other columns are ignored.", category: "advanced"} + recombination_list: {description: "Write putative recombination events to FILE.", category: "advanced"} + recomb_rate: {description: "Recombination rate in cM/Mb (used with --ped). If given, a constant recombination rate is assumed (default: 1.26cM/Mb).", category: "advanced"} + gen_map: {description: "File with genetic map (used with --ped) to be used instead of constant recombination rate, i.e. overrides option --recombrate.", category: "advanced"} + no_genetic_haplo_typing: {description: "Do not merge blocks that are not connected by reads (i.e. solely based on genotype status). Default: when in --ped mode, merge all blocks that contain at least one homozygous genotype in at least one individual into one block.", category: "advanced"} + use_ped_samples: {description: "Only work on samples mentioned in the provided PED file.", category: "advanced"} + vcf: {description: "VCF or BCF file with variants to be phased (can be gzip-compressed)", category: "required"} + vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased", category: "required"} + phaseInput: {description: "BAM, CRAM, VCF or BCF file(s) with phase information, either through sequencing reads (BAM, CRAM) or through phased blocks (VCF, BCF)", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + +task Stats { + input { + String? gtf + String? sample + String? chr_lengths + String? tsv + Boolean? only_sn_vs + String? block_list + String? 
chromosome + File vcf + + String memory = "4G" + Int timeMinutes = 120 + # Whatshap 1.0, tabix 0.2.5 + String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" + } + + command { + whatshap stats \ + ~{vcf} \ + ~{if defined(gtf) then ("--gtf " + '"' + gtf + '"') else ""} \ + ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ + ~{if defined(chr_lengths) then ("--chr-lengths " + '"' + chr_lengths + '"') else ""} \ + ~{if defined(tsv) then ("--tsv " + '"' + tsv + '"') else ""} \ + ~{true="--only-snvs" false="" only_sn_vs} \ + ~{if defined(block_list) then ("--block-list " + '"' + block_list + '"') else ""} \ + ~{if defined(chromosome) then ("--chromosome " + '"' + chromosome + '"') else ""} + } + + output { + File? phasedGTF = gtf + File? phasedTSV = tsv + File? phasedBlockList = block_list + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + gtf: "Write phased blocks to GTF file." + sample: "Name of the sample to process. If not given, use first sample found in VCF." + chr_lengths: "File with chromosome lengths (one line per chromosome, tab separated '<chr> <length>') needed to compute N50 values." + tsv: "Filename to write statistics to (tab-separated)." + only_sn_vs: "Only process SNVs and ignore all other variants." + block_list: "Filename to write list of all blocks to (one block per line)." + chromosome: "Name of chromosome to process. If not given, all chromosomes in the input VCF are considered." + vcf: "Phased VCF file" + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + +task Haplotag { + input { + String outputFile + File? reference + File? referenceFastaIndex + String? regions + Boolean? ignore_linked_read + String? linked_read_distance_cut_off + Boolean? ignore_read_groups + String? sample + String? output_haplo_tag_list + Boolean? tag_supplementary + File vcf + File vcfIndex + File alignments + File alignmentsIndex + + String memory = "4G" + Int timeMinutes = 120 + # Whatshap 1.0, tabix 0.2.5 + String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" + } + + command { + whatshap haplotag \ + ~{vcf} \ + ~{alignments} \ + ~{if defined(outputFile) then ("--output " + '"' + outputFile+ '"') else ""} \ + ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ + ~{if defined(regions) then ("--regions " + '"' + regions + '"') else ""} \ + ~{true="--ignore-linked-read" false="" ignore_linked_read} \ + ~{if defined(linked_read_distance_cut_off) then ("--linked-read-distance-cutoff " + '"' + linked_read_distance_cut_off + '"') else ""} \ + ~{true="--ignore-read-groups" false="" ignore_read_groups} \ + ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ + ~{if defined(output_haplo_tag_list) then ("--output-haplotag-list " + '"' + output_haplo_tag_list + '"') else ""} \ + ~{true="--tag-supplementary" false="" tag_supplementary} && \ + python3 -c "import pysam; pysam.index('~{outputFile}')" + } + + output { + File bam = outputFile + File bamIndex = outputFile + ".bai" + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + outputFile: "Output file. If omitted, use standard output." + reference: "Reference file. Provide this to detect alleles through re-alignment. 
If no index (.fai) exists, it will be created" + regions: "Specify region(s) of interest to limit the tagging to reads/variants overlapping those regions. You can specify a space-separated list of regions in the form of chrom:start-end, chrom (consider entire chromosome), or chrom:start (consider region from this start to end of chromosome)." + ignore_linked_read: "Ignore linkage information stored in BX tags of the reads." + linked_read_distance_cut_off: "Assume reads with identical BX tags belong to different read clouds if their distance is larger than LINKEDREADDISTANCE (default: 50000)." + ignore_read_groups: "Ignore read groups in BAM/CRAM header and assume all reads come from the same sample." + sample: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times." + output_haplo_tag_list: "Write assignments of read names to haplotypes (tab separated) to given output file. If filename ends in .gz, then output is gzipped." + tag_supplementary: "Also tag supplementary alignments. Supplementary alignments are assigned to the same haplotype the primary alignment has been assigned to (default: only tag primary alignments)." + vcf: "VCF file with phased variants (must be gzip-compressed and indexed)" + alignments: "File (BAM/CRAM) with read alignments to be tagged by haplotype" + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} From 788681506815ef10573eb86cea4efe22f300b5db Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:22:10 +0200 Subject: [PATCH 320/902] Update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 142622e2..26711b72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,11 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ deepvariant: Add task for DeepVariant ++ gatk: Make intervals optional for GenotypeGVCFs ++ isoseq3: Add required bam index input to isoseq3 ++ pbbam: Add task for indexing PacBio bam files ++ picard: Add CollectHsMetrics and CollectVariantCallingMetrics + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 From f531d274c8fcd0789318f08a61b2aa50bed0d3fa Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:23:53 +0200 Subject: [PATCH 321/902] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 26711b72..2ef37f31 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ deepvariant: Add task for DeepVariant ++ deepvariant: Add task for DeepVariant. 
+ gatk: Make intervals optional for GenotypeGVCFs + isoseq3: Add required bam index input to isoseq3 + pbbam: Add task for indexing PacBio bam files From 1f0a112b763687055b2b647d7f1845d4e57a5664 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:24:02 +0200 Subject: [PATCH 322/902] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ef37f31..f4d217fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- + deepvariant: Add task for DeepVariant. -+ gatk: Make intervals optional for GenotypeGVCFs ++ gatk: Make intervals optional for GenotypeGVCFs. + isoseq3: Add required bam index input to isoseq3 + pbbam: Add task for indexing PacBio bam files + picard: Add CollectHsMetrics and CollectVariantCallingMetrics From d4cfd015be4aacc306454b4410bd6a98a79627bc Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:24:11 +0200 Subject: [PATCH 323/902] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f4d217fc..1f75492f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ version 5.0.0-dev --------------------------- + deepvariant: Add task for DeepVariant. + gatk: Make intervals optional for GenotypeGVCFs. -+ isoseq3: Add required bam index input to isoseq3 ++ isoseq3: Add required bam index input to isoseq3. 
+ pbbam: Add task for indexing PacBio bam files + picard: Add CollectHsMetrics and CollectVariantCallingMetrics + Centrifuge: Remove metrics file from classification (which causes the From 80c84a4ae5946a0297bc0f30afaec66f327a8d55 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:24:20 +0200 Subject: [PATCH 324/902] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f75492f..6230afbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ version 5.0.0-dev + gatk: Make intervals optional for GenotypeGVCFs. + isoseq3: Add required bam index input to isoseq3. + pbbam: Add task for indexing PacBio bam files -+ picard: Add CollectHsMetrics and CollectVariantCallingMetrics ++ picard: Add CollectHsMetrics and CollectVariantCallingMetrics. + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 From 31bbeddf090f618084a71ecbd33a90842aa46b40 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:24:35 +0200 Subject: [PATCH 325/902] Update picard.wdl Co-authored-by: Jasper --- picard.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index 9603db8c..d6b23245 100644 --- a/picard.wdl +++ b/picard.wdl @@ -77,7 +77,6 @@ task CollectHsMetrics { File? baits String basename - # Use the targets file as baits as a fallback, since often the baits # for a certain capture kit are not available. 
File baitsFile = select_first([baits, targets]) From b6178110f9824758ac3a4e94f025825d23c170a2 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:36:20 +0200 Subject: [PATCH 326/902] Update parameter meta --- isoseq3.wdl | 1 + picard.wdl | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/isoseq3.wdl b/isoseq3.wdl index 7894b382..5060f0e7 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -73,6 +73,7 @@ task Refine { requirePolyA: {description: "Require fl reads to have a poly(A) tail and remove it.", category: "common"} logLevel: {description: "Set log level. Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} inputBamFile: {description: "Bam input file.", category: "required"} + inputBamIndex: {description: "Index for the Bam input file.", category: "required"} primerFile: {description: "Barcode/primer fasta file.", category: "required"} outputDir: {description: "Output directory path.", category: "required"} outputNamePrefix: {description: "Basename of the output files.", category: "required"} diff --git a/picard.wdl b/picard.wdl index d6b23245..b5ad0cb4 100644 --- a/picard.wdl +++ b/picard.wdl @@ -415,6 +415,10 @@ task CollectVariantCallingMetrics { parameter_meta { # inputs + dbsnp: {description: "DBSNP vcf file to use with CollectVariantCallingMetrics.", category: "required"} + dbsnpIndex: {description: "Index file for the DBSNP VCF.", category: "required"} + inputVCF: {description: "Input VCF file", category: "required"} + inputVCFIndex: {description: "Index file for the input VCF.", category: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", From 353224aadecf82940e915424a017870ff2580d20 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:42:45 +0200 Subject: [PATCH 327/902] Add parameter meta for CollectHsMetrics --- picard.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/picard.wdl b/picard.wdl index b5ad0cb4..49db8b8b 100644 --- a/picard.wdl +++ b/picard.wdl @@ -119,6 +119,8 @@ task CollectHsMetrics { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + targets: {description: "Picard interval file of the capture targets.", category: "required"} + baits: {description: "Picard interval file of the capture bait set.", category: "advanced"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", From 7c065d4046a50c89727a1377618919a14814d9c2 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 9 Oct 2020 11:29:04 +0200 Subject: [PATCH 328/902] remove outputType and indexing instead based on extension of the file --- bcftools.wdl | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index affa805a..e2251331 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -35,7 +35,6 @@ task Annotate { Boolean keepSites = false String? markSites Boolean noVersion = false - String outputType = "z" String? regions File? regionsFile File? 
renameChrs @@ -52,14 +51,14 @@ task Annotate { String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } - Boolean indexing = if outputType == "z" then true else false + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") command { set -e mkdir -p "$(dirname ~{outputPath})" bcftools annotate \ -o ~{outputPath} \ - -O ~{outputType} \ + -O ~{true="z" false="v" compressed} \ ~{"--annotations " + annsFile} \ ~{"--collapse " + collapse} \ ~{true="--columns" false="" length(columns) > 0} ~{sep="," columns} \ @@ -80,7 +79,7 @@ task Annotate { ~{true="--remove" false="" length(removeAnns) > 0} ~{sep="," removeAnns} \ ~{inputFile} - ~{if indexing then 'bcftools index --tbi ~{outputPath}' else ''} + ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} } @@ -97,7 +96,6 @@ task Annotate { parameter_meta { outputPath: {description: "The location the output VCF file should be written.", category: "common"} - outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "advanced"} collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} columns: {description: "Comma-separated list of columns or tags to carry over from the annotation file (see man page for details).", category: "advanced"} @@ -132,20 +130,19 @@ task Sort { String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" - String outputType = "z" } - Boolean indexing = if outputType == "z" then true else false + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") command { set -e mkdir -p "$(dirname ~{outputPath})" bcftools sort \ -o ~{outputPath} \ - -O ~{outputType} \ + -O ~{true="z" false="v" compressed} \ ~{inputFile} - ~{if indexing then 
'bcftools index --tbi ~{outputPath}' else ''} + ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} } output { @@ -162,7 +159,6 @@ task Sort { parameter_meta { inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -280,26 +276,22 @@ task View { input { File inputFile String outputPath = "output.vcf" - Int compressionLevel = 0 String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } - String outputType = if compressionLevel > 0 then "z" else "v" - Boolean indexing = if compressionLevel > 0 then true else false - String outputFilePath = if compressionLevel > 0 then outputPath + ".gz" else outputPath + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") command { set -e mkdir -p "$(dirname ~{outputPath})" bcftools view \ -o ~{outputPath} \ - -l ~{compressionLevel} \ - -O ~{outputType} \ + -O ~{true="z" false="v" compressed} \ ~{inputFile} - ~{if indexing then 'bcftools index --tbi ~{outputPath}' else ''} + ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} } output { File outputVcf = outputPath @@ -314,7 +306,6 @@ task View { parameter_meta { inputFile: {description: "A vcf or bcf file.", category: "required"} - compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} outputPath: {description: 
"The location the output VCF file should be written.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From d96e2b14a6cd362b1d7cf8e613e10a19ee98e315 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 9 Oct 2020 11:47:20 +0200 Subject: [PATCH 329/902] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9329bf5..cfda7abb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ bcftools: remove outputType and implement indexing based on output file extension. + collect-columns: updated docker image to version 1.0.0 and added the `sumOnDuplicateId` input (defaults to false). + survivor: replace integer boolean type to logical true or false value. From 14d3118230bd2e42e5dec40e5312091518b6ab19 Mon Sep 17 00:00:00 2001 From: Jasper Date: Mon, 12 Oct 2020 13:25:10 +0200 Subject: [PATCH 330/902] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7668cd2b..11a39d89 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ Samtools: Add `threads` to parameter meta for Merge task ++ Samtools: Add `threads` to parameter meta for Merge task. + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 From 153438890ea1068846522b7e6386256bba48ab71 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 12 Oct 2020 15:53:41 +0200 Subject: [PATCH 331/902] add tmpDir input to specify temporary directory when sorting. 
--- CHANGELOG.md | 1 + bcftools.wdl | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c32d349..700bf0b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ bcftools: add tmpDir input to specify temporary directory when sorting. + bcftools: remove outputType and implement indexing based on output file extension. + NanoPack: Add parameter_meta to NanoPlot task. + Centrifuge: Remove metrics file from classification (which causes the diff --git a/bcftools.wdl b/bcftools.wdl index e2251331..63f2cacb 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -127,6 +127,7 @@ task Sort { input { File inputFile String outputPath = "output.vcf.gz" + String tmpDir = "./sorting-tmp" String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" @@ -136,10 +137,11 @@ task Sort { command { set -e - mkdir -p "$(dirname ~{outputPath})" + mkdir -p "$(dirname ~{outputPath})" ~{tmpDir} bcftools sort \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ + -T ~{tmpDir} \ ~{inputFile} ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} From 72ad1f1b4c6123a72518de01e36c0ba6a79657bb Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 12 Oct 2020 16:21:06 +0200 Subject: [PATCH 332/902] add tmpDir to parameter_meta section --- bcftools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/bcftools.wdl b/bcftools.wdl index 63f2cacb..a0aeb442 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -161,6 +161,7 @@ task Sort { parameter_meta { inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} + tmpDir: {description: "The location of the temporary files during the bcftools sorting.", category: "advanced"} memory: 
{description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 4760d1873df4204bb64c38f6d6c8378c41568b46 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 13 Oct 2020 10:08:58 +0200 Subject: [PATCH 333/902] remove redundant G in -Xmx in snpeff --- snpeff.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snpeff.wdl b/snpeff.wdl index 95383b94..079a720a 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -24,7 +24,7 @@ task SnpEff { set -e mkdir -p "$(dirname ~{outputPath})" unzip ~{datadirZip} - snpEff -Xmx~{javaXmx}G -XX:ParallelGCThreads=1 \ + snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -v \ ~{genomeVersion} \ -noDownload \ From 3fa0f1411831448f15e17506dfef9230b303a5f1 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 15 Oct 2020 15:38:45 +0200 Subject: [PATCH 334/902] Remove most inputs --- whatshap.wdl | 110 ++++++++------------------------------------------- 1 file changed, 16 insertions(+), 94 deletions(-) diff --git a/whatshap.wdl b/whatshap.wdl index 2506aa10..1334d45b 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -26,33 +26,13 @@ task Phase { String outputVCF File? reference File? referenceIndex - Boolean? no_reference String? tag - File? output_read_list String? algorithm - Boolean? merge_reads - String? internal_downsampling - String? mapping_quality Boolean? indels - Boolean? ignore_read_groups String? sample String? chromosome - String? error_rate - String? maximum_error_rate String? threshold - String? negative_threshold - Boolean? full_genotyping - Boolean? distrust_genotypes - Boolean? include_homozygous - String? default_gq - String? gl_regularize_r - File? changed_genotype_list String? ped - File? 
recombination_list - String? recomb_rate - File? gen_map - Boolean? no_genetic_haplo_typing - Boolean? use_ped_samples File vcf File vcfIndex File phaseInput @@ -70,33 +50,13 @@ task Phase { ~{phaseInput} \ ~{if defined(outputVCF) then ("--output " + '"' + outputVCF + '"') else ""} \ ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ - ~{true="--no-reference" false="" no_reference} \ ~{if defined(tag) then ("--tag " + '"' + tag + '"') else ""} \ - ~{if defined(output_read_list) then ("--output-read-list " + '"' + output_read_list + '"') else ""} \ ~{if defined(algorithm) then ("--algorithm " + '"' + algorithm + '"') else ""} \ - ~{true="--merge-reads" false="" merge_reads} \ - ~{if defined(internal_downsampling) then ("--internal-downsampling " + '"' + internal_downsampling + '"') else ""} \ - ~{if defined(mapping_quality) then ("--mapping-quality " + '"' + mapping_quality + '"') else ""} \ ~{true="--indels" false="" indels} \ - ~{true="--ignore-read-groups" false="" ignore_read_groups} \ ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ ~{if defined(chromosome) then ("--chromosome " + '"' + chromosome + '"') else ""} \ - ~{if defined(error_rate) then ("--error-rate " + '"' + error_rate + '"') else ""} \ - ~{if defined(maximum_error_rate) then ("--maximum-error-rate " + '"' + maximum_error_rate + '"') else ""} \ ~{if defined(threshold) then ("--threshold " + '"' + threshold + '"') else ""} \ - ~{if defined(negative_threshold) then ("--negative-threshold " + '"' + negative_threshold + '"') else ""} \ - ~{true="--full-genotyping" false="" full_genotyping} \ - ~{true="--distrust-genotypes" false="" distrust_genotypes} \ - ~{true="--include-homozygous" false="" include_homozygous} \ - ~{if defined(default_gq) then ("--default-gq " + '"' + default_gq + '"') else ""} \ - ~{if defined(gl_regularize_r) then ("--gl-regularizer " + '"' + gl_regularize_r + '"') else ""} \ - ~{if defined(changed_genotype_list) then 
("--changed-genotype-list " + '"' + changed_genotype_list + '"') else ""} \ ~{if defined(ped) then ("--ped " + '"' + ped + '"') else ""} \ - ~{if defined(recombination_list) then ("--recombination-list " + '"' + recombination_list + '"') else ""} \ - ~{if defined(recomb_rate) then ("--recombrate " + '"' + recomb_rate + '"') else ""} \ - ~{if defined(gen_map) then ("--genmap " + '"' + gen_map + '"') else ""} \ - ~{true="--no-genetic-haplotyping" false="" no_genetic_haplo_typing} \ - ~{true="--use-ped-samples" false="" use_ped_samples} && \ tabix -p vcf ~{outputVCF} } @@ -114,33 +74,13 @@ task Phase { parameter_meta { outputVCF: {description: "Output VCF file. Add .gz to the file name to get compressed output. If omitted, use standard output.", category: "common"} reference: {description: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created", category: "common"} - no_reference: {description: "Detect alleles without requiring a reference, at the expense of phasing quality (in particular for long reads)", category: "common"} tag: {description: "Store phasing information with PS tag (standardized) or HP tag (used by GATK ReadBackedPhasing) (default: {description: PS)", category: "common"} - output_read_list: {description: "Write reads that have been used for phasing to FILE.", category: "advanced"} algorithm: {description: "Phasing algorithm to use (default: {description: whatshap)", category: "advanced"} - merge_reads: {description: "Merge reads which are likely to come from the same haplotype (default: {description: do not merge reads)", category: "common"} - internal_downsampling: {description: "Coverage reduction parameter in the internal core phasing algorithm. Higher values increase runtime *exponentially* while possibly improving phasing quality marginally. Avoid using this in the normal case! 
(default: {description: 15)", category: "advanced"} - mapping_quality: {description: "Minimum mapping quality (default: {description: 20)", category: "common"} indels: {description: "Also phase indels (default: {description: do not phase indels)", category: "common"} - ignore_read_groups: {description: "Ignore read groups in BAM/CRAM header and assume all reads come from the same sample.", category: "advanced"} sample: {description: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times.", category: "common"} chromosome: {description: "Name of chromosome to phase. If not given, all chromosomes in the input VCF are phased. Can be used multiple times.", category: "common"} - error_rate: {description: "The probability that a nucleotide is wrong in read merging model (default: {description: 0.15).", category: "advanced"} - maximum_error_rate: {description: "The maximum error rate of any edge of the read merging graph before discarding it (default: {description: 0.25).", category: "advanced"} threshold: {description: "The threshold of the ratio between the probabilities that a pair of reads come from the same haplotype and different haplotypes in the read merging model (default: {description: 1000000).", category: "advanced"} - negative_threshold: {description: "The threshold of the ratio between the probabilities that a pair of reads come from different haplotypes and the same haplotype in the read merging model (default: {description: 1000).", category: "advanced"} - full_genotyping: {description: "Completely re-genotype all variants based on read data, ignores all genotype data that might be present in the VCF (EXPERIMENTAL FEATURE).", category: "experimental"} - distrust_genotypes: {description: "Allow switching variants from hetero- to homozygous in an optimal solution (see documentation).", category: "advanced"} - include_homozygous: {description: "Also work on homozygous variants, which might be turned to 
heterozygous", category: "advanced"} - default_gq: {description: "Default genotype quality used as cost of changing a genotype when no genotype likelihoods are available (default 30)", category: "advanced"} - gl_regularize_r: {description: "Constant (float) to be used to regularize genotype likelihoods read from input VCF (default None).", category: "advanced"} - changed_genotype_list: {description: "Write list of changed genotypes to FILE.", category: "advanced"} ped: {description: "Use pedigree information in PED file to improve phasing (switches to PedMEC algorithm). Columns 2, 3, 4 must refer to child, mother, and father sample names as used in the VCF and BAM/CRAM. Other columns are ignored.", category: "advanced"} - recombination_list: {description: "Write putative recombination events to FILE.", category: "advanced"} - recomb_rate: {description: "Recombination rate in cM/Mb (used with --ped). If given, a constant recombination rate is assumed (default: {description: 1.26cM/Mb).", category: "advanced"} - gen_map: {description: "File with genetic map (used with --ped) to be used instead of constant recombination rate, i.e. overrides option --recombrate.", category: "advanced"} - no_genetic_haplo_typing: {description: "Do not merge blocks that are not connected by reads (i.e. solely based on genotype status). 
Default: {description: when in --ped mode, merge all blocks that contain at least one homozygous genotype in at least one individual into one block.", category: "advanced"} - use_ped_samples: {description: "Only work on samples mentioned in the provided PED file.", category: "advanced"} vcf: {description: "VCF or BCF file with variants to be phased (can be gzip-compressed)", category: "required"} vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased", category: "required"} phaseInput: {description: "BAM, CRAM, VCF or BCF file(s) with phase information, either through sequencing reads (BAM, CRAM) or through phased blocks (VCF, BCF)", category: "required"} @@ -154,10 +94,8 @@ task Stats { input { String? gtf String? sample - String? chr_lengths String? tsv - Boolean? only_sn_vs - String? block_list + String? blockList String? chromosome File vcf @@ -168,21 +106,19 @@ task Stats { } command { - whatshap stats \ + whatshap stats \ ~{vcf} \ ~{if defined(gtf) then ("--gtf " + '"' + gtf + '"') else ""} \ ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ - ~{if defined(chr_lengths) then ("--chr-lengths " + '"' + chr_lengths + '"') else ""} \ ~{if defined(tsv) then ("--tsv " + '"' + tsv + '"') else ""} \ - ~{true="--only-snvs" false="" only_sn_vs} \ - ~{if defined(block_list) then ("--block-list " + '"' + block_list + '"') else ""} \ + ~{if defined(blockList) then ("--block-list " + '"' + blockList + '"') else ""} \ ~{if defined(chromosome) then ("--chromosome " + '"' + chromosome + '"') else ""} } output { - File? phasedGTF = gtf - File? phasedTSV = tsv - File? phasedBlockList = block_list + File? phasedGTF = gtf + File? phasedTSV = tsv + File? phasedBlockList = blockList } runtime { @@ -194,10 +130,8 @@ task Stats { parameter_meta { gtf: "Write phased blocks to GTF file." sample: "Name of the sample to process. If not given, use first sample found in VCF." 
- chr_lengths: "File with chromosome lengths (one line per chromosome, tab separated ' ') needed to compute N50 values." tsv: "Filename to write statistics to (tab-separated)." - only_sn_vs: "Only process SNVs and ignore all other variants." - block_list: "Filename to write list of all blocks to (one block per line)." + blockList: "Filename to write list of all blocks to (one block per line)." chromosome: "Name of chromosome to process. If not given, all chromosomes in the input VCF are considered." vcf: "Phased VCF file" memory: {description: "The amount of memory this job will use.", category: "advanced"} @@ -212,12 +146,7 @@ task Haplotag { File? reference File? referenceFastaIndex String? regions - Boolean? ignore_linked_read - String? linked_read_distance_cut_off - Boolean? ignore_read_groups String? sample - String? output_haplo_tag_list - Boolean? tag_supplementary File vcf File vcfIndex File alignments @@ -230,24 +159,19 @@ task Haplotag { } command { - whatshap haplotag \ + whatshap haplotag \ ~{vcf} \ ~{alignments} \ ~{if defined(outputFile) then ("--output " + '"' + outputFile+ '"') else ""} \ ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ ~{if defined(regions) then ("--regions " + '"' + regions + '"') else ""} \ - ~{true="--ignore-linked-read" false="" ignore_linked_read} \ - ~{if defined(linked_read_distance_cut_off) then ("--linked-read-distance-cutoff " + '"' + linked_read_distance_cut_off + '"') else ""} \ - ~{true="--ignore-read-groups" false="" ignore_read_groups} \ ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ - ~{if defined(output_haplo_tag_list) then ("--output-haplotag-list " + '"' + output_haplo_tag_list + '"') else ""} \ - ~{true="--tag-supplementary" false="" tag_supplementary} && \ python3 -c "import pysam; pysam.index('~{outputFile}')" } output { - File bam = outputFile - File bamIndex = outputFile + ".bai" + File bam = outputFile + File bamIndex = outputFile + ".bai" } runtime { 
@@ -258,16 +182,14 @@ task Haplotag { parameter_meta { outputFile: "Output file. If omitted, use standard output." - reference: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created" + reference: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created." + referenceIndex: "Index for the reference file." regions: "Specify region(s) of interest to limit the tagging to reads/variants overlapping those regions. You can specify a space-separated list of regions in the form of chrom:start-end, chrom (consider entire chromosome), or chrom:start (consider region from this start to end of chromosome)." - ignore_linked_read: "Ignore linkage information stored in BX tags of the reads." - linked_read_distance_cut_off: "Assume reads with identical BX tags belong to different read clouds if their distance is larger than LINKEDREADDISTANCE (default: 50000)." - ignore_read_groups: "Ignore read groups in BAM/CRAM header and assume all reads come from the same sample." sample: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times." - output_haplo_tag_list: "Write assignments of read names to haplotypes (tab separated) to given output file. If filename ends in .gz, then output is gzipped." - tag_supplementary: "Also tag supplementary alignments. Supplementary alignments are assigned to the same haplotype the primary alignment has been assigned to (default: only tag primary alignments)." - vcf: "VCF file with phased variants (must be gzip-compressed and indexed)" - alignments: "File (BAM/CRAM) with read alignments to be tagged by haplotype" + vcf: "VCF file with phased variants (must be gzip-compressed and indexed)." + vcfIndex: "Index for the VCF or BCF file with variants to be phased." + alignments: "File (BAM/CRAM) with read alignments to be tagged by haplotype." 
+ alignmentsIndex: "Index for the alignment file." memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 289a42d5baaaa7aa0a38cbadde436d610009d4f5 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 15 Oct 2020 15:50:14 +0200 Subject: [PATCH 335/902] Rename parameter meta for index --- whatshap.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whatshap.wdl b/whatshap.wdl index 1334d45b..2ee90f50 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -183,7 +183,7 @@ task Haplotag { parameter_meta { outputFile: "Output file. If omitted, use standard output." reference: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created." - referenceIndex: "Index for the reference file." + referenceFastaIndex: "Index for the reference file." regions: "Specify region(s) of interest to limit the tagging to reads/variants overlapping those regions. You can specify a space-separated list of regions in the form of chrom:start-end, chrom (consider entire chromosome), or chrom:start (consider region from this start to end of chromosome)." sample: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times." vcf: "VCF file with phased variants (must be gzip-compressed and indexed)." 
From a772e3773feedcb22f7e18f8a1f0130fd9b3cf0c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 16 Oct 2020 15:08:33 +0200 Subject: [PATCH 336/902] add gripss, timeMinutes for gridss, fix typos --- bcftools.wdl | 2 +- gridss.wdl | 7 ++- gripss.wdl | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++ snpeff.wdl | 2 +- 4 files changed, 124 insertions(+), 4 deletions(-) create mode 100644 gripss.wdl diff --git a/bcftools.wdl b/bcftools.wdl index 0be3be93..e68e527c 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -150,7 +150,7 @@ task Filter { ~{vcf} \ -O z \ -o ~{outputPath} - bctools index --tbi ~{outputPath} + bcftools index --tbi ~{outputPath} } output { diff --git a/gridss.wdl b/gridss.wdl index 44b9e9f1..7516553d 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2017 Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -34,7 +34,8 @@ task GRIDSS { String outputPrefix = "gridss" Int jvmHeapSizeGb = 30 - Int threads = 1 + Int threads = 2 + Int timeMinutes = ceil(1440 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } @@ -64,6 +65,7 @@ task GRIDSS { runtime { cpu: threads memory: "~{jvmHeapSizeGb + 1}G" + time_minutes: timeMinutes docker: dockerImage } @@ -79,6 +81,7 @@ task GRIDSS { threads: {description: "The number of the threads to use.", category: "advanced"} jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } \ No newline at end of file diff --git a/gripss.wdl b/gripss.wdl new file mode 100644 index 00000000..6ed0bcf9 --- /dev/null +++ b/gripss.wdl @@ -0,0 +1,117 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task ApplicationKt { + input { + File inputVcf + String outputPath = "gripss.vcf.gz" + File referenceFasta + File breakpointHotspot + File breakendPon + File breakpointPon + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + com.hartwig.hmftools.gripss.GripssApplicationKt \ + -ref_genome ~{referenceFasta} \ + -breakpoint_hotspot ~{breakpointHotspot} \ + -breakend_pon ~{breakendPon} \ + -breakpoint_pon ~{breakpointPon} \ + -input_vcf ~{inputVcf} \ + -output_vcf ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF.", category: "required"} + outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "advanced"} + breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} + breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} + breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task HardFilterApplicationKt { + input { + File inputVcf + String outputPath = "gripss_hard_filter.vcf.gz" + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ + -input_vcf ~{inputVcf} \ + -output_vcf ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF.", category: "required"} + outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} \ No newline at end of file diff --git a/snpeff.wdl b/snpeff.wdl index 079a720a..d639a036 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -24,7 +24,7 @@ task SnpEff { set -e mkdir -p "$(dirname ~{outputPath})" unzip ~{datadirZip} - snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -v \ ~{genomeVersion} \ -noDownload \ From 37ba60dd104f3a221c29d6fd6cf2e5c2be76e1ce Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 22 Oct 2020 07:31:08 +0200 Subject: [PATCH 337/902] Add memory to samtools Merge --- samtools.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 24d95aa4..ad94338a 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -332,6 +332,7 @@ task Merge { Int threads = 1 Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) + String memory = "4G" String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } String indexPath = sub(outputBamPath, "\.bam$",".bai") @@ -355,6 +356,7 @@ task Merge { runtime { cpu: threads docker: dockerImage + memory: memory time_minutes: timeMinutes } @@ -362,7 +364,7 @@ task Merge { # inputs bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} - threads: {description: "Number of threads to use.", category: "advanced"} + threads: {description: "Number of threads to use.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", From 6581d965977ab6a4f31058065bca84fc4106ed9f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 22 Oct 2020 14:05:48 +0200 Subject: [PATCH 338/902] add AnnotateInsertedSequence task to gridss.wdl --- gridss.wdl | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 7516553d..78e4bd40 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -84,4 +84,60 @@ task GRIDSS { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } +} + +task AnnotateInsertedSequence { + input { + File inputVcf + String outputPath = "gridss.annotated.vcf.gz" + File viralReference + + Int threads = 8 + String javaXmx = "8G" + String memory = "9G" + String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" + Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2 / threads) + } + + command { + java -Xmx~{javaXmx} \ + -Dsamjdk.create_index=true \ + -Dsamjdk.use_async_io_read_samtools=true \ + -Dsamjdk.use_async_io_write_samtools=true \ + -Dsamjdk.use_async_io_write_tribble=true \ + -Dsamjdk.buffer_size=4194304 \ + -cp /usr/local/share/gridss-2.9.4-0/gridss.jar \ + gridss.AnnotateInsertedSequence \ + REFERENCE_SEQUENCE=~{viralReference} \ + INPUT=~{inputVcf} \ + OUTPUT=~{outputPath} \ + ALIGNMENT=APPEND \ + WORKING_DIR='.' 
\ + WORKER_THREADS=~{threads} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF file.", category: "required"} + outputPath: {description: "The path the output will be written to.", category: "common"} + viralReference: {description: "A fasta file with viral sequences.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } \ No newline at end of file From cd64c02f84707a26ed6787e83269347ed6a69ca4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 22 Oct 2020 15:27:17 +0200 Subject: [PATCH 339/902] add some # !UnknownRuntimeKey --- gridss.wdl | 4 ++-- gripss.wdl | 4 ++-- snpeff.wdl | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 78e4bd40..89558ff3 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -65,7 +65,7 @@ task GRIDSS { runtime { cpu: threads memory: "~{jvmHeapSizeGb + 1}G" - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } @@ -124,7 +124,7 @@ task AnnotateInsertedSequence { runtime { cpu: threads memory: memory - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } diff --git a/gripss.wdl b/gripss.wdl index 6ed0bcf9..3f500a60 100644 --- a/gripss.wdl +++ b/gripss.wdl @@ -54,7 +54,7 @@ task ApplicationKt { runtime { memory: 
memory - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } @@ -100,7 +100,7 @@ task HardFilterApplicationKt { runtime { memory: memory - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } diff --git a/snpeff.wdl b/snpeff.wdl index d639a036..a26fadbd 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -45,7 +45,7 @@ task SnpEff { runtime { docker: dockerImage - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey memory: memory } From 208e8f46530b8a1d0dbdbd3afa22bc7449c03da3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 09:37:14 +0100 Subject: [PATCH 340/902] add some missing inputs to gridss AnnotateInsertedSequence and add missing \ to bcftools Filter --- bcftools.wdl | 2 +- gridss.wdl | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index e68e527c..4703580a 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -146,7 +146,7 @@ task Filter { filter \ ~{"-i " + include} \ ~{"-e " + exclude} \ - ~{"-s " + softFilter} + ~{"-s " + softFilter} \ ~{vcf} \ -O z \ -o ~{outputPath} diff --git a/gridss.wdl b/gridss.wdl index 89558ff3..cfbb7069 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -91,6 +91,8 @@ task AnnotateInsertedSequence { File inputVcf String outputPath = "gridss.annotated.vcf.gz" File viralReference + File viralReferenceFai + File viralReferenceDict Int threads = 8 String javaXmx = "8G" From 674158b82e2a637c536853113721c48db6e6d09c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 10:51:06 +0100 Subject: [PATCH 341/902] add license notice to snpeff, add index input for bcftools annotate, and BWA mem index image input for gridss annotate inserted sequences --- bcftools.wdl | 2 ++ gridss.wdl | 4 ++++ snpeff.wdl | 22 ++++++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 4703580a..d358ab7b 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -44,6 
+44,7 @@ task Annotate { Boolean singleOverlaps = false Array[String] removeAnns = [] File inputFile + File? inputFileIndex String outputPath = "output.vcf.gz" Int threads = 0 @@ -117,6 +118,7 @@ task Annotate { singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} inputFile: {description: "A vcf or bcf file.", category: "required"} + ipnutFileIndex: {description: "The index for the input vcf or bcf.", "common"} threads: {description: "Number of extra decompression threads [0].", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/gridss.wdl b/gridss.wdl index cfbb7069..c444c854 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,6 +93,7 @@ task AnnotateInsertedSequence { File viralReference File viralReferenceFai File viralReferenceDict + File viralReferenceImg Int threads = 8 String javaXmx = "8G" @@ -134,6 +135,9 @@ task AnnotateInsertedSequence { inputVcf: {description: "The input VCF file.", category: "required"} outputPath: {description: "The path the output will be written to.", category: "common"} viralReference: {description: "A fasta file with viral sequences.", category: "required"} + viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"} + viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"} + viralReferenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the viral reference.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", diff --git a/snpeff.wdl b/snpeff.wdl index a26fadbd..2a113c52 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -1,5 +1,27 @@ version 1.0 +# MIT License +# +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task SnpEff { input { File vcf From 836f40c11ad03ca513345ba56b6feb502b2724dc Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 11:07:09 +0100 Subject: [PATCH 342/902] fix missing key in parameter_met --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index d358ab7b..064e2d6e 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -118,7 +118,7 @@ task Annotate { singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} inputFile: {description: "A vcf or bcf file.", category: "required"} - ipnutFileIndex: {description: "The index for the input vcf or bcf.", "common"} + ipnutFileIndex: {description: "The index for the input vcf or bcf.", category: "common"} threads: {description: "Number of extra decompression threads [0].", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 8bc34ddf78f998b838bec85e43926b25da42cc66 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 11:12:19 +0100 Subject: [PATCH 343/902] typo --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 064e2d6e..3b512716 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -118,7 +118,7 @@ task Annotate { singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} inputFile: {description: "A vcf or bcf file.", category: "required"} - ipnutFileIndex: {description: "The index for the input vcf or bcf.", category: "common"} + inputFileIndex: {description: "The index for the input vcf or bcf.", category: "common"} threads: {description: "Number of extra decompression threads [0].", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 070cbb252016d18f59d52e4919a2a267f1c18671 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 13:31:03 +0100 Subject: [PATCH 344/902] add missing input --- bcftools.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 3b512716..1dba7611 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -25,6 +25,7 @@ version 1.0 task Annotate { input { File? annsFile + File? annsFileIndex String? collapse Array[String] columns = [] String? 
exclude @@ -99,7 +100,8 @@ task Annotate { parameter_meta { outputPath: {description: "The location the output VCF file should be written.", category: "common"} outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} - annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "advanced"} + annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "common"} + annsFileIndex: {description: "The index for annsFile.", category: "common"} collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} columns: {description: "Comma-separated list of columns or tags to carry over from the annotation file (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} From 1e19fbb2a00187bfa10cab023aa52dacb1091e03 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 14:09:13 +0100 Subject: [PATCH 345/902] add missing inputs --- gripss.wdl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/gripss.wdl b/gripss.wdl index 3f500a60..c9a8f27d 100644 --- a/gripss.wdl +++ b/gripss.wdl @@ -25,6 +25,8 @@ task ApplicationKt { File inputVcf String outputPath = "gripss.vcf.gz" File referenceFasta + File referenceFastaFai + File referenceFastaDict File breakpointHotspot File breakendPon File breakpointPon @@ -61,7 +63,10 @@ task ApplicationKt { parameter_meta { inputVcf: {description: "The input VCF.", category: "required"} outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} - referenceFasta: {description: "The reference fasta file.", category: "advanced"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence 
dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} From d6109250b32299638c1d0f47edf580a69b0732b4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 16:36:10 +0100 Subject: [PATCH 346/902] add some cleanup to snpeff --- snpeff.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/snpeff.wdl b/snpeff.wdl index 2a113c52..85709079 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -59,6 +59,7 @@ task SnpEff { ~{true="-noShiftHgvs" false="" noShiftHgvs} \ ~{"-upDownStreamLen " + upDownStreamLen} \ > ~{outputPath} + rm -r $PWD/data } output { From a82be38ca7ff228233a5cd49c0495e3714a7ca79 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 27 Oct 2020 09:32:18 +0100 Subject: [PATCH 347/902] Update pbbam.wdl Co-authored-by: Jasper --- pbbam.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/pbbam.wdl b/pbbam.wdl index 368ff4ed..52737a00 100644 --- a/pbbam.wdl +++ b/pbbam.wdl @@ -23,6 +23,7 @@ task Index { input { File bamFile String? 
outputBamPath + String memory = "2G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) String dockerImage = "quay.io/biocontainers/pbbam:1.6.0--h5b7e6e0_0" From 7db21a6481522746b0699c2756083d57326be164 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 27 Oct 2020 16:02:35 +0100 Subject: [PATCH 348/902] Add support for outputPrefix with or without folder --- chunked-scatter.wdl | 3 +++ lima.wdl | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index b54a7d2e..8895c2a4 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -24,6 +24,7 @@ task ChunkedScatter { input { File inputFile String prefix = "./scatter" + Boolean splitContigs = false Int? chunkSize Int? overlap Int? minimumBasesPerFile @@ -40,6 +41,7 @@ task ChunkedScatter { ~{"-c " + chunkSize} \ ~{"-o " + overlap} \ ~{"-m " + minimumBasesPerFile} \ + ~{true="--split-contigs " false="" splitContigs} \ ~{inputFile} } @@ -108,6 +110,7 @@ task ScatterRegions { splitContigs: {description: "If set, contigs are allowed to be split up over multiple files.", category: "advanced"} scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} scatterSize: {description: "Overrides scatterSizeMillions with a smaller value if set.", category: "advanced"} + splitContigs: {description: "Allow contigs to be split during scattering.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} diff --git a/lima.wdl b/lima.wdl index 38cf2d6e..7ef9d4ab 100644 --- a/lima.wdl +++ b/lima.wdl @@ -58,6 +58,7 @@ task Lima { command { set -e + mkdir -p "$(dirname ~{outputPrefix})" lima \ ~{libraryDesignOptions[libraryDesign]} \ ~{true="--score-full-pass" false="" scoreFullPass} \ @@ -86,6 +87,15 @@ task Lima { ~{inputBamFile} \ ~{barcodeFile} \ ~{outputPrefix 
+ ".bam"} + + # copy the files with the default filename to the folder specified in + # outputPrefix. + if [ "~{basename(outputPrefix)}.json" != "~{outputPrefix}.json" ]; then + cp "~{basename(outputPrefix)}.json" "~{outputPrefix}.json" + cp "~{basename(outputPrefix)}.lima.counts" "~{outputPrefix}.lima.counts" + cp "~{basename(outputPrefix)}.lima.report" "~{outputPrefix}.lima.report" + cp "~{basename(outputPrefix)}.lima.summary" "~{outputPrefix}.lima.summary" + fi } output { From a7445b829f0babf6257b376e71f48f4c860828cc Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 27 Oct 2020 16:13:02 +0100 Subject: [PATCH 349/902] Remove duplicate parameter meta entry --- chunked-scatter.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 8895c2a4..115c5ca4 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -110,7 +110,6 @@ task ScatterRegions { splitContigs: {description: "If set, contigs are allowed to be split up over multiple files.", category: "advanced"} scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} scatterSize: {description: "Overrides scatterSizeMillions with a smaller value if set.", category: "advanced"} - splitContigs: {description: "Allow contigs to be split during scattering.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From af075999debec07b821010b0e0d260c23b41e143 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 27 Oct 2020 16:19:46 +0100 Subject: [PATCH 350/902] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c1f32dd..b27addab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ version 5.0.0-dev + deepvariant: Add task 
for DeepVariant. + gatk: Make intervals optional for GenotypeGVCFs. + isoseq3: Add required bam index input to isoseq3. -+ pbbam: Add task for indexing PacBio bam files ++ pbbam: Add task for indexing PacBio bam files. + picard: Add CollectHsMetrics and CollectVariantCallingMetrics. + Samtools: Add `threads` to parameter meta for Merge task. + bcftools: add tmpDir input to specify temporary directory when sorting. From 8df9a800fb56341a2c0b964f9300d49394cf485d Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 06:52:29 +0100 Subject: [PATCH 351/902] Update to CCS version 5 --- ccs.wdl | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ccs.wdl b/ccs.wdl index 60e43711..bcebefe9 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -29,12 +29,14 @@ task CCS { Float minReadQuality = 0.99 String logLevel = "WARN" File subreadsFile + File? subreadsIndexFile + String? chunkString String outputPrefix Int cores = 2 String memory = "2G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/pbccs:4.2.0--0" + String dockerImage = "quay.io/biocontainers/pbccs:5.0.0--0" } command { @@ -48,7 +50,8 @@ task CCS { --min-rq ~{minReadQuality} \ --log-level ~{logLevel} \ --num-threads ~{cores} \ - ~{"--report-file " + outputPrefix + ".ccs.report.txt"} \ + ~{"--chunk " + chunkString} \ + ~{"--report-json " + outputPrefix + ".ccs.report.json"} \ ~{"--log-file " + outputPrefix + ".ccs.stderr.log"} \ ~{subreadsFile} \ ~{outputPrefix + ".ccs.bam"} @@ -57,7 +60,7 @@ task CCS { output { File ccsBam = outputPrefix + ".ccs.bam" File ccsBamIndex = outputPrefix + ".ccs.bam.pbi" - File ccsReport = outputPrefix + ".ccs.report.txt" + File ccsReport = outputPrefix + ".ccs.report.json" File ccsStderr = outputPrefix + ".ccs.stderr.log" } @@ -77,6 +80,9 @@ task CCS { minReadQuality: {description: "Minimum predicted accuracy in [0, 1].", category: "common"} logLevel: {description: "Set log level. 
Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} subreadsFile: {description: "Subreads input file.", category: "required"} + subreadsIndexFile: {description: "Index for the subreads input file.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "advanced"} + chunkString: {descpription: "Chunk string (e.g. 1/4, 5/5) for CCS.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} From 910200447daeadbdf8b7698db39719ba35126498 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 06:54:14 +0100 Subject: [PATCH 352/902] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b27addab..e2068f49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ CCS: Update CCS to version 5. + deepvariant: Add task for DeepVariant. + gatk: Make intervals optional for GenotypeGVCFs. + isoseq3: Add required bam index input to isoseq3. From e29df66cd70df1681b892c8fb01af426beb4333a Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 07:03:02 +0100 Subject: [PATCH 353/902] Remove duplicate parameter meta --- ccs.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/ccs.wdl b/ccs.wdl index bcebefe9..5d9887bf 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -81,7 +81,6 @@ task CCS { logLevel: {description: "Set log level. 
Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} subreadsFile: {description: "Subreads input file.", category: "required"} subreadsIndexFile: {description: "Index for the subreads input file.", category: "required"} - outputPrefix: {description: "Output directory path + output file prefix.", category: "advanced"} chunkString: {descpription: "Chunk string (e.g. 1/4, 5/5) for CCS.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} From ccfd843303c5186121de89a6d667dc1fb20f4100 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 09:12:11 +0100 Subject: [PATCH 354/902] Update parameter meta --- ccs.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ccs.wdl b/ccs.wdl index 5d9887bf..cab15fea 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -80,7 +80,7 @@ task CCS { minReadQuality: {description: "Minimum predicted accuracy in [0, 1].", category: "common"} logLevel: {description: "Set log level. Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} subreadsFile: {description: "Subreads input file.", category: "required"} - subreadsIndexFile: {description: "Index for the subreads input file.", category: "required"} + subreadsIndexFile: {description: "Index for the subreads input file, required when using chunkString.", category: "advanced"} chunkString: {descpription: "Chunk string (e.g. 
1/4, 5/5) for CCS.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} From 61ba73556876d2bb1a1cc73ca9765af29a8e45ba Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 09:45:40 +0100 Subject: [PATCH 355/902] Update parameter meta --- samtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index ad94338a..9e415b0e 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -367,6 +367,7 @@ task Merge { threads: {description: "Number of threads to use.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From 7c63b058e9e1c23407bf5f07c04372d16226523a Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 10:05:57 +0100 Subject: [PATCH 356/902] Add postprocess argument to DeepVariant task --- deepvariant.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index 88bdb352..10bc49c9 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -28,6 +28,7 @@ task RunDeepVariant { File inputBamIndex String modelType String outputVcf + String? postprocessVariantsExtraArgs File? customizedModel Int? numShards String? 
outputGVcf @@ -51,8 +52,9 @@ task RunDeepVariant { ~{"--output_gvcf " + outputGVcf} \ ~{"--customized_model " + customizedModel} \ ~{"--num_shards " + numShards} \ - ~{"--regions} " + regions} \ + ~{"--regions " + regions} \ ~{"--sample_name " + sampleName} \ + ~{"--postprocess_variants_extra_args " + postprocessVariantsExtraArgs} \ ~{true="--vcf_stats_report" false="--novcf_stats_report" VCFStatsReport} } From 05f14ce2fa3af46ef79afa3c868837ad49db0fb5 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 10:31:24 +0100 Subject: [PATCH 357/902] Update parameter meta --- deepvariant.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/deepvariant.wdl b/deepvariant.wdl index 10bc49c9..f5661886 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -85,6 +85,7 @@ task RunDeepVariant { regions: {description: "List of regions we want to process, in BED/BEDPE format.", category: "advanced"} sampleName: {description: "Sample name to use instead of the sample name from the input reads BAM (SM tag in the header).", category: "common"} VCFStatsReport: {description: "Output a visual report (HTML) of statistics about the output VCF.", category: "common"} + postprocessVariantsExtraArgs: {description: "A comma-separated list of flag_name=flag_value. 'flag_name' has to be valid flags for calpostprocess_variants.py.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", From 680563febf9dba81cff822f73ab599b351f3e7c6 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 10:33:36 +0100 Subject: [PATCH 358/902] Fix bug in whatshap task --- whatshap.wdl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/whatshap.wdl b/whatshap.wdl index 2ee90f50..93624590 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -45,6 +45,8 @@ task Phase { } command { + set -e + whatshap phase \ ~{vcf} \ ~{phaseInput} \ @@ -56,7 +58,8 @@ task Phase { ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ ~{if defined(chromosome) then ("--chromosome " + '"' + chromosome + '"') else ""} \ ~{if defined(threshold) then ("--threshold " + '"' + threshold + '"') else ""} \ - ~{if defined(ped) then ("--ped " + '"' + ped + '"') else ""} \ + ~{if defined(ped) then ("--ped " + '"' + ped + '"') else ""} + tabix -p vcf ~{outputVCF} } @@ -159,13 +162,16 @@ task Haplotag { } command { + set -e + whatshap haplotag \ ~{vcf} \ ~{alignments} \ ~{if defined(outputFile) then ("--output " + '"' + outputFile+ '"') else ""} \ ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ ~{if defined(regions) then ("--regions " + '"' + regions + '"') else ""} \ - ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ + ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} + python3 -c "import pysam; pysam.index('~{outputFile}')" } From f4fee79b3e26f11c9b6dce07a64e517596a6ca78 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 30 Oct 2020 14:12:09 +0100 Subject: [PATCH 359/902] Update first set of tasks to uniform layout. 
--- CHANGELOG.md | 70 +++++++++++++++++++++++----------- CPAT.wdl | 20 ++++++---- bam2fastx.wdl | 45 +++++++++++++++------- bcftools.wdl | 93 +++++++++++++++++++++++---------------------- bedtools.wdl | 44 +++++++++++++-------- biowdl.wdl | 17 ++++----- bowtie.wdl | 33 ++++++++-------- bwa-mem2.wdl | 38 +++++++++--------- bwa.wdl | 36 +++++++++--------- ccs.wdl | 31 +++++++-------- centrifuge.wdl | 8 ++-- chunked-scatter.wdl | 17 +++++---- clever.wdl | 16 ++++---- collect-columns.wdl | 17 +++++---- common.wdl | 32 +++++++++------- cutadapt.wdl | 45 ++++++++++------------ deepvariant.wdl | 22 +++++------ 17 files changed, 324 insertions(+), 260 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2068f49..2c04b582 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ CCS: `cores` input has been renamed to `threads` to match tool naming. + CCS: Update CCS to version 5. + deepvariant: Add task for DeepVariant. + gatk: Make intervals optional for GenotypeGVCFs. @@ -19,7 +20,8 @@ version 5.0.0-dev + picard: Add CollectHsMetrics and CollectVariantCallingMetrics. + Samtools: Add `threads` to parameter meta for Merge task. + bcftools: add tmpDir input to specify temporary directory when sorting. -+ bcftools: remove outputType and implement indexing based on output file extension. ++ bcftools: remove outputType and implement indexing based on output + file extension. + NanoPack: Add parameter_meta to NanoPlot task. + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). @@ -111,8 +113,8 @@ version 4.0.0 + Change MultiQC inputs. It now accepts an array of reports files. It does not need access to a folder with the reports anymore. MultiQC can now be used as a normal WDL task without hacks. -+ Picard: Make all outputs in `CollectMultipleMetrics` optional. 
This will make sure the - task will not fail if one of the metrics is set to false. ++ Picard: Make all outputs in `CollectMultipleMetrics` optional. This will + make sure the task will not fail if one of the metrics is set to false. + The struct `BowtieIndex` was removed, as it has become obsolete. + The task `ReorderGlobbedScatters` was removed, as it has become obsolete. + Adjusted the memory settings of many tools, especially java tools. @@ -134,7 +136,8 @@ version 4.0.0 + Add faidx task to samtools. + Isoseq3: Remove dirname command from output folder creation step. + Isoseq3: Requires more memory by default, is now 2G. -+ Isoseq3: Remove cp commands and other bash magic, file naming is now solved by pipeline. ++ Isoseq3: Remove cp commands and other bash magic, file naming is now + solved by pipeline. + Lima: Replace mv command with cp. + Add WDL task for smoove (lumpy) sv-caller. @@ -145,7 +148,8 @@ version 3.1.0 + Lima: Add missing output to parameter_meta. + Lima: Remove outputPrefix variable from output section. + Isoseq3: Make sure stderr log file from Refine is unique and not overwritten. -+ Isoseq3: Add workaround in Refine for glob command not locating files in output directory. ++ Isoseq3: Add workaround in Refine for glob command not locating files + in output directory. + Isoseq3: Fix --min-polya-length argument syntax. + Lima: Add workaround for glob command not locating files in output directory. + CCS: Add missing backslash. @@ -189,10 +193,13 @@ version 3.0.0 + Rename HaplotypeCallerGVCF to HaplotypeCaller. Add `gvcf` option to set whether output should be a GVCF. + Centrifuge: Add Krona task specific to Centrifuge. -+ Centrifuge: Fix Centrifuge tests, where sometimes the index files could still not be located. ++ Centrifuge: Fix Centrifuge tests, where sometimes the index files could + still not be located. + Update parameter_meta for TALON, Centrifuge and Minimap2. 
-+ Centrifuge: Fix issue where Centrifuge Inspect did not get the correct index files location. -+ Add `minimumContigLength` input to PlotDenoisedCopyRatios and PlotModeledSegments. ++ Centrifuge: Fix issue where Centrifuge Inspect did not get the correct + index files location. ++ Add `minimumContigLength` input to PlotDenoisedCopyRatios + and PlotModeledSegments. + Add `commonVariantSitesIndex` input to CollectAllelicCounts. + Centrifuge: Fix issue where Centrifuge could not locate index files. + Increase default memory of BWA mem to 32G (was 16G). @@ -228,11 +235,13 @@ version 3.0.0 + Removed the "extraArgs" input from FilterMutectCalls. + Removed unused "verbose" and "quiet" inputs from multiqc. + Added parameter_meta sections to a variety of tasks. -+ Picard's BedToIntervalList outputPath input is now optional (with a default of "regions.interval_list"). ++ Picard's BedToIntervalList outputPath input is now + optional (with a default of "regions.interval_list"). + TALON: Fix SQLite error concerning database/disk space being full. + Update htseq to default image version 0.11.2. + Update biowdl-input-converter in common.wdl to version 0.2.1. -+ Update TALON section to now include the new annotation file output, and add config file creation to the TALON task. ++ Update TALON section to now include the new annotation file output, and + add config file creation to the TALON task. + Removed unused inputs (trimPrimer and format) for cutadapt. + Various minor command tweaks to increase stability. + Fixed unused inputs in bedtools sort (inputs are now used). @@ -245,7 +254,8 @@ version 2.1.0 + Updated biowdl-input-converter version. + GATK CombineGVCFs memory was tripled to prevent it from using a lot of CPU in Garbage Collection mode. -+ Updated parameter_meta sections for Minimap2 and TranscriptClean to wdl-aid format. ++ Updated parameter_meta sections for Minimap2 and TranscriptClean to + wdl-aid format. + Updated cores variable for TALON, the default is now 4. 
+ Updated TALON to version 4.4. + Added parameter_meta sections to the following tools: @@ -262,10 +272,14 @@ version 2.1.0 version 2.0.0 --------------------------- + TranscriptClean: Update TranscriptClean to version 2.0.2. -+ Memory runtime attributes are now Strings indicating total memory, as opposed to Ints indicating memory per core. -+ Memory inputs for most tasks are now Strings, remaining Int memory inputs are renamed to "memoryGb". -+ Use the biowdl-input-converter container for JsonToYaml, to reduce the amount of containers needed. -+ Add biowdl-input-converter and remove SampleConfigToSampleReadgroupLists which it replaces. ++ Memory runtime attributes are now Strings indicating total memory, as + opposed to Ints indicating memory per core. ++ Memory inputs for most tasks are now Strings, remaining Int memory inputs + are renamed to "memoryGb". ++ Use the biowdl-input-converter container for JsonToYaml, to reduce the + amount of containers needed. ++ Add biowdl-input-converter and remove SampleConfigToSampleReadgroupLists + which it replaces. + GATK.GenotypeGVCFs: Increased memoryMultiplier from 2.0 to 3.0 . + Minimap2: Add -k option to minimap2 mapping. + Added bwakit task. @@ -279,7 +293,9 @@ version 1.0.0 + Removed deprecated tasks: + bioconda.installPrefix + mergecounts.MergeCounts -+ GATK.BaseRecalibrator: "knownIndelsSitesVCFs" and "knownIndelsSitesVCFIndexes" are no longer optional, but now have a default of "[]". ++ GATK.BaseRecalibrator: "knownIndelsSitesVCFs" + and "knownIndelsSitesVCFIndexes" are no longer optional, but + now have a default of "[]". + Removed BWA index task. + Removed unused "picardJar" input from bwa.wdl. + All inputs to bedtools Sort are now reflected in the generated command. @@ -295,17 +311,25 @@ version 1.0.0 + Fastqsplitter: use version 1.1. + Picard: Use version 2.20.5 of the biocontainer as this includes the R dependency. + Common: Update dockerTag to dockerImage. 
-+ GATK: Add CombineVariants task that allows, e.g., to merge VCFs from different callers. -+ Mutect2: Add GATK tasks related to variant filtering (LearnReadOrientationModel, MergeStats, GetPileupSummaries, CalculateContamination and FilterMutectCalls). -+ Mutect2: Add "--germline-resource" and "--f1r2-tar-gz" inputs, requiring an update to GATK 4.1.2.0. ++ GATK: Add CombineVariants task that allows, e.g., to merge VCFs + from different callers. ++ Mutect2: Add GATK tasks related to variant + filtering (LearnReadOrientationModel, MergeStats, GetPileupSummaries, + CalculateContamination and FilterMutectCalls). ++ Mutect2: Add "--germline-resource" and "--f1r2-tar-gz" inputs, requiring + an update to GATK 4.1.2.0. + Mutect2: Add necessary missing index attribute for panel of normals. + MultiQC: Add memory variable to multiqc task. -+ GATK: SplitNCigarReads, BaseRecalibration and ApplyBQSR do no longer need regions files as required inputs. -+ VarDict: Add user definable flags (-M, -A, -Q, -d, -v, -f) to the paired VCF filtering script. -+ Cutadapt: If the output is a gzipped file, compress with level 1 (instead of default 6). ++ GATK: SplitNCigarReads, BaseRecalibration and ApplyBQSR do no longer need + regions files as required inputs. ++ VarDict: Add user definable flags (-M, -A, -Q, -d, -v, -f) to the paired + VCF filtering script. ++ Cutadapt: If the output is a gzipped file, compress with + level 1 (instead of default 6). + Cutadapt: Fix issues with read2output when using single-end reads. + Add feature type, idattr and additional attributes to htseq-count. + Added allow-contain option to bowtie. + Added a changelog to keep track of changes. -+ Added sortByName task in samtools to support more memory efficient execution of HTSeqCount. ++ Added sortByName task in samtools to support more memory efficient + execution of HTSeqCount. + Removed the bam index from HTSeqCount's inputs. 
diff --git a/CPAT.wdl b/CPAT.wdl index 3b542e4f..d97031dc 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -26,17 +26,22 @@ task CPAT { String outFilePath File hex File logitModel + File? referenceGenome - File? referenceGenomeIndex # Should be added as input if - # CPAT should not index the reference genome. + # Should be added as input if CPAT should not index the + # reference genome. + File? referenceGenomeIndex Array[String]? startCodons Array[String]? stopCodons + Int timeMinutes = 10 + ceil(size(gene, "G") * 30) String dockerImage = "biocontainers/cpat:v1.2.4_cv1" } - # Some WDL magic in the command section to properly output the start and stopcodons to the command. - # select_first is needed in order to convert the optional arrays to non-optionals. + # Some WDL magic in the command section to properly output the start and + # stopcodons to the command. + # select_first is needed in order to convert the optional arrays + # to non-optionals. command { set -e mkdir -p "$(dirname ~{outFilePath})" @@ -60,18 +65,17 @@ task CPAT { } parameter_meta { + # inputs gene: {description: "Equivalent to CPAT's `--gene` option.", category: "required"} outFilePath: {description: "Equivalent to CPAT's `--outfile` option.", category: "required"} hex: {description: "Equivalent to CPAT's `--hex` option.", category: "required"} logitModel: {description: "Equivalent to CPAT's `--logitModel` option.", category: "required"} referenceGenome: {description: "Equivalent to CPAT's `--ref` option.", category: "advanced"} - referenceGenomeIndex: {description: "The index of the reference. Should be added as input if CPAT should not index the reference genome.", - category: "advanced"} + referenceGenomeIndex: {description: "The index of the reference. 
Should be added as input if CPAT should not index the reference genome.", category: "advanced"} startCodons: {description: "Equivalent to CPAT's `--start` option.", category: "advanced"} stopCodons: {description: "Equivalent to CPAT's `--stop` option.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 18434755..e8884ab0 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -37,7 +37,22 @@ task Bam2Fasta { command { set -e - mkdir -p "$(dirname ~{outputPrefix})" + mkdir -p "$(dirname ~{outputPrefix})" + + # Localise the bam and pbi files so they are next to each other in the + # current folder. + bamFiles="" + for bamFile in ~{sep=" " bam}; + do + ln $bamFile . + bamFiles=$bamFiles" $(basename $bamFile)" + done + + for index in ~{sep=" " bamIndex}; + do + ln $index . + done + bam2fasta \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ @@ -93,15 +108,17 @@ task Bam2Fastq { mkdir -p "$(dirname ~{outputPrefix})" # Localise the bam and pbi files so they are next to each other in the - # current folder - bamfiles="" - for bamfile in ~{sep=" " bam};do - ln $bamfile . - bamfiles=$bamfiles" $(basename $bamfile)" + # current folder. + bamFiles="" + for bamFile in ~{sep=" " bam}; + do + ln $bamFile . + bamFiles=$bamFiles" $(basename $bamFile)" done - for bamindex in ~{sep=" " bamIndex}; do - ln $bamindex . + for index in ~{sep=" " bamIndex}; + do + ln $index . done bam2fastq \ @@ -109,7 +126,7 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - $bamfiles + $bamFiles } output { diff --git a/bcftools.wdl b/bcftools.wdl index a0aeb442..41825747 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2018 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -24,26 +22,27 @@ version 1.0 task Annotate { input { + Array[String] columns = [] + Boolean force = false + Boolean keepSites = false + Boolean noVersion = false + Array[String] samples = [] + Boolean singleOverlaps = false + Array[String] removeAnns = [] + File inputFile + String outputPath = "output.vcf.gz" + File? annsFile String? collapse - Array[String] columns = [] String? exclude - Boolean force = false File? headerLines String? newId String? 
include - Boolean keepSites = false String? markSites - Boolean noVersion = false String? regions File? regionsFile File? renameChrs - Array[String] samples = [] File? samplesFile - Boolean singleOverlaps = false - Array[String] removeAnns = [] - File inputFile - String outputPath = "output.vcf.gz" Int threads = 0 String memory = "256M" @@ -80,9 +79,8 @@ task Annotate { ~{inputFile} ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} - } - + output { File outputVcf = outputPath File? outputVcfIndex = outputPath + ".tbi" @@ -95,31 +93,31 @@ task Annotate { } parameter_meta { + # inputs + columns: {description: "Comma-separated list of columns or tags to carry over from the annotation file (see man page for details).", category: "advanced"} + force: {description: "Continue even when parsing errors, such as undefined tags, are encountered.", category: "advanced"} + keepSites: {description: "Keep sites which do not pass -i and -e expressions instead of discarding them.", category: "advanced"} + noVersion: {description: "Do not append version and command line information to the output VCF header.", category: "advanced"} + samples: {description: "List of samples for sample stats, \"-\" to include all samples.", category: "advanced"} + singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} + removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} + inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "advanced"} collapse: {description: "Treat as identical records with <snps|indels|both|all|some|none>, see man page for details.", category: "advanced"} - columns: {description: "Comma-separated list of columns or tags to carry over from the 
annotation file (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} - force: {description: "Continue even when parsing errors, such as undefined tags, are encountered.", category: "advanced"} headerLines: {description: "Lines to append to the VCF header (see man page for details).", category: "advanced"} newId: {description: "Assign ID on the fly (e.g. --set-id +'%CHROM\_%POS').", category: "advanced"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} - keepSites: {description: "Keep sites which do not pass -i and -e expressions instead of discarding them.", category: "advanced"} markSites: {description: "Annotate sites which are present ('+') or absent ('-') in the -a file with a new INFO/TAG flag.", category: "advanced"} - noVersion: {description: "Do not append version and command line information to the output VCF header.", category: "advanced"} regions: {description: "Restrict to comma-separated list of regions.", category: "advanced"} regionsFile: {description: "Restrict to regions listed in a file.", category: "advanced"} renameChrs: {description: "rename chromosomes according to the map in file (see man page for details).", category: "advanced"} - samples: {description: "List of samples for sample stats, \"-\" to include all samples.", category: "advanced"} samplesFile: {description: "File of samples to include.", category: "advanced"} - singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} - removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} - inputFile: {description: "A vcf or bcf file.", category: "required"} - threads: {description: "Number of extra decompression threads [0].", category: "advanced"} - dockerImage: {description: "The docker 
image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -128,6 +126,7 @@ task Sort { File inputFile String outputPath = "output.vcf.gz" String tmpDir = "./sorting-tmp" + String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" @@ -159,6 +158,7 @@ task Sort { } parameter_meta { + # inputs inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} tmpDir: {description: "The location of the temporary files during the bcftools sorting.", category: "advanced"} @@ -166,46 +166,45 @@ task Sort { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } - - } task Stats { input { File inputVcf File inputVcfIndex + String outputPath = basename(inputVcf) + ".stats" + Boolean firstAlleleOnly = false + Boolean splitByID = false + Array[String] samples = [] + Boolean verbose = false + File? compareVcf File? compareVcfIndex - String outputPath = basename(inputVcf) + ".stats" String? afBins String? afTag - Boolean firstAlleleOnly = false String? collapse String? depth String? exclude - File? exons + File? exons String? applyFilters File? fastaRef File? fastaRefIndex - String? include - Boolean splitByID = false + String? 
include String? regions File? regionsFile - Array[String] samples = [] - File? samplesFile - String? targets + File? samplesFile + String? targets File? targetsFile String? userTsTv - Boolean verbose = false Int threads = 0 - Int timeMinutes = 1 + 2* ceil(size(select_all([inputVcf, compareVcf]), "G")) # TODO: Estimate, 2 minutes per GB, refine later. - String memory = "256M" + String memory = "256M" + Int timeMinutes = 1 + 2* ceil(size(select_all([inputVcf, compareVcf]), "G")) # TODO: Estimate, 2 minutes per GB, refine later. String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } - + command { - set -e + set -e mkdir -p $(dirname ~{outputPath}) bcftools stats \ ~{"--af-bins " + afBins} \ @@ -237,19 +236,24 @@ task Stats { runtime { cpu: threads + 1 - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes docker: dockerImage } parameter_meta { + # inputs inputVcf: {description: "The VCF to be analysed.", category: "required"} inputVcfIndex: {description: "The index for the input VCF.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + firstAlleleOnly: {description: "Include only 1st allele at multiallelic sites.", category: "advanced"} + splitByID: {description: "Collect stats for sites with ID separately (known vs novel).", category: "advanced"} + samples: {description: "List of samples for sample stats, \"-\" to include all samples.", category: "advanced"} + verbose: {description: "Produce verbose per-site and per-sample output.", category: "advanced"} compareVcf: {description: "When inputVcf and compareVCF are given, the program generates separate stats for intersection and the complements. 
By default only sites are compared, samples must be given to include also sample columns.", category: "common"} compareVcfIndex: {description: "Index for the compareVcf.", category: "common"} afBins: {description: "Allele frequency bins, a list (0.1,0.5,1) or a file (0.1\n0.5\n1).", category: "advanced"} afTag: {description: "Allele frequency tag to use, by default estimated from AN,AC or GT.", category: "advanded"} - firstAlleleOnly: {description: "Include only 1st allele at multiallelic sites.", category: "advanced"} collapse: {description: "Treat as identical records with <snps|indels|both|all|some|none>, see man page for details.", category: "advanced"} depth: {description: "Depth distribution: min,max,bin size [0,500,1].", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} @@ -258,20 +262,16 @@ task Stats { fastaRef: {description: "Faidx indexed reference sequence file to determine INDEL context.", category: "advanced"} fastaRefIndex: {description: "Index file (.fai) for fastaRef. Must be supplied if fastaRef is supplied.", category: "advanced"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} - splitByID: {description: "Collect stats for sites with ID separately (known vs novel).", category: "advanced"} regions: {description: "Restrict to comma-separated list of regions.", category: "advanced"} regionsFile: {description: "Restrict to regions listed in a file.", category: "advanced"} - samples: {description: "List of samples for sample stats, \"-\" to include all samples.", category: "advanced"} samplesFile: {description: "File of samples to include.", category: "advanced"} targets: {description: "Similar to regions but streams rather than index-jumps.", category: "advanced"} targetsFile: {description: "Similar to regionsFile but streams rather than index-jumps.", category: "advanced"} userTsTv: {description: "
<TAG[:min:max:n]>. Collect Ts/Tv stats for any tag using the given binning [0:1:100].", category: "advanced"} threads: {description: "Number of extra decompression threads [0].", category: "advanced"} - verbose: {description: "Produce verbose per-site and per-sample output.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -279,6 +279,7 @@ task View { input { File inputFile String outputPath = "output.vcf" + String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" @@ -296,6 +297,7 @@ task View { ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} } + output { File outputVcf = outputPath File? 
outputVcfIndex = outputPath + ".tbi" @@ -308,6 +310,7 @@ task View { } parameter_meta { + # inputs inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} diff --git a/bedtools.wdl b/bedtools.wdl index c228d6c6..b7a03c17 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -25,6 +25,7 @@ task Complement { File faidx File inputBed String outputBed = basename(inputBed, "\.bed") + ".complement.bed" + String memory = "~{512 + ceil(size([inputBed, faidx], "M"))}M" Int timeMinutes = 1 + ceil(size([inputBed, faidx], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" @@ -52,13 +53,13 @@ task Complement { } parameter_meta { + # inputs faidx: {description: "The fasta index (.fai) file from which to extract the genome sizes.", category: "required"} inputBed: {description: "The inputBed to complement.", category: "required"} outputBed: {description: "The path to write the output to.", category: "advanced"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -66,12 +67,14 @@ task Merge { input { File inputBed String outputBed = "merged.bed" + String memory = "~{512 + ceil(size(inputBed, "M"))}M" Int timeMinutes = 1 + ceil(size(inputBed, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } command { + set -e bedtools merge -i ~{inputBed} > ~{outputBed} } @@ -86,12 +89,12 @@ task Merge { } parameter_meta { + # inputs inputBed: {description: "The bed to merge.", category: "required"} outputBed: {description: "The path to write the output to.", category: "advanced"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -100,6 +103,7 @@ task MergeBedFiles { input { Array[File]+ bedFiles String outputBed = "merged.bed" + String memory = "~{512 + ceil(size(bedFiles, "M"))}M" Int timeMinutes = 1 + ceil(size(bedFiles, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" @@ -120,13 +124,14 @@ task MergeBedFiles { time_minutes: timeMinutes docker: dockerImage } + parameter_meta { + # inputs bedFiles: {description: "The bed files to merge.", category: "required"} outputBed: {description: "The path to write the output to.", category: "advanced"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -139,9 +144,13 @@ task Sort { Boolean chrThenSizeD = false Boolean chrThenScoreA = false Boolean chrThenScoreD = false + String outputBed = "output.sorted.bed" + File? genome File? faidx - String outputBed = "output.sorted.bed" + + String memory = "~{512 + ceil(size(inputBed, "M"))}M" + Int timeMinutes = 1 + ceil(size(inputBed, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -166,6 +175,8 @@ task Sort { } runtime { + memory: memory + time_minutes: timeMinutes docker: dockerImage } } @@ -174,13 +185,15 @@ task Intersect { input { File regionsA File regionsB - # Giving a faidx file will set the sorted option. - File? faidx String outputBed = "intersect.bed" + + File? faidx # Giving a faidx file will set the sorted option. 
+ String memory = "~{512 + ceil(size([regionsA, regionsB], "M"))}M" Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } + Boolean sorted = defined(faidx) command { @@ -205,14 +218,13 @@ task Intersect { } parameter_meta { - faidx: {description: "The fasta index (.fai) file that is used to create the genome file required for sorted output. Implies sorted option.", - category: "common"} - regionsA: {description: "Region file a to intersect", category: "required"} - regionsB: {description: "Region file b to intersect", category: "required"} - outputBed: {description: "The path to write the output to", category: "advanced"} + # inputs + regionsA: {description: "Region file a to intersect.", category: "required"} + regionsB: {description: "Region file b to intersect.", category: "required"} + outputBed: {description: "The path to write the output to.", category: "advanced"} + faidx: {description: "The fasta index (.fai) file that is used to create the genome file required for sorted output. Implies sorted option.", category: "common"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/biowdl.wdl b/biowdl.wdl index 838755d9..8a1f9dfd 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -32,6 +32,7 @@ task InputConverter { Boolean checkFileMd5sums=false Boolean old=false + String memory = "128M" Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" } @@ -52,22 +53,20 @@ task InputConverter { } runtime { - memory: "128M" + memory: memory time_minutes: timeMinutes docker: dockerImage } parameter_meta { + # inputs samplesheet: {description: "The samplesheet to be processed.", category: "required"} - outputFile: {description: "The location the JSON representation of the samplesheet should be written to.", - category: "advanced"} - skipFileCheck: {description: "Whether or not the existance of the files mentioned in the samplesheet should be checked.", - category: "advanced"} - checkFileMd5sums: {description: "Whether or not the MD5 sums of the files mentioned in the samplesheet should be checked.", - category: "advanced"} + outputFile: {description: "The location the JSON representation of the samplesheet should be written to.", category: "advanced"} + skipFileCheck: {description: "Whether or not the existance of the files mentioned in the samplesheet should be checked.", category: "advanced"} + checkFileMd5sums: {description: "Whether or not the MD5 sums of the files mentioned in the samplesheet should be checked.", category: "advanced"} old: {description: "Whether or not the old samplesheet format should be used.", category: "advanced"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/bowtie.wdl b/bowtie.wdl index b3f3ceae..7fb1b614 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2018 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -28,30 +26,31 @@ task Bowtie { Array[File] readsDownstream = [] String outputPath = "mapped.bam" Array[File]+ indexFiles - Int? seedmms - Int? seedlen - Int? k Boolean best = false Boolean strata = false Boolean allowContain = false + + Int? seedmms + Int? seedlen + Int? k String? samRG + String picardXmx = "4G" Int threads = 1 - Int timeMinutes = 1 + ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads) String memory = "~{5 + ceil(size(indexFiles, "G"))}G" - String picardXmx = "4G" + Int timeMinutes = 1 + ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads) # Image contains bowtie=1.2.2 and picard=2.9.2 String dockerImage = "quay.io/biocontainers/mulled-v2-bfe71839265127576d3cd749c056e7b168308d56:1d8bec77b352cdcf3e9ff3d20af238b33ed96eae-0" } # Assume fastq input with -q flag. - # The output always needs to be SAM as it is piped into Picard SortSam + # The output always needs to be SAM as it is piped into Picard SortSam. # Hence, the --sam flag is used. 
- command { set -e -o pipefail mkdir -p "$(dirname ~{outputPath})" - bowtie -q \ + bowtie \ + -q \ --sam \ ~{"--seedmms " + seedmms} \ ~{"--seedlen " + seedlen} \ @@ -84,24 +83,22 @@ task Bowtie { } parameter_meta { + # inputs readsUpstream: {description: "The first-/single-end fastq files.", category: "required"} readsDownstream: {description: "The second-end fastq files.", category: "common"} outputPath: {description: "The location the output BAM file should be written to.", category: "common"} indexFiles: {description: "The index files for bowtie.", category: "required"} - seedmms: {description: "Equivalent to bowtie's `--seedmms` option.", category: "advanced"} - seedlen: {description: "Equivalent to bowtie's `--seedlen` option.", category: "advanced"} - k: {description: "Equivalent to bowtie's `-k` option.", category: "advanced"} best: {description: "Equivalent to bowtie's `--best` flag.", category: "advanced"} strata: {description: "Equivalent to bowtie's `--strata` flag.", category: "advanced"} allowContain: {description: "Equivalent to bowtie's `--allow-contain` flag.", category: "advanced"} + seedmms: {description: "Equivalent to bowtie's `--seedmms` option.", category: "advanced"} + seedlen: {description: "Equivalent to bowtie's `--seedlen` option.", category: "advanced"} + k: {description: "Equivalent to bowtie's `-k` option.", category: "advanced"} samRG: {description: "Equivalent to bowtie's `--sam-RG` option.", category: "advanced"} - - picardXmx: {description: "The maximum memory available to the picard (used for sorting the output). Should be lower than `memory` to accommodate JVM overhead and bowtie's memory usage.", - category: "advanced"} + picardXmx: {description: "The maximum memory available to the picard (used for sorting the output). 
Should be lower than `memory` to accommodate JVM overhead and bowtie's memory usage.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index 6ea4578d..34cd38a6 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -26,33 +26,35 @@ task Mem { File? read2 BwaIndex bwaIndex String outputPrefix - String? readgroup Boolean sixtyFour = false Boolean usePostalt = false - Int threads = 4 - Int? sortThreads Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 - Int? memoryGb + + String? readgroup + Int? sortThreads + Int? memoryGb + + Int threads = 4 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10 String dockerImage = "quay.io/biocontainers/mulled-v2-6a15c99309c82b345497d24489bee67bbb76c2f6:1c9c3227b9bf825a8dc9726a25701aa23c0b1f12-0" } - # Samtools sort may block the pipe while it is writing data to disk. + # Samtools sort may block the pipe while it is writing data to disk. # This can lead to cpu underutilization. - # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. + # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. 
Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) - # BWA-mem2's index files contain 2 BWT indexes of which only one is used. .2bit64 is used by default and + # BWA-mem2's index files contain 2 BWT indexes of which only one is used. .2bit64 is used by default and # .8bit32 is used for avx2. # The larger one of these is the 8bit32 index. Since we do not know beforehand which one is used we need to accomodate for that. - # Using only the 8bit32 index uses 57,5% of the index files. Since bwa-mem2 uses slightly more memory than the index + # Using only the 8bit32 index uses 57,5% of the index files. Since bwa-mem2 uses slightly more memory than the index. # We put it at 62% as a safety factor. That means the memory usage for bwa-mem will be 53G for a human genome. Resulting in 60G total # on 8 cores with samtools with 3 sort threads. Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 0.62) + sortMemoryPerThreadGb * totalSortThreads - # The bwa postalt script is out commented as soon as usePostalt = false. + # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. command { set -e @@ -81,7 +83,7 @@ task Mem { runtime { # One extra thread for bwa-postalt + samtools is not needed. # These only use 5-10% of compute power and not always simultaneously. 
- cpu: threads + cpu: threads memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" time_minutes: timeMinutes docker: dockerImage @@ -92,21 +94,21 @@ task Mem { read1: {description: "The first-end fastq file.", category: "required"} read2: {description: "The second-end fastq file.", category: "common"} bwaIndex: {description: "The BWA index, including (optionally) a .alt file.", category: "required"} - usePostalt: {description: "Whether to use the postalt script from bwa kit."} outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"} - readgroup: {description: "A readgroup identifier.", category: "common"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} - threads: {description: "The number of threads to use for alignment.", category: "advanced"} - memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} - sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + usePostalt: {description: "Whether to use the postalt script from bwa kit."} sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + readgroup: {description: "A readgroup identifier.", category: "common"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} + threads: {description: "The number of threads to use for alignment.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputBam: "The produced BAM file." + outputBam: {description: "The produced BAM file."} + outputHla: {description: "The produced HLA file."} } } diff --git a/bwa.wdl b/bwa.wdl index fdeb870f..0f09f7a9 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -26,28 +26,30 @@ task Mem { File? read2 BwaIndex bwaIndex String outputPrefix - String? readgroup Boolean sixtyFour = false Boolean usePostalt = false - Int threads = 4 - Int? sortThreads Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 - Int? memoryGb + + String? readgroup + Int? sortThreads + Int? memoryGb + + Int threads = 4 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10 String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } - # Samtools sort may block the pipe while it is writing data to disk. + # Samtools sort may block the pipe while it is writing data to disk. # This can lead to cpu underutilization. - # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. + # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) - # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. + # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads - # The bwa postalt script is out commented as soon as usePostalt = false. + # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. command { set -e @@ -76,7 +78,7 @@ task Mem { runtime { # One extra thread for bwa-postalt + samtools is not needed. # These only use 5-10% of compute power and not always simultaneously. - cpu: threads + cpu: threads memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" time_minutes: timeMinutes docker: dockerImage @@ -87,21 +89,21 @@ task Mem { read1: {description: "The first-end fastq file.", category: "required"} read2: {description: "The second-end fastq file.", category: "common"} bwaIndex: {description: "The BWA index, including (optionally) a .alt file.", category: "required"} - usePostalt: {description: "Whether to use the postalt script from bwa kit."} outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"} - readgroup: {description: "A readgroup identifier.", category: "common"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} - threads: {description: "The number of threads to use for alignment.", category: "advanced"} - memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} - sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + usePostalt: {description: "Whether to use the postalt script from bwa kit."} sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + readgroup: {description: "A readgroup identifier.", category: 
"common"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} + threads: {description: "The number of threads to use for alignment.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputBam: "The produced BAM file." + outputBam: {description: "The produced BAM file."} + outputHla: {description: "The produced HLA file."} } } diff --git a/ccs.wdl b/ccs.wdl index cab15fea..4446937b 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -22,19 +22,20 @@ version 1.0 task CCS { input { + File subreadsFile + String outputPrefix Int minPasses = 3 Int minLength = 10 Int maxLength = 50000 Boolean byStrand = false Float minReadQuality = 0.99 String logLevel = "WARN" - File subreadsFile + File? subreadsIndexFile String? chunkString - String outputPrefix - - Int cores = 2 - String memory = "2G" + + Int threads = 2 + String memory = "4G" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/pbccs:5.0.0--0" } @@ -49,7 +50,7 @@ task CCS { ~{true="--by-strand" false="" byStrand} \ --min-rq ~{minReadQuality} \ --log-level ~{logLevel} \ - --num-threads ~{cores} \ + --num-threads ~{threads} \ ~{"--chunk " + chunkString} \ ~{"--report-json " + outputPrefix + ".ccs.report.json"} \ ~{"--log-file " + outputPrefix + ".ccs.stderr.log"} \ @@ -65,7 +66,7 @@ task CCS { } runtime { - cpu: cores + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -73,17 +74,17 @@ task CCS { parameter_meta { # inputs + subreadsFile: {description: "Subreads input file.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} minPasses: {description: "Minimum number of full-length subreads required to generate ccs for a ZMW.", category: "advanced"} minLength: {description: "Minimum draft length before polishing.", category: "advanced"} maxLength: {description: "Maximum draft length before polishing.", category: "advanced"} byStrand: {description: "Generate a consensus for each strand.", category: "advanced"} minReadQuality: {description: "Minimum predicted accuracy in [0, 1].", category: "common"} logLevel: {description: "Set log level. 
Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} - subreadsFile: {description: "Subreads input file.", category: "required"} subreadsIndexFile: {description: "Index for the subreads input file, required when using chunkString.", category: "advanced"} chunkString: {descpription: "Chunk string (e.g. 1/4, 5/5) for CCS.", category: "advanced"} - outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - cores: {description: "The number of cores to be used.", category: "advanced"} + threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/centrifuge.wdl b/centrifuge.wdl index 1e7a0b45..1637abdd 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -94,13 +94,13 @@ task Build { task Classify { input { + Array[File]+ read1 + Array[File] read2 = [] String inputFormat = "fastq" Boolean phred64 = false Int minHitLength = 22 Array[File]+ indexFiles - Array[File]+ read1 String outputPrefix - Array[File] read2 = [] Int? trim5 Int? 
trim3 @@ -155,13 +155,13 @@ task Classify { parameter_meta { # inputs + read1: {description: "List of files containing mate 1s, or unpaired reads.", category: "required"} + read2: {description: "List of files containing mate 2s.", category: "common"} inputFormat: {description: "The format of the read file(s).", category: "required"} phred64: {description: "If set to true, phred+64 encoding is used.", category: "required"} minHitLength: {description: "Minimum length of partial hits.", category: "required"} indexFiles: {description: "The files of the index for the reference genomes.", category: "required"} - read1: {description: "List of files containing mate 1s, or unpaired reads.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - read2: {description: "List of files containing mate 2s.", category: "common"} trim5: {description: "Trim bases from 5' (left) end of each read before alignment.", category: "common"} trim3: {description: "Trim bases from 3' (right) end of each read before alignment.", category: "common"} reportMaxDistinct: {description: "It searches for at most distinct, primary assignments for each read or pair.", category: "common"} diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 115c5ca4..844d6990 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -25,6 +25,7 @@ task ChunkedScatter { File inputFile String prefix = "./scatter" Boolean splitContigs = false + Int? chunkSize Int? overlap Int? 
minimumBasesPerFile @@ -57,15 +58,16 @@ task ChunkedScatter { } parameter_meta { + # inputs inputFile: {description: "Either a bed file describing regiosn of intrest or a sequence dictionary.", category: "required"} prefix: {description: "The prefix for the output files.", category: "advanced"} + splitContigs: {description: "If set, contigs are allowed to be split up over multiple files.", category: "advanced"} chunkSize: {description: "Equivalent to chunked-scatter's `-c` option.", category: "advanced"} overlap: {description: "Equivalent to chunked-scatter's `-o` option.", category: "advanced"} minimumBasesPerFile: {description: "Equivalent to chunked-scatter's `-m` option.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -76,9 +78,11 @@ task ScatterRegions { String prefix = "scatters/scatter-" Boolean splitContigs = false Int scatterSizeMillions = 1000 + Int? scatterSize - Int timeMinutes = 2 + String memory = "256M" + Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/chunked-scatter:0.2.0--py_0" } @@ -105,15 +109,14 @@ task ScatterRegions { } parameter_meta { + # inputs inputFile: {description: "The input file, either a bed file or a sequence dict. Which format is used is detected by the extension: '.bed', '.fai' or '.dict'.", category: "required"} prefix: {description: "The prefix of the ouput files. 
Output will be named like: .bed, in which N is an incrementing number. Default 'scatter-'.", category: "advanced"} splitContigs: {description: "If set, contigs are allowed to be split up over multiple files.", category: "advanced"} scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} scatterSize: {description: "Overrides scatterSizeMillions with a smaller value if set.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/clever.wdl b/clever.wdl index 3a6515f7..75e889b3 100644 --- a/clever.wdl +++ b/clever.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2018 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -74,12 +72,12 @@ task Mateclever { indexedFiteredBam: {description: "The index of the filtered bam file.", category: "required"} bwaIndex: {description: "The BWA index files.", category: "required"} predictions: {description: "The predicted deletions (VCF) from clever.", category: "required"} - maxOffset: {description: "The maximum center distance between split-read and read-pair deletion to be considered identical.", category: "advanced"} - maxLengthDiff: {description: "The maximum length difference between split-read and read-pair deletion to be considered identical.", category: "advanced"} - cleverMaxDelLength: {description: "The maximum deletion length to look for in Clever predictions.", 
category: "advanced"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - threads: {description: "The the number of threads required to run a program", category: "advanced"} - memory: {description: "The memory required to run the programs", category: "advanced"} + cleverMaxDelLength: {description: "The maximum deletion length to look for in Clever predictions.", category: "advanced"} + maxLengthDiff: {description: "The maximum length difference between split-read and read-pair deletion to be considered identical.", category: "advanced"} + maxOffset: {description: "The maximum center distance between split-read and read-pair deletion to be considered identical.", category: "advanced"} + threads: {description: "The number of threads required to run a program.", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task.
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -128,8 +126,8 @@ task Prediction { bamIndex: {description: "The index bam file.", category: "required"} bwaIndex: {description: "The BWA index files.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - threads: {description: "The the number of threads required to run a program", category: "advanced"} - memory: {description: "The memory required to run the programs", category: "advanced"} + threads: {description: "The number of threads required to run a program.", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/collect-columns.wdl b/collect-columns.wdl index fe41c5e8..67db6179 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -24,12 +24,13 @@ task CollectColumns { input { Array[File]+ inputTables String outputPath + Boolean header = false + Boolean sumOnDuplicateId = false + Int? featureColumn Int? valueColumn Int? separator Array[String]? sampleNames - Boolean header = false - Boolean sumOnDuplicateId = false Array[String]? additionalAttributes File? referenceGtf String?
featureAttribute @@ -67,20 +68,20 @@ task CollectColumns { } parameter_meta { + # inputs inputTables: {description: "The tables from which columns should be taken.", category: "required"} outputPath: {description: "The path to which the output should be written.", category: "required"} + header: {description: "Equivalent to the -H flag of collect-columns.", category: "advanced"} + sumOnDuplicateId: {description: "Equivalent to the -S flag of collect-columns.", category: "advanced"} featureColumn: {description: "Equivalent to the -f option of collect-columns.", category: "advanced"} valueColumn: {description: "Equivalent to the -c option of collect-columns.", category: "advanced"} separator: {description: "Equivalent to the -s option of collect-columns.", category: "advanced"} sampleNames: {description: "Equivalent to the -n option of collect-columns.", category: "advanced"} - header: {description: "Equivalent to the -H flag of collect-columns.", category: "advanced"} - sumOnDuplicateId: {description: "Equivalent to the -S flag of collect-columns.", category: "advanced"} additionalAttributes: {description: "Equivalent to the -a option of collect-columns.", category: "advanced"} referenceGtf: {description: "Equivalent to the -g option of collect-columns.", category: "advanced"} featureAttribute: {description: "Equivalent to the -F option of collect-columns.", category: "advanced"} - memoryGb: {description: "The maximum amount of memory the job will need in GB", category: "advanced"} + memoryGb: {description: "The maximum amount of memory the job will need in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} diff --git a/common.wdl b/common.wdl index e96cc1c8..b3878bb6 100644 --- a/common.wdl +++ b/common.wdl @@ -45,7 +45,7 @@ task CheckFileMD5 { input { File file String md5 - # By default cromwell expects /bin/bash to be present in the container + # By default cromwell expects /bin/bash to be present in the container. # The 'bash' container does not fill this requirement. (It is in /usr/local/bin/bash) # Use a stable version of debian:stretch-slim for this. (Smaller than ubuntu) String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" @@ -71,7 +71,7 @@ task ConcatenateTextFiles { Boolean zip = false } - # When input and output is both compressed decompression is not needed + # When input and output is both compressed decompression is not needed. String cmdPrefix = if (unzip && !zip) then "zcat " else "cat " String cmdSuffix = if (!unzip && zip) then " | gzip -c " else "" @@ -116,8 +116,8 @@ task Copy { } task CreateLink { - # Making this of type File will create a link to the copy of the file in the execution - # folder, instead of the actual file. + # Making this of type File will create a link to the copy of the file in + # the execution folder, instead of the actual file. # This cannot be propperly call-cached or used within a container. 
input { String inputFile @@ -182,6 +182,7 @@ task TextToFile { input { String text String outputFile = "out.txt" + Int timeMinutes = 1 String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -194,18 +195,19 @@ task TextToFile { File out = outputFile } - parameter_meta { - text: {description: "The text to print", category: "required"} - outputFile: {description: "The name of the output file", category: "common"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } runtime { memory: "1G" time_minutes: timeMinutes docker: dockerImage } + + parameter_meta { + # inputs + text: {description: "The text to print.", category: "required"} + outputFile: {description: "The name of the output file.", category: "common"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } } task YamlToJson { @@ -213,11 +215,12 @@ task YamlToJson { File yaml String outputJson = basename(yaml, "\.ya?ml$") + ".json" - Int timeMinutes = 1 String memory = "128M" + Int timeMinutes = 1 # biowdl-input-converter has python and pyyaml. 
String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" } + command { set -e mkdir -p "$(dirname ~{outputJson})" @@ -230,6 +233,7 @@ task YamlToJson { json.dump(content, output_json) CODE } + output { File json = outputJson } @@ -241,12 +245,12 @@ task YamlToJson { } parameter_meta { + # inputs yaml: {description: "The YAML file to convert.", category: "required"} outputJson: {description: "The location the output JSON file should be written to.", category: "advanced"} memory: {description: "The maximum amount of memory the job will need.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/cutadapt.wdl b/cutadapt.wdl index 7faeaff1..74f57912 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -32,6 +32,14 @@ task Cutadapt { Array[String] adapterRead2 = [] Array[String] frontRead2 = [] Array[String] anywhereRead2 = [] + String reportPath = "cutadapt_report.txt" + # Cutadapt compresses the zipped output files with a ridiculously + # high compression level (5 or 6). + # This is not the fast compression preset. It takes up to 400% more + # CPU time for a 20% reduction in file size. + # Hence we use compression level 1 here. + Int compressionLevel = 1 # This only affects outputs with the .gz suffix. + Boolean? interleaved String? pairFilter Float? errorRate @@ -52,7 +60,7 @@ task Cutadapt { String? stripSuffix String? prefix String? suffix - Int? minimumLength = 2 # Necessary to prevent creation of empty reads or 1 base reads. + Int? 
minimumLength = 2 # Necessary to prevent creation of empty reads or 1 base reads. Int? maximumLength Int? maxN Boolean? discardTrimmed @@ -73,11 +81,7 @@ task Cutadapt { Boolean? bwa Boolean? zeroCap Boolean? noZeroCap - String reportPath = "cutadapt_report.txt" - # Cutadapt compresses the zipped output files with a ridiculously high compression level (5 or 6). - # This is not the fast compression preset. It takes up to 400% more CPU time for a 20% reduction in file size. - # Hence we use compression level 1 here. - Int compressionLevel = 1 # This only affects outputs with the .gz suffix. + Int cores = 4 String memory = "~{300 + 100 * cores}M" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) @@ -152,8 +156,8 @@ task Cutadapt { output{ File cutRead1 = read1output - File? cutRead2 = read2output File report = reportPath + File? cutRead2 = read2output File? tooLongOutput=tooLongOutputPath File? tooShortOutput=tooShortOutputPath File? untrimmedOutput=untrimmedOutputPath @@ -173,22 +177,19 @@ task Cutadapt { } parameter_meta { + # inputs read1: {description: "The first or single end fastq file to be run through cutadapt.", category: "required"} read2: {description: "An optional second end fastq file to be run through cutadapt.", category: "common"} read1output: {description: "The name of the resulting first or single end fastq file.", category: "common"} read2output: {description: "The name of the resulting second end fastq file.", category: "common"} - adapter: {description: "A list of 3' ligated adapter sequences to be cut from the given first or single end fastq file.", - category: "common"} - front: {description: "A list of 5' ligated adapter sequences to be cut from the given first or single end fastq file.", - category: "advanced"} - anywhere: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given first or single end fastq file.", - category: "advanced"} - adapterRead2: {description: "A list of 3' ligated adapter 
sequences to be cut from the given second end fastq file.", - category: "common"} - frontRead2: {description: "A list of 5' ligated adapter sequences to be cut from the given second end fastq file.", - category: "advanced"} - anywhereRead2: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given second end fastq file.", - category: "advanced"} + adapter: {description: "A list of 3' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "common"} + front: {description: "A list of 5' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "advanced"} + anywhere: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "advanced"} + adapterRead2: {description: "A list of 3' ligated adapter sequences to be cut from the given second end fastq file.", category: "common"} + frontRead2: {description: "A list of 5' ligated adapter sequences to be cut from the given second end fastq file.", category: "advanced"} + anywhereRead2: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given second end fastq file.", category: "advanced"} + reportPath: {description: "The name of the file to write cutadapts's stdout to, this contains some metrics.", category: "common"} + compressionLevel: {description: "The compression level if gzipped output is used.", category: "advanced"} interleaved: {description: "Equivalent to cutadapt's --interleaved flag.", category: "advanced"} pairFilter: {description: "Equivalent to cutadapt's --pair-filter option.", category: "advanced"} errorRate: {description: "Equivalent to cutadapt's --error-rate option.", category: "advanced"} @@ -230,13 +231,9 @@ task Cutadapt { bwa: {description: "Equivalent to cutadapt's --bwa flag.", category: "advanced"} zeroCap: {description: "Equivalent to cutadapt's --zero-cap flag.", category: "advanced"} noZeroCap: 
{description: "Equivalent to cutadapt's --no-zero-cap flag.", category: "advanced"} - reportPath: {description: "The name of the file to write cutadapts's stdout to, this contains some metrics.", - category: "common"} - compressionLevel: {description: "The compression level if gzipped output is used.", category: "advanced"} cores: {description: "The number of cores to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/deepvariant.wdl b/deepvariant.wdl index f5661886..20bf8e27 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -28,6 +28,7 @@ task RunDeepVariant { File inputBamIndex String modelType String outputVcf + String? postprocessVariantsExtraArgs File? customizedModel Int? numShards @@ -43,7 +44,6 @@ task RunDeepVariant { command { set -e - /opt/deepvariant/bin/run_deepvariant \ --ref ~{referenceFasta} \ --reads ~{inputBam} \ @@ -59,36 +59,36 @@ task RunDeepVariant { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } output { File outputVCF = outputVcf File outputVCFIndex = outputVCF + ".tbi" + Array[File] outputVCFStatsReport = glob("*.visual_report.html") File? outputGVCF = outputGVcf File? 
outputGVCFIndex = outputGVcf + ".tbi" - Array[File] outputVCFStatsReport = glob("*.visual_report.html") } - + parameter_meta { - referenceFasta: {description: "Genome reference to use", category: "required"} + # inputs + referenceFasta: {description: "Genome reference to use.", category: "required"} referenceFastaIndex: {description: "Index for the genome reference file.", category: "required"} inputBam: {description: "Aligned, sorted, indexed BAM file containing the reads we want to call.", category: "required"} inputBamIndex: {description: "Index for the input bam file.", category: "required"} - modelType: {description: ". Type of model to use for variant calling. Each model_type has an associated default model, which can be overridden by the --customized_model flag", category: "required"} + modelType: {description: "Type of model to use for variant calling. Each model_type has an associated default model, which can be overridden by the --customized_model flag.", category: "required"} outputVcf: {description: "Path where we should write VCF file.", category: "required"} - customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. If not set, the default for each --model_type will be used", category: "advanced"} + postprocessVariantsExtraArgs: {description: "A comma-separated list of flag_name=flag_value. 'flag_name' has to be valid flags for postprocess_variants.py.", category: "advanced"} + customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step.
If not set, the default for each --model_type will be used.", category: "advanced"} numShards: {description: "Number of shards for make_examples step.", category: "common"} outputGVcf: {description: "Path where we should write gVCF file.", category: "common"} regions: {description: "List of regions we want to process, in BED/BEDPE format.", category: "advanced"} sampleName: {description: "Sample name to use instead of the sample name from the input reads BAM (SM tag in the header).", category: "common"} VCFStatsReport: {description: "Output a visual report (HTML) of statistics about the output VCF.", category: "common"} - postprocessVariantsExtraArgs: {description: "A comma-separated list of flag_name=flag_value. 'flag_name' has to be valid flags for calpostprocess_variants.py.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From b131d926dd3cb7e2dc59adecb015fa09d1e3d3bc Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 30 Oct 2020 17:11:41 +0100 Subject: [PATCH 360/902] Edit another batch of tasks to uniform layout.
--- bam2fastx.wdl | 10 +- delly.wdl | 6 +- fastqc.wdl | 49 ++-- fastqsplitter.wdl | 22 +- flash.wdl | 12 +- gatk.wdl | 612 +++++++++++++++++++++------------------------- gffcompare.wdl | 2 +- 7 files changed, 338 insertions(+), 375 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index e8884ab0..1b911dbb 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -37,18 +37,18 @@ task Bam2Fasta { command { set -e - mkdir -p "$(dirname ~{outputPrefix})"' + mkdir -p "$(dirname ~{outputPrefix})" # Localise the bam and pbi files so they are next to each other in the # current folder. bamFiles="" - for bamFile in ~{sep=" " bam}; + for bamFile in ~{sep=" " bam} do ln ${bamFile} . bamFiles=${bamFiles}" $(basename ${bamFile})" done - for index in ~{sep=" " bamIndex}; + for index in ~{sep=" " bamIndex} do ln ${index} . done @@ -110,13 +110,13 @@ task Bam2Fastq { # Localise the bam and pbi files so they are next to each other in the # current folder. bamFiles="" - for bamFile in ~{sep=" " bam}; + for bamFile in ~{sep=" " bam} do ln ${bamFile} . bamFiles=${bamFiles}" $(basename ${bamFile})" done - for index in ~{sep=" " bamIndex}; + for index in ~{sep=" " bamIndex} do ln ${index} . 
done diff --git a/delly.wdl b/delly.wdl index f708f494..ffe9023a 100644 --- a/delly.wdl +++ b/delly.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2018 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -59,9 +57,9 @@ task CallSV { bamFile: {description: "The bam file to process.", category: "required"} bamIndex: {description: "The index bam file.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} - referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } + referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } outputPath: {description: "The location the output VCF file should be written.", category: "common"} - memory: {description: "The memory required to run the programs", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/fastqc.wdl b/fastqc.wdl index 04b6813f..dd3dfc2e 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -29,6 +29,7 @@ task Fastqc { Boolean noFilter = false Boolean extract = false Boolean nogroup = false + Int? minLength String? format File? contaminants @@ -37,32 +38,35 @@ task Fastqc { Int? kmers String? dir - Int threads = 1 # Set javaXmx a little high. Equal to fastqc default with 7 threads. # This is because some fastq files need more memory. 2G per core # is a nice cluster default, so we use all the rest of the memory for # fastqc so we should have as little OOM crashes as possible even with # weird edge case fastq's. 
- String javaXmx="1750M" + String javaXmx="1750M" + Int threads = 1 String memory = "2G" Int timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 - String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" - Array[File]? NoneArray - File? NoneFile + String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0 + + Array[File]? noneArray + File? noneFile } # Chops of the .gz extension if present. - # The Basename needs to be taken here. Otherwise paths might differ between similar jobs. + # The Basename needs to be taken here. Otherwise paths might differ + # between similar jobs. String name = basename(sub(seqFile, "\.gz$","")) - # This regex chops of the extension and replaces it with _fastqc for the reportdir. + # This regex chops of the extension and replaces it with _fastqc for + # the reportdir. # Just as fastqc does it. String reportDir = outdirPath + "/" + sub(name, "\.[^\.]*$", "_fastqc") - # We reimplement the perl wrapper here. This has the advantage that it gives - # us more control over the amount of memory used. + # We reimplement the perl wrapper here. This has the advantage that it + # gives us more control over the amount of memory used. command <<< set -e - mkdir -p ~{outdirPath} + mkdir -p "~{outdirPath}" FASTQC_DIR="/usr/local/opt/fastqc-0.11.9" export CLASSPATH="$FASTQC_DIR:$FASTQC_DIR/sam-1.103.jar:$FASTQC_DIR/jbzip2-0.9.jar:$FASTQC_DIR/cisd-jhdf5.jar" java -Djava.awt.headless=true -XX:ParallelGCThreads=1 \ @@ -86,23 +90,24 @@ task Fastqc { >>> output { - File? rawReport = if extract then reportDir + "/fastqc_data.txt" else NoneFile File htmlReport = reportDir + ".html" File reportZip = reportDir + ".zip" - File? summary = if extract then reportDir + "/summary.txt" else NoneFile - Array[File]? images = if extract then glob(reportDir + "/Images/*.png") else NoneArray + File? summary = if extract then reportDir + "/summary.txt" else noneFile + File? rawReport = if extract then reportDir + "/fastqc_data.txt" else noneFile + Array[File]? 
images = if extract then glob(reportDir + "/Images/*.png") else noneArray } runtime { cpu: threads memory: memory - docker: dockerImage time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs seqFile: {description: "A fastq file.", category: "required"} - outdirPath: {description: "The path to write the output to", catgory: "required"} + outdirPath: {description: "The path to write the output to.", catgory: "required"} casava: {description: "Equivalent to fastqc's --casava flag.", category: "advanced"} nano: {description: "Equivalent to fastqc's --nano flag.", category: "advanced"} noFilter: {description: "Equivalent to fastqc's --nofilter flag.", category: "advanced"} @@ -115,18 +120,16 @@ task Fastqc { limits: {description: "Equivalent to fastqc's --limits option.", category: "advanced"} kmers: {description: "Equivalent to fastqc's --kmers option.", category: "advanced"} dir: {description: "Equivalent to fastqc's --dir option.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} threads: {description: "The number of cores to use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } meta { WDL_AID: { - exclude: ["NoneFile", "NoneArray"] + exclude: ["noneFile", "noneArray"] } } } @@ -155,14 +158,14 @@ task GetConfiguration { } runtime { - memory: "2G" # Needs more than 1 to pull the docker image + memory: "2G" # Needs more than 1 to pull the docker image. time_minute: timeMinutes docker: dockerImage } parameter_meta { + # inputs timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/fastqsplitter.wdl b/fastqsplitter.wdl index c523cf8a..25a50954 100644 --- a/fastqsplitter.wdl +++ b/fastqsplitter.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2019 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -26,19 +24,24 @@ task Fastqsplitter { input { File inputFastq Array[String]+ outputPaths - String dockerImage = "quay.io/biocontainers/fastqsplitter:1.1.0--py37h516909a_1" + Int? compressionLevel Int? threadsPerFile - # fastqplitter utilizes one thread per input file and one or more threads per output file + one thread for the application. - # Since a compression level of 1 is used, each output file uses approx 0.5 cores. + + # fastqplitter utilizes one thread per input file and one or + # more threads per output file + one thread for the application. + # Since a compression level of 1 is used, each output file + # uses approx 0.5 cores. 
Int cores = 1 + ceil(0.5 * length(outputPaths)) + String dockerImage = "quay.io/biocontainers/fastqsplitter:1.1.0--py37h516909a_1" } # Busybox mkdir does not accept multiple paths. command <<< set -e for FILE in ~{sep=' ' outputPaths} - do mkdir -p "$(dirname $FILE)" + do + mkdir -p "$(dirname ${FILE})" done fastqsplitter \ ~{"-c " + compressionLevel} \ @@ -51,15 +54,16 @@ task Fastqsplitter { Array[File] chunks = outputPaths } - # Using very safe margins here. 10MB/300MB per outputfile is used for single-threaded/multi-threaded compression. + # Using very safe margins here. 10MB/300MB per outputfile is used for + # single-threaded/multi-threaded compression. Float memoryPerFile = if select_first([threadsPerFile, 1]) > 1 then 0.40 else 0.02 Int fastqsplitterMemory = ceil(0.100 + memoryPerFile * length(outputPaths)) - # Make sure a minimum of 2 GB is present to pull the singularity image + # Make sure a minimum of 2 GB is present to pull the singularity image. Int memory = if fastqsplitterMemory <= 2 then 2 else fastqsplitterMemory runtime { + cpu: cores memory: "~{memory}G" docker: dockerImage - cpu: cores } } diff --git a/flash.wdl b/flash.wdl index 6e704921..c4554c50 100644 --- a/flash.wdl +++ b/flash.wdl @@ -24,13 +24,14 @@ import "common.wdl" as common task Flash { input { - String? preCommand FastqPair inputFastq String outdirPath String outPrefix = "flash" + Boolean compress = true + + String? preCommand Int? minOverlap Int? 
maxOverlap - Boolean compress = true Int threads = 2 String memory = "2G" @@ -55,8 +56,8 @@ task Flash { File notCombined1 = outdirPath + "/" + outPrefix + ".notCombined_1.fastq.gz" File notCombined2 = outdirPath + "/" + outPrefix + ".notCombined_2.fastq.gz" FastqPair notCombined = object { - R1: notCombined1, - R2: notCombined2 + R1: notCombined1, + R2: notCombined2 } File hist = outdirPath + "/" + outPrefix + ".hist" File histogram = outdirPath + "/" + outPrefix + ".histogram" @@ -66,5 +67,4 @@ task Flash { cpu: threads memory: memory } - -} \ No newline at end of file +} diff --git a/gatk.wdl b/gatk.wdl index 12416dda..cc5d1de5 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -28,12 +28,13 @@ task AnnotateIntervals { String annotatedIntervalsPath = "intervals.annotated.tsv" File intervals String intervalMergingRule = "OVERLAPPING_ONLY" + Int featureQueryLookahead = 1000000 + File? mappabilityTrack File? segmentalDuplicationTrack - Int featureQueryLookahead = 1000000 - String memory = "3G" String javaXmx = "2G" + String memory = "3G" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -57,9 +58,9 @@ task AnnotateIntervals { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -71,17 +72,15 @@ task AnnotateIntervals { intervalMergingRule: {description: "Equivalent to gatk AnnotateIntervals' `--interval-merging-rule` option.", category: "advanced"} mappabilityTrack: {description: "Equivalent to gatk AnnotateIntervals' `--mappability-track` option.", category: "common"} segmentalDuplicationTrack: {description: "Equivalent to gatk AnnotateIntervals' `--segmenta-duplicarion-track` option.", category: "common"} - featureQueryLookahead: {description: "Equivalent to gatk AnnotateIntervals' `--feature-query-lookahead` option", category: "advanced"} + featureQueryLookahead: {description: "Equivalent to gatk AnnotateIntervals' 
`--feature-query-lookahead` option.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -# Apply Base Quality Score Recalibration (BQSR) model +# Apply Base Quality Score Recalibration (BQSR) model. task ApplyBQSR { input { File inputBam @@ -93,9 +92,11 @@ task ApplyBQSR { File referenceFastaDict File referenceFastaFai - Int memoryMb = javaXmxMb + 512 Int javaXmxMb = 2048 - Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. + Int memoryMb = javaXmxMb + 512 + # This will likely be used with intervals, as such size based + # estimation can't be used. 
+ Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -124,33 +125,29 @@ task ApplyBQSR { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBam: {description: "The BAM file which should be recalibrated.", category: "required"} inputBamIndex: {description: "The input BAM file's index.", category: "required"} outputBamPath: {description: "The location the resulting BAM file should be written.", category: "required"} recalibrationReport: {description: "The BQSR report the be used for recalibration.", category: "required"} sequenceGroupInterval: {description: "Bed files describing the regions to operate on.", category: "advanced"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -# Generate Base Quality Score Recalibration (BQSR) model +# Generate Base Quality Score Recalibration (BQSR) model. task BaseRecalibrator { input { File inputBam @@ -159,14 +156,15 @@ task BaseRecalibrator { Array[File] sequenceGroupInterval = [] Array[File] knownIndelsSitesVCFs = [] Array[File] knownIndelsSitesVCFIndexes = [] - File? dbsnpVCF - File? dbsnpVCFIndex File referenceFasta File referenceFastaDict File referenceFastaFai - Int memoryMb = javaXmxMb + 512 + File? dbsnpVCF + File? dbsnpVCFIndex + Int javaXmxMb = 1024 + Int memoryMb = javaXmxMb + 512 Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -190,42 +188,39 @@ task BaseRecalibrator { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBam: {description: "The BAM file to generate a BQSR report for.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} recalibrationReportPath: {description: "The location to write the BQSR report to.", category: "required"} sequenceGroupInterval: {description: "Bed files describing the regions to operate on.", category: "advanced"} knownIndelsSitesVCFs: {description: "VCF files with known indels.", category: "advanced"} knownIndelsSitesVCFIndexes: {description: "The indexed for the known variant VCFs.", category: "advanced"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - + javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } task CalculateContamination { input { File tumorPileups + File? normalPileups - String memory = "13G" String javaXmx = "12G" + String memory = "13G" Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -246,20 +241,19 @@ task CalculateContamination { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs tumorPileups: {description: "The pileup summary of a tumor/case sample.", category: "required"} normalPileups: {description: "The pileup summary of the normal/control sample.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -268,8 +262,8 @@ task CallCopyRatioSegments { String outputPrefix File copyRatioSegments - String memory = "3G" String javaXmx = "2G" + String memory = "3G" Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -289,20 +283,19 @@ task CallCopyRatioSegments { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs outputPrefix: {description: "The prefix for the output files.", category: "required"} copyRatioSegments: {description: "The copy ratios file generated by gatk ModelSegments.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -310,15 +303,16 @@ task CollectAllelicCounts { input { String allelicCountsPath = "allelic_counts.tsv" File commonVariantSites - File? commonVariantSitesIndex File inputBam File inputBamIndex File referenceFasta File referenceFastaDict File referenceFastaFai - String memory = "11G" + File? commonVariantSitesIndex + String javaXmx = "10G" + String memory = "11G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -339,26 +333,25 @@ task CollectAllelicCounts { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs allelicCountsPath: {description: "The path the output should be written to.", category: "advanced"} commonVariantSites: {description: "Interval list or vcf of common variant sites (to retrieve the allelic counts for).", category: "required"} - commonVariantSitesIndex: {description: "The index for commonVariantSites.", category: "common"} inputBam: {description: "The BAM file to generate counts for.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + commonVariantSitesIndex: {description: "The index for commonVariantSites.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -373,8 +366,8 @@ task CollectReadCounts { File referenceFastaFai String intervalMergingRule = "OVERLAPPING_ONLY" - String memory = "8G" String javaXmx = "7G" + String memory = "8G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 5) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -397,12 +390,13 @@ task CollectReadCounts { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs countsPath: {description: "The location the output should be written to.", category: "advanced"} intervals: {description: "The intervals to collect counts for.", category: "required"} inputBam: {description: "The BAM file to determine the coverage for.", category: "required"} @@ -411,12 +405,10 @@ task CollectReadCounts { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} intervalMergingRule: {description: "Equivalent to gatk CollectReadCounts' `--interval-merging-rule` option.", category: "advanced"} + javaXmx: 
{description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -430,8 +422,8 @@ task CombineGVCFs { File referenceFastaDict File referenceFastaFai - String memory = "5G" String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 1 + ceil(size(gvcfFiles, "G") * 8) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -453,28 +445,24 @@ task CombineGVCFs { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs gvcfFiles: {description: "The GVCF files to be combined.", category: "required"} gvcfFilesIndex: {description: "The indexes for the GVCF files.", caregory: "required"} intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"} outputPath: {description: "The location the combined GVCF should be written to.", category: "required"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: 
"The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -486,12 +474,12 @@ task CombineVariants { String genotypeMergeOption = "UNIQUIFY" String filteredRecordsMergeType = "KEEP_IF_ANY_UNFILTERED" Array[String]+ identifiers - Array[File]+ variantVcfs # follow "identifiers" array order + Array[File]+ variantVcfs # Follow "identifiers" array order. Array[File]+ variantIndexes String outputPath - String memory = "13G" String javaXmx = "12G" + String memory = "13G" Int timeMinutes = 180 String dockerImage = "broadinstitute/gatk3:3.8-1" } @@ -499,17 +487,17 @@ task CombineVariants { command <<< set -e mkdir -p "$(dirname ~{outputPath})" - - # build "-V: " arguments according to IDs and VCFs to merge - # Make sure commands are run in bash + # Build "-V: " arguments according to IDs + # and VCFs to merge. + # Make sure commands are run in bash. 
V_args=$(bash -c ' set -eu ids=(~{sep=" " identifiers}) vars=(~{sep=" " variantVcfs}) for (( i = 0; i < ${#ids[@]}; ++i )) - do + do printf -- "-V:%s %s " "${ids[i]}" "${vars[i]}" - done + done ') java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 -jar /usr/GenomeAnalysisTK.jar \ -T CombineVariants \ @@ -526,12 +514,13 @@ task CombineVariants { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} @@ -540,14 +529,11 @@ task CombineVariants { identifiers: {description: "The sample identifiers in the same order as variantVcfs.", category: "required"} variantVcfs: {description: "The input VCF files in the same order as identifiers.", category: "required"} variantIndexes: {description: "The indexes of the input VCF files.", category: "required"} - outputPath: {description: "The location the output should be written to", category: "required"} - + outputPath: {description: "The location the output should be written to.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -555,10 +541,11 @@ task CreateReadCountPanelOfNormals { input { String PONpath = "PON.hdf5" Array[File]+ readCountsFiles + File? annotatedIntervals - String memory = "8G" String javaXmx = "7G" + String memory = "8G" Int timeMinutes = 5 String dockerImage = "broadinstitute/gatk:4.1.8.0" # The biocontainer causes a spark related error for some reason... } @@ -578,34 +565,33 @@ task CreateReadCountPanelOfNormals { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs PONpath: {description: "The location the PON should be written to.", category: "common"} readCountsFiles: {description: "The read counts files as generated by CollectReadCounts.", category: "required"} - annotatedIntervals: {description: "An annotation set of intervals as generated by AnnotateIntervals. If provided, explicit GC correction will be performed.", - category: "advanced"} + annotatedIntervals: {description: "An annotation set of intervals as generated by AnnotateIntervals. If provided, explicit GC correction will be performed.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } task DenoiseReadCounts { input { - File? PON - File? annotatedIntervals File readCounts String outputPrefix - String memory = "5G" + File? PON + File? annotatedIntervals + String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -628,23 +614,21 @@ task DenoiseReadCounts { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - PON: {description: "A panel of normals as generated by CreateReadCountPanelOfNormals.", category: "advanced"} - annotatedIntervals: {description: "An annotated set of intervals as generated by AnnotateIntervals. Will be ignored if PON is provided.", - category: "advanced"} + # inputs readCounts: {description: "The read counts file as generated by CollectReadCounts.", category: "required"} outputPrefix: {description: "The prefix for the output files.", category: "required"} + PON: {description: "A panel of normals as generated by CreateReadCountPanelOfNormals.", category: "advanced"} + annotatedIntervals: {description: "An annotated set of intervals as generated by AnnotateIntervals. Will be ignored if PON is provided.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -656,14 +640,15 @@ task FilterMutectCalls { File unfilteredVcf File unfilteredVcfIndex String outputVcf + Int uniqueAltReadCount = 4 + File mutect2Stats + File? contaminationTable File? mafTumorSegments File? artifactPriors - Int uniqueAltReadCount = 4 - File mutect2Stats - String memory = "13G" String javaXmx = "12G" + String memory = "13G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -692,41 +677,39 @@ task FilterMutectCalls { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} unfilteredVcf: {description: "An unfiltered VCF file as produced by Mutect2.", category: "required"} unfilteredVcfIndex: {description: "The index of the unfiltered VCF file.", category: "required"} outputVcf: {description: 
"The location the filtered VCF file should be written.", category: "required"} + uniqueAltReadCount: {description: "Equivalent to FilterMutectCalls' `--unique-alt-read-count` option.", category: "advanced"} + mutect2Stats: {description: "Equivalent to FilterMutectCalls' `-stats` option.", category: "advanced"} contaminationTable: {description: "Equivalent to FilterMutectCalls' `--contamination-table` option.", category: "advanced"} mafTumorSegments: {description: "Equivalent to FilterMutectCalls' `--tumor-segmentation` option.", category: "advanced"} artifactPriors: {description: "Equivalent to FilterMutectCalls' `--ob-priors` option.", category: "advanced"} - uniqueAltReadCount: {description: "Equivalent to FilterMutectCalls' `--unique-alt-read-count` option.", category: "advanced"} - mutect2Stats: {description: "Equivalent to FilterMutectCalls' `-stats` option.", category: "advanced"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -# Combine multiple recalibration tables from scattered BaseRecalibrator runs +# Combine multiple recalibration tables from scattered BaseRecalibrator runs. 
task GatherBqsrReports { input { Array[File] inputBQSRreports String outputReportPath - Int memoryMb = 256 + javaXmxMb Int javaXmxMb = 256 + Int memoryMb = 256 + javaXmxMb Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -745,21 +728,19 @@ task GatherBqsrReports { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBQSRreports: {description: "The BQSR reports to be merged.", category: "required"} outputReportPath: {description: "The location of the combined BQSR report.", category: "required"} - + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -770,9 +751,11 @@ task GenomicsDBImport { Array[File]+ intervals String genomicsDBWorkspacePath = "genomics_db" String genomicsDBTarFile = "genomics_db.tar.gz" + String? 
tmpDir - String memory = "5G" + String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -794,25 +777,23 @@ task GenomicsDBImport { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs gvcfFiles: {description: "The gvcfFiles to be merged.", category: "required"} gvcfFilesIndex: {description: "Indexes for the gvcfFiles.", category: "required"} intervals: {description: "intervals over which to operate.", category: "required"} - genomicsDBWorkspacePath: {description: "Where the genomicsDB files should be stored", category: "advanced"} - genomicsDBTarFile: {description: "Where the .tar file containing the genomicsDB should be stored", category: "advanced"} - tmpDir: {description: "Alternate temporary directory in case there is not enough space. Must be mounted when using containers", - category: "advanced"} + genomicsDBWorkspacePath: {description: "Where the genomicsDB files should be stored.", category: "advanced"} + genomicsDBTarFile: {description: "Where the .tar file containing the genomicsDB should be stored.", category: "advanced"} + tmpDir: {description: "Alternate temporary directory in case there is not enough space. Must be mounted when using containers.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -820,18 +801,19 @@ task GenotypeGVCFs { input { File gvcfFile File gvcfFileIndex - Array[File]? intervals String outputPath File referenceFasta File referenceFastaDict File referenceFastaFai Array[String] annotationGroups = ["StandardAnnotation"] + + Array[File]? intervals File? dbsnpVCF File? dbsnpVCFIndex File? pedigree - String memory = "7G" String javaXmx = "6G" + String memory = "7G" Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -854,35 +836,31 @@ task GenotypeGVCFs { output { File outputVCF = outputPath File outputVCFIndex = outputPath + ".tbi" - } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs gvcfFile: {description: "The GVCF file to be genotyped.", category: "required"} gvcfFileIndex: {description: "The index of the input GVCF file.", category: "required"} - intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "optional"} outputPath: {description: "The location to write the output VCF file to.", category: "required"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", 
category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - annotationGroups: {description: "Which annotation groups will be used for the annotation", category: "advanced"} + annotationGroups: {description: "Which annotation groups will be used for the annotation.", category: "advanced"} + intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "optional"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} - pedigree: {description: "Pedigree file for determining the population \"founders\"", category: "common"} + pedigree: {description: "Pedigree file for determining the population \"founders\".", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -896,8 +874,8 @@ task GetPileupSummaries { File sitesForContaminationIndex String outputPrefix - String memory = "13G" String javaXmx = "12G" + String memory = "13G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -917,12 +895,13 @@ task GetPileupSummaries { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs sampleBam: {description: "A BAM file for which a pileup should be created.", category: "required"} sampleBamIndex: {description: "The index of the input BAM file.", category: "required"} variantsForContamination: {description: "A VCF file with common variants.", category: "required"} @@ -930,13 +909,10 @@ task GetPileupSummaries { sitesForContamination: {description: "A bed file describing regions to operate on.", category: "required"} sitesForContaminationIndex: {description: "The index for the bed file.", category: "required"} outputPrefix: {description: "The prefix for the ouput.", category: "required"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -945,26 +921,27 @@ task HaplotypeCaller { input { Array[File]+ inputBams Array[File]+ inputBamsIndex - Array[File]+? intervalList - Array[File]+? excludeIntervalList String outputPath File referenceFasta File referenceFastaIndex File referenceFastaDict + Boolean gvcf = false + String emitRefConfidence = if gvcf then "GVCF" else "NONE" + Boolean dontUseSoftClippedBases = false + + Array[File]+? intervalList + Array[File]+? excludeIntervalList Float? contamination File? dbsnpVCF File? dbsnpVCFIndex File? pedigree Int? ploidy String? outputMode - Boolean gvcf = false - String emitRefConfidence = if gvcf then "GVCF" else "NONE" - Boolean dontUseSoftClippedBases = false Float? standardMinConfidenceThresholdForCalling - Int memoryMb = javaXmxMb + 512 - # Memory increases with time used. 4G should cover most use cases. Int javaXmxMb = 4096 + # Memory increases with time used. 4G should cover most use cases. + Int memoryMb = javaXmxMb + 512 Int timeMinutes = 400 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -995,50 +972,44 @@ task HaplotypeCaller { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBams: {description: "The BAM files on which to perform variant calling.", category: "required"} inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} - intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"} - excludeIntervalList: {description: "Bed files or interval lists describing the regions to NOT operate on.", category: "common"} outputPath: {description: "The location to write the output to.", category: "required"} - ploidy: {description: "The ploidy with which the variants should be called.", category: "common"} - gvcf: {description: "Whether the output should be a gvcf", category: "common"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaIndex: {description: "The index for the reference fasta file.", category: "required"} - contamination: {description: "Equivalent to HaplotypeCaller's `-contamination` option.", category: "advanced"} - outputMode: {description: "Specifies which type of calls we should output. Same as HaplotypeCaller's `--output-mode` option.", - category: "advanced"} - emitRefConfidence: {description: "Whether to include reference calls. 
Three modes: 'NONE', 'BP_RESOLUTION' and 'GVCF'", - category: "advanced"} + gvcf: {description: "Whether the output should be a gvcf.", category: "common"} + emitRefConfidence: {description: "Whether to include reference calls. Three modes: 'NONE', 'BP_RESOLUTION' and 'GVCF'.", category: "advanced"} dontUseSoftClippedBases: {description: "Do not use soft-clipped bases. Should be 'true' for RNA variant calling.", category: "common"} - standardMinConfidenceThresholdForCalling: {description: "Confidence threshold used for calling variants.", category: "advanced"} + intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"} + excludeIntervalList: {description: "Bed files or interval lists describing the regions to NOT operate on.", category: "common"} + contamination: {description: "Equivalent to HaplotypeCaller's `-contamination` option.", category: "advanced"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} - pedigree: {description: "Pedigree file for determining the population \"founders\"", category: "common"} + pedigree: {description: "Pedigree file for determining the population \"founders\".", category: "common"} + ploidy: {description: "The ploidy with which the variants should be called.", category: "common"} + outputMode: {description: "Specifies which type of calls we should output. Same as HaplotypeCaller's `--output-mode` option.", category: "advanced"} + standardMinConfidenceThresholdForCalling: {description: "Confidence threshold used for calling variants.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } - task LearnReadOrientationModel { input { Array[File]+ f1r2TarGz - String memory = "13G" String javaXmx = "12G" + String memory = "13G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1056,19 +1027,18 @@ task LearnReadOrientationModel { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs f1r2TarGz: {description: "A f1r2TarGz file outputed by mutect2.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1076,8 +1046,8 @@ task MergeStats { input { Array[File]+ stats - String memory = "15G" String javaXmx = "14G" + String memory = "15G" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1095,19 +1065,18 @@ task MergeStats { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs stats: {description: "Statistics files to be merged.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1117,14 +1086,13 @@ task ModelSegments { String outputPrefix File denoisedCopyRatios File allelicCounts - File? 
normalAllelicCounts - Int minimumTotalAlleleCountCase = if defined(normalAllelicCounts) - then 0 - else 30 + Int minimumTotalAlleleCountCase = if defined(normalAllelicCounts) then 0 else 30 Int maximumNumberOfSmoothingIterations = 10 - String memory = "11G" + File? normalAllelicCounts + String javaXmx = "10G" + String memory = "11G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1145,7 +1113,6 @@ task ModelSegments { output { File hetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.tsv" - File? normalHetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.normal.tsv" File copyRatioSegments = outputDir + "/" + outputPrefix + ".cr.seg" File copyRatioCBS = outputDir + "/" + outputPrefix + ".cr.igv.seg" File alleleFractionCBS = outputDir + "/" + outputPrefix + ".af.igv.seg" @@ -1155,29 +1122,28 @@ task ModelSegments { File modeledSegments = outputDir + "/" + outputPrefix + ".modelFinal.seg" File copyRatioParameters = outputDir + "/" + outputPrefix + ".modelFinal.cr.param" File alleleFractionParameters = outputDir + "/" + outputPrefix + ".modelFinal.af.param" + File? normalHetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.normal.tsv" } runtime { - docker: dockerImage - time_minute: timeMinutes memory: memory + time_minute: timeMinutes + docker: dockerImage } parameter_meta { + # inputs outputDir: {description: "The directory to write the ouput to.", category: "common"} outputPrefix: {description: "The prefix of the output files. 
Should not include directories.", category: "required"} denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"} allelicCounts: {description: "The allelicCounts as generate by CollectAllelicCounts.", category: "required" } - normalAllelicCounts: {description: "The allelicCounts as generate by CollectAllelicCounts for a matched normal.", category: "common"} minimumTotalAlleleCountCase: {description: "Equivalent to gatk ModelSeqments' `--minimum-total-allele-count-case` option.", category: "advanced"} maximumNumberOfSmoothingIterations: {description: "Equivalent to gatk ModelSeqments' `--maximum-number-of-smoothing-iterations` option.", category: "advanced"} - + normalAllelicCounts: {description: "The allelicCounts as generate by CollectAllelicCounts for a matched normal.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1190,17 +1156,18 @@ task MuTect2 { File referenceFastaFai String outputVcf String tumorSample + String f1r2TarGz = "f1r2.tar.gz" + Array[File]+ intervals + String outputStats = outputVcf + ".stats" + String? normalSample File? germlineResource File? 
germlineResourceIndex File? panelOfNormals File? panelOfNormalsIndex - String f1r2TarGz = "f1r2.tar.gz" - Array[File]+ intervals - String outputStats = outputVcf + ".stats" - String memory = "5G" String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1229,12 +1196,13 @@ task MuTect2 { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBams: {description: "The BAM files on which to perform variant calling.", category: "required"} inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} @@ -1242,20 +1210,18 @@ task MuTect2 { referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} outputVcf: {description: "The location to write the output VCF file to.", category: "required"} tumorSample: {description: "The name of the tumor/case sample.", category: "required"} + f1r2TarGz: {description: "Equivalent to Mutect2's `--f1r2-tar-gz` option.", category: "advanced"} + intervals: {description: "Bed files describing the regiosn to operate on.", category: "required"} + outputStats: {description: "The location the output statistics should be written to.", category: "advanced"} normalSample: {description: "The name of the normal/control sample.", category: "common"} germlineResource: {description: "Equivalent to Mutect2's `--germline-resource` option.", category: "advanced"} germlineResourceIndex: {description: "The index for the germline resource.", category: "advanced"} panelOfNormals: {description: "Equivalent to Mutect2's `--panel-of-normals` option.", category: "advanced"} panelOfNormalsIndex: {description: "The index for the panel of normals.", category: "advanced"} - f1r2TarGz: 
{description: "Equivalent to Mutect2's `--f1r2-tar-gz` option.", category: "advanced"} - intervals: {description: "Bed files describing the regiosn to operate on.", category: "required"} - outputStats: {description: "The location the output statistics should be written to.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1266,10 +1232,11 @@ task PlotDenoisedCopyRatios { String outputPrefix File standardizedCopyRatios File denoisedCopyRatios + Int? minimumContigLength - String memory = "4G" String javaXmx = "3G" + String memory = "4G" Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.8.0" } @@ -1289,32 +1256,31 @@ task PlotDenoisedCopyRatios { output { File denoisedCopyRatiosPlot = outputDir + "/" + outputPrefix + ".denoised.png" - File? 
denoisedCopyRatiosLimitedPlot = outputDir + "/" + outputPrefix + ".denoisedLimit4.png" File standardizedMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".standardizedMAD.txt" File denoisedMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".denoisedMAD.txt" File deltaMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".deltaMAD.txt" File deltaScaledMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".scaledDeltaMAD.txt" + File? denoisedCopyRatiosLimitedPlot = outputDir + "/" + outputPrefix + ".denoisedLimit4.png" } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file used for the analyses.", category: "required"} outputDir: {description: "The directory to write the ouput to.", category: "common"} outputPrefix: {description: "The prefix of the output files. Should not include directories.", category: "required"} - denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"} standardizedCopyRatios: {description: "The standardized copy ratios as generated by DenoiseReadCounts.", category: "required"} + denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"} minimumContigLength: {description: "The minimum length for a contig to be included in the plots.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1326,10 +1292,11 @@ task PlotModeledSegments { File denoisedCopyRatios File segments File allelicCounts + Int? minimumContigLength - String memory = "4G" String javaXmx = "3G" + String memory = "4G" Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.8.0" } @@ -1353,12 +1320,13 @@ task PlotModeledSegments { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file used for the analyses.", category: "required"} outputDir: {description: "The directory to write the ouput to.", category: "common"} outputPrefix: {description: "The prefix of the output files. Should not include directories.", category: "required"} @@ -1366,12 +1334,10 @@ task PlotModeledSegments { segments: {description: "The modeled segments as generated by ModelSegments.", category: "required"} allelicCounts: {description: "The hetrozygous allelic counts as generated by ModelSegments.", category: "required"} minimumContigLength: {description: "The minimum length for a contig to be included in the plots.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1380,14 +1346,15 @@ task PreprocessIntervals { File referenceFasta File referenceFastaDict File referenceFastaFai - File? intervals String outputIntervalList = "bins.interval_list" Int binLength = if defined(intervals) then 0 else 1000 Int padding = if defined(intervals) then 250 else 0 String intervalMergingRule = "OVERLAPPING_ONLY" - String memory = "4G" + File? 
intervals + String javaXmx = "3G" + String memory = "4G" Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 6) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1411,41 +1378,42 @@ task PreprocessIntervals { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - referenceFasta: {description: "The reference fasta file..", category: "required"} + # inputs + referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - intervals: {description: "Bed files describing the regiosn to operate on.", category: "common"} outputIntervalList: {description: "The location the output should be written to.", category: "advanced"} binLength: {description: "The size of the bins to be created. Should be 0 for targeted/exome sequencing.", category: "advanced"} padding: {description: "The padding to be added to the bins. Should be 0 if contiguos binning is used, eg with WGS.", category: "advanced"} intervalMergingRule: {description: "Equivalent to gatk PreprocessIntervals' `--interval-merging-rule` option.", category: "advanced"} + intervals: {description: "Bed files describing the regiosn to operate on.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } task SelectVariants { input { + File inputVcf + File inputVcfIndex File referenceFasta File referenceFastaDict File referenceFastaFai - File inputVcf - File inputVcfIndex String outputPath = "output.vcf.gz" - String? selectTypeToInclude Array[File] intervals = [] - String memory = "5G" + + String? selectTypeToInclude + String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1468,29 +1436,25 @@ task SelectVariants { } runtime { - docker: dockerImage - time_minute: timeMinutes memory: memory + time_minute: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputVcf: {description: "The VCF input file.", category: "required"} inputVcfIndex: {description: "The input VCF file's index.", category: "required"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - selectTypeToInclude: {description: "Select only 
a certain type of variants from the input file", category: "common"} outputPath: {description: "The location the output VCF file should be written.", category: "advanced"} intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"} - + selectTypeToInclude: {description: "Select only a certain type of variants from the input file.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1504,8 +1468,8 @@ task SplitNCigarReads { String outputBam Array[File] intervals = [] - String memory = "5G" String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1527,28 +1491,24 @@ task SplitNCigarReads { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBam: {description: "The BAM file for which spliced reads should be split.", category: "required"} inputBamIndex: {description: "The input BAM file's index.", category: "required"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} outputBam: {description: "The location the output BAM file should be written.", category: "required"} intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1558,11 +1518,6 @@ task VariantEval { Array[File] evalVcfsIndex Array[File] comparisonVcfs = [] Array[File] comparisonVcfsIndex = [] - File? referenceFasta - File? referenceFastaDict - File? referenceFastaFai - File? dbsnpVCF - File? dbsnpVCFIndex Array[File] intervals = [] String outputPath = "eval.table" Boolean doNotUseAllStandardModules = false @@ -1572,8 +1527,14 @@ task VariantEval { Array[String] samples = [] Boolean mergeEvals = false - String memory = "5G" + File? referenceFasta + File? referenceFastaDict + File? referenceFastaFai + File? dbsnpVCF + File? dbsnpVCFIndex + String javaXmx = "4G" + String memory = "5G" # TODO: Refine estimate. For now 4 minutes per GB of input. Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs, select_all([referenceFasta, dbsnpVCF])]), "G") * 20) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" @@ -1604,35 +1565,37 @@ task VariantEval { runtime { cpu: 1 - docker: dockerImage memory: memory time_minutes: timeMinutes + docker: dockerImage } + parameter_meta { + # inputs evalVcfs: {description: "Variant sets to evaluate.", category: "required"} evalVcfsIndex: {description: "Indexes for the variant sets.", category: "required"} comparisonVcfs: {description: "Compare set vcfs.", category: "advanced"} comparisonVcfsIndex: {description: "Indexes for the compare sets.", category: "advanced"} - evalModules: {description: "One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless doNotUseAllStandardModules=true)", category: "common"} - stratificationModules: {description: "One or more specific stratification modules to apply to the eval track(s) (in addition to 
the standard stratifications, unless doNotUseAllStandardStratifications=true)", category: "common"} - samples: {description: "Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context." , category: "advanced"} # Advanced because this description is impossible to understand... - mergeEvals: {description: "If provided, all evalVcf tracks will be merged into a single eval track", category: "common"} + intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"} + outputPath: {description: "The location the output table should be written.", category: "advanced"} doNotUseAllStandardModules: {description: "Do not use the standard modules by default (instead, only those that are specified with the evalModules option).", category: "common"} doNotUseAllStandardStratifications: {description: "Do not use the standard stratification modules by default (instead, only those that are specified with the stratificationModules option).", category: "common"} + evalModules: {description: "One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless doNotUseAllStandardModules=true).", category: "common"} + stratificationModules: {description: "One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless doNotUseAllStandardStratifications=true).", category: "common"} + samples: {description: "Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context." 
, category: "advanced"} + mergeEvals: {description: "If provided, all evalVcf tracks will be merged into a single eval track.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "common"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "common"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "common"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} - outputPath: {description: "The location the output table should be written.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } + task VariantFiltration { input { File inputVcf @@ -1644,8 +1607,8 @@ task VariantFiltration { Array[String]+ filterArguments Array[File] intervals = [] - String memory = "5G" String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1668,29 +1631,24 @@ task VariantFiltration { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputVcf: {description: "The VCF to be filtered.", category: "required"} inputVcfIndex: {description: "The input VCF file's index.", category: "required"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} + filterArguments: {description: "Arguments that should be used for the filter. For example: ['--filter-name', 'my_filter', '--filter-expression', 'AB<0.2'].", category: "required"} intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"} - filterArguments: {description: "Arguments that should be used for the filter. 
For example: ['--filter-name', 'my_filter', '--filter-expression', 'AB<0.2']", - category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } - diff --git a/gffcompare.wdl b/gffcompare.wdl index e5f62b5e..5d80f619 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -147,4 +147,4 @@ task GffCompare { exclude: ["noneFile"] } } -} \ No newline at end of file +} From 08d6519a05a9e297decbe81e0e29c633ea07e14f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 30 Oct 2020 17:27:38 +0100 Subject: [PATCH 361/902] Try to fix Travis error. --- bam2fastx.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 1b911dbb..0585de23 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -112,13 +112,13 @@ task Bam2Fastq { bamFiles="" for bamFile in ~{sep=" " bam} do - ln ${bamFile} . - bamFiles=${bamFiles}" $(basename ${bamFile})" + ln $bamFile . + bamFiles=$bamFiles" $(basename $bamFile)" done for index in ~{sep=" " bamIndex} do - ln ${index} . + ln $index . 
done bam2fastq \ @@ -126,7 +126,7 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ${bamFiles} + $bamFiles } output { From 9f77348d7a353e93b1f2a57b02942a93107ea634 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 30 Oct 2020 17:30:59 +0100 Subject: [PATCH 362/902] Fix second task as well. --- bam2fastx.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 0585de23..2ad08581 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -44,13 +44,13 @@ task Bam2Fasta { bamFiles="" for bamFile in ~{sep=" " bam} do - ln ${bamFile} . - bamFiles=${bamFiles}" $(basename ${bamFile})" + ln $bamFile . + bamFiles=$bamFiles" $(basename $bamFile)" done for index in ~{sep=" " bamIndex} do - ln ${index} . + ln $index . done bam2fasta \ From 840a37d19727ee6edb790287cfd447e9964ce669 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 30 Oct 2020 17:39:16 +0100 Subject: [PATCH 363/902] Fix a third Travis error. --- deepvariant.wdl | 2 +- gffcompare.wdl | 44 +++++++++++++++++++++++--------------------- gffread.wdl | 2 +- 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index 20bf8e27..8b08e111 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -81,7 +81,7 @@ task RunDeepVariant { modelType: {description: ". Type of model to use for variant calling. Each model_type has an associated default model, which can be overridden by the --customized_model flag.", category: "required"} outputVcf: {description: "Path where we should write VCF file.", category: "required"} postprocessVariantsExtraArgs: {description: "A comma-separated list of flag_name=flag_value. 'flag_name' has to be valid flags for calpostprocess_variants.py.", category: "advanced"} - customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. 
If not set, the default for each --model_type will be used"., category: "advanced"} + customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. If not set, the default for each --model_type will be used.", category: "advanced"} numShards: {description: "Number of shards for make_examples step.", category: "common"} outputGVcf: {description: "Path where we should write gVCF file.", category: "common"} regions: {description: "List of regions we want to process, in BED/BEDPE format.", category: "advanced"} diff --git a/gffcompare.wdl b/gffcompare.wdl index 5d80f619..8bd53091 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -22,16 +22,11 @@ version 1.0 task GffCompare { input { - File? inputGtfList Array[File] inputGtfFiles File referenceAnnotation - String? outputDir - String outPrefix = "gffcmp" # gffcmp is the default used by the program as well. This - # needs to be defined in order for the output values to be consistent and correct. - File? genomeSequences - Int? maxDistanceFreeEndsTerminalExons - Int? maxDistanceGroupingTranscriptStartSites - String? namePrefix + # gffcmp is the default used by the program as well. This needs to be + # defined in order for the output values to be consistent and correct. + String outPrefix = "gffcmp" Boolean C = false Boolean A = false Boolean X = false @@ -44,15 +39,22 @@ task GffCompare { Boolean verbose = false Boolean debugMode = false + File? inputGtfList + String? outputDir + File? genomeSequences + Int? maxDistanceFreeEndsTerminalExons + Int? maxDistanceGroupingTranscriptStartSites + String? namePrefix + Int timeMinutes = 1 + ceil(size(inputGtfFiles, "G") * 30) String dockerImage = "quay.io/biocontainers/gffcompare:0.10.6--h2d50403_0" # This workaround only works in the input section. - # Issue addressed at https://github.com/openwdl/wdl/pull/263 + # Issue addressed at https://github.com/openwdl/wdl/pull/263. File? noneFile # This is a wdl workaround. Please do not assign! 
} - # This allows for the creation of output directories + # This allows for the creation of output directories. String dirPrefix = if defined(outputDir) then select_first([outputDir]) + "/" else "" @@ -93,22 +95,22 @@ task GffCompare { then "annotated" else "combined" - # Check if a redundant .gtf will be created + # Check if a redundant .gtf will be created. Boolean createRedundant = C || A || X output { + # noneFile is not stable. Please replace this as soon as wdl spec allows. File annotated = totalPrefix + "." + annotatedName + ".gtf" File loci = totalPrefix + ".loci" File stats = totalPrefix + ".stats" File tracking = totalPrefix + ".tracking" - # noneFile is not stable. Please replace this as soon as wdl spec allows + Array[File] allFiles = select_all([annotated, loci, stats, tracking, redundant, missedIntrons]) File? redundant = if createRedundant then totalPrefix + ".redundant.gtf" else noneFile File? missedIntrons = if debugMode then totalPrefix + ".missed_introns.gtf" else noneFile - Array[File] allFiles = select_all([annotated, loci, stats, tracking, redundant, missedIntrons]) } runtime { @@ -117,15 +119,10 @@ task GffCompare { } parameter_meta { - inputGtfList: {description: "Equivalent to gffcompare's `-i` option.", category: "advanced"} + # inputs inputGtfFiles: {description: "The input GTF files.", category: "required"} referenceAnnotation: {description: "The GTF file to compare with.", category: "required"} - outputDir: {description: "The location the output should be written.", category: "common"} outPrefix: {description: "The prefix for the output.", category: "advanced"} - genomeSequences: {description: "Equivalent to gffcompare's `-s` option.", category: "advanced"} - maxDistanceFreeEndsTerminalExons: {description: "Equivalent to gffcompare's `-e` option.", category: "advanced"} - maxDistanceGroupingTranscriptStartSites: {description: "Equivalent to gffcompare's `-d` option.", category: "advanced"} - namePrefix: {description: "Equivalent to 
gffcompare's `-p` option.", category: "advanced"} C: {description: "Equivalent to gffcompare's `-C` flag.", category: "advanced"} A: {description: "Equivalent to gffcompare's `-A` flag.", category: "advanced"} X: {description: "Equivalent to gffcompare's `-X` flag.", category: "advanced"} @@ -137,9 +134,14 @@ task GffCompare { noTmap: {description: "Equivalent to gffcompare's `-T` flag.", category: "advanced"} verbose: {description: "Equivalent to gffcompare's `-V` flag.", category: "advanced"} debugMode: {description: "Equivalent to gffcompare's `-D` flag.", category: "advanced"} + inputGtfList: {description: "Equivalent to gffcompare's `-i` option.", category: "advanced"} + outputDir: {description: "The location the output should be written.", category: "common"} + genomeSequences: {description: "Equivalent to gffcompare's `-s` option.", category: "advanced"} + maxDistanceFreeEndsTerminalExons: {description: "Equivalent to gffcompare's `-e` option.", category: "advanced"} + maxDistanceGroupingTranscriptStartSites: {description: "Equivalent to gffcompare's `-d` option.", category: "advanced"} + namePrefix: {description: "Equivalent to gffcompare's `-p` option.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } meta { diff --git a/gffread.wdl b/gffread.wdl index d83e4d76..76ee20d1 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -79,4 +79,4 @@ task GffRead { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} From ca4fe2d92f42b2c32b42197deeef204cec07762f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 2 Nov 2020 08:54:32 +0100 Subject: [PATCH 364/902] Add another batch of updates. --- CHANGELOG.md | 1 + gatk.wdl | 1 + gffread.wdl | 16 +++++++------ gridss.wdl | 15 +++++++------ hisat2.wdl | 32 +++++++++++++------------- htseq.wdl | 13 ++++++----- isoseq3.wdl | 18 +++++++-------- lima.wdl | 10 ++++----- macs2.wdl | 2 +- manta.wdl | 19 +++++++++------- minimap2.wdl | 27 +++++++++++----------- multiqc.wdl | 63 ++++++++++++++++++++++++++-------------------------- nanopack.wdl | 10 ++++----- 13 files changed, 119 insertions(+), 108 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c04b582..028c7400 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ isoseq3: `cores` input has been renamed to `threads` to match tool naming. + CCS: `cores` input has been renamed to `threads` to match tool naming. + CCS: Update CCS to version 5. + deepvariant: Add task for DeepVariant. diff --git a/gatk.wdl b/gatk.wdl index cc5d1de5..7aa2915c 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -64,6 +64,7 @@ task AnnotateIntervals { } parameter_meta { + # inputs referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} diff --git a/gffread.wdl b/gffread.wdl index 76ee20d1..343011e9 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -24,19 +24,21 @@ task GffRead { input { File inputGff File genomicSequence + Boolean outputGtfFormat = false + File? genomicIndex # Optional. 
GFFRead can create this by itself. String? exonsFastaPath String? CDSFastaPath String? proteinFastaPath String? filteredGffPath - Boolean outputGtfFormat = false + Int timeMinutes = 1 + ceil(size(inputGff) * 10) String dockerImage = "quay.io/biocontainers/gffread:0.9.12--0" } # The mkdirs below are hackish. It should be - # ~{"mkir -p $(dirname " + somePath + ")"} - # but this goes wrong. Cromwell will always use ')' even if somepath is not defined. + # ~{"mkir -p $(dirname " + somePath + ")"} but this goes wrong. + # Cromwell will always use ')' even if somepath is not defined. # Which leads to crashing. command { set -e @@ -62,21 +64,21 @@ task GffRead { } runtime { - docker: dockerImage time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputGff: {description: "The input GFF file.", category: "required"} genomicSequence: {description: "The genome.", category: "required"} + outputGtfFormat: {description: "Equivalent to gffread's `-T` flag.", category: "advanced"} genomicIndex: {description: "The genome's index.", category: "advanced"} exonsFastaPath: {description: "The location the exons fasta should be written to.", category: "advanced"} CDSFastaPath: {description: "The location the CDS fasta should be written to.", category: "advanced"} proteinFastaPath: {description: "The location the protein fasta should be written to.", category: "advanced"} filteredGffPath: {description: "The location the filtered GFF should be written to.", category: "advanced"} - outputGtfFormat: {description: "Equivalent to gffread's `-T` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/gridss.wdl b/gridss.wdl index 44b9e9f1..9499be5e 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -27,11 +27,12 @@ task GRIDSS { File tumorBam File tumorBai String tumorLabel + BwaIndex reference + String outputPrefix = "gridss" + File? normalBam File? normalBai String? normalLabel - BwaIndex reference - String outputPrefix = "gridss" Int jvmHeapSizeGb = 30 Int threads = 1 @@ -68,17 +69,17 @@ task GRIDSS { } parameter_meta { + # inputs tumorBam: {description: "The input BAM file. This should be the tumor/case sample in case of a paired analysis.", category: "required"} tumorBai: {description: "The index for tumorBam.", category: "required"} tumorLabel: {description: "The name of the (tumor) sample.", category: "required"} + reference: {description: "A BWA index, this should also include the fasta index file (.fai).", category: "required"} + outputPrefix: {description: "The prefix for the output files. This may include parent directories.", category: "common"} normalBam: {description: "The BAM file for the normal/control sample.", category: "advanced"} normalBai: {description: "The index for normalBam.", category: "advanced"} normalLabel: {description: "The name of the normal sample.", category: "advanced"} - reference: {description: "A BWA index, this should also include the fasta index file (.fai).", category: "required"} - outputPrefix: {description: "The prefix for the output files. This may include parent directories.", category: "common"} - + jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling.",category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} - jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} diff --git a/hisat2.wdl b/hisat2.wdl index f9a4bc59..b52bf70f 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -22,9 +22,9 @@ version 1.0 task Hisat2 { input { - Array[File]+ indexFiles File inputR1 File? inputR2 + Array[File]+ indexFiles String outputBam String sample String library @@ -32,22 +32,22 @@ task Hisat2 { String platform = "illumina" Boolean downstreamTranscriptomeAssembly = true String summaryFilePath = basename(outputBam, ".bam") + ".summary.txt" - - Int threads = 4 - Int? sortThreads Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 + + Int? sortThreads + + Int threads = 4 Int? memoryGb Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / threads) # quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1 - # is a combination of hisat2 and samtools - # hisat2=2.2.0, samtools=1.10 + # is a combination of hisat2 and samtools hisat2=2.2.0 & samtools=1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:2880dd9d8ad0a7b221d4eacda9a818e92983128d-0" } - # Samtools sort may block the pipe while it is writing data to disk. + # Samtools sort may block the pipe while it is writing data to disk. # This can lead to cpu underutilization. - # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. + # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. 
Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) Int estimatedMemoryGb = 1 + ceil(size(indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads @@ -81,16 +81,17 @@ task Hisat2 { } runtime { - memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" cpu: threads + memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" time_minutes: timeMinutes docker: dockerImage } parameter_meta { - indexFiles: {description: "The hisat2 index files.", category: "required"} + # inputs inputR1: {description: "The first-/single-end FastQ file.", category: "required"} inputR2: {description: "The second-end FastQ file.", category: "common"} + indexFiles: {description: "The hisat2 index files.", category: "required"} outputBam: {description: "The location the output BAM file should be written to.", category: "required"} sample: {description: "The sample id.", category: "required"} library: {description: "The library id.", category: "required"} @@ -98,13 +99,12 @@ task Hisat2 { platform: {description: "The platform used for sequencing.", category: "advanced"} downstreamTranscriptomeAssembly: {description: "Equivalent to hisat2's `--dta` flag.", category: "advanced"} summaryFilePath: {description: "Where the summary file should be written.", category: "advanced"} + sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: 
{description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} - sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} - compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} diff --git a/htseq.wdl b/htseq.wdl index cbd8e2ac..cf527535 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -27,9 +27,10 @@ task HTSeqCount { String outputTable = "output.tsv" String order = "pos" String stranded = "no" + Array[String] additionalAttributes = [] + String? featureType String? idattr - Array[String] additionalAttributes = [] Int nprocesses = 1 String memory = "8G" @@ -58,24 +59,24 @@ task HTSeqCount { runtime { cpu: nprocesses - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes docker: dockerImage } parameter_meta { + # inputs inputBams: {description: "The input BAM files.", category: "required"} gtfFile: {description: "A GTF/GFF file containing the features of interest.", category: "required"} outputTable: {description: "The path to which the output table should be written.", category: "common"} - nprocesses: {description: "Number of processes to run htseq with.", category: "advanced"} order: {description: "Equivalent to the -r option of htseq-count.", category: "advanced"} stranded: {description: "Equivalent to the -s option of htseq-count.", category: "common"} + additionalAttributes: {description: "Equivalent to the --additional-attr option of htseq-count.", category: "advanced"} featureType: {description: "Equivalent to the --type option of htseq-count.", 
category: "advanced"} idattr: {description: "Equivalent to the --idattr option of htseq-count.", category: "advanced"} - additionalAttributes: {description: "Equivalent to the --additional-attr option of htseq-count.", category: "advanced"} + nprocesses: {description: "Number of processes to run htseq with.", category: "advanced"} memory: {description: "The amount of memory the job requires in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/isoseq3.wdl b/isoseq3.wdl index 5060f0e7..c1c4397c 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -31,7 +31,7 @@ task Refine { String outputDir String outputNamePrefix - Int cores = 2 + Int threads = 2 String memory = "2G" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" @@ -44,7 +44,7 @@ task Refine { --min-polya-length ~{minPolyALength} \ ~{true="--require-polya" false="" requirePolyA} \ --log-level ~{logLevel} \ - --num-threads ~{cores} \ + --num-threads ~{threads} \ --log-file "~{outputDir}/~{outputNamePrefix}.stderr.log" \ ~{inputBamFile} \ ~{primerFile} \ @@ -61,7 +61,7 @@ task Refine { } runtime { - cpu: cores + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -77,7 +77,7 @@ task Refine { primerFile: {description: "Barcode/primer fasta file.", category: "required"} outputDir: {description: "Output directory path.", category: "required"} outputNamePrefix: {description: "Basename of the output files.", category: "required"} - cores: {description: "The number of cores to be used.", category: "advanced"} + threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/lima.wdl b/lima.wdl index 7ef9d4ab..1da4ef5e 100644 --- a/lima.wdl +++ b/lima.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE diff --git a/macs2.wdl b/macs2.wdl index fad3cb00..757eaf67 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -54,4 +54,4 @@ task PeakCalling { memory: memory docker: dockerImage } -} \ No newline at end of file +} diff --git a/manta.wdl b/manta.wdl index 5382d2a5..a7b7cf38 100644 --- a/manta.wdl +++ b/manta.wdl @@ -27,9 +27,10 @@ task Germline { File referenceFasta File referenceFastaFai String runDir = "./manta_run" + Boolean exome = false + File? callRegions File? 
callRegionsIndex - Boolean exome = false Int cores = 1 Int memoryGb = 4 @@ -71,9 +72,9 @@ task Germline { referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } runDir: {description: "The directory to use as run/output directory.", category: "common"} + exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} - exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} cores: {description: "The the number of cores required to run a program", category: "required"} memoryGb: {description: "The memory required to run the manta", category: "required"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -85,14 +86,15 @@ task Somatic { input { File tumorBam File tumorBamIndex - File? normalBam - File? normalBamIndex File referenceFasta File referenceFastaFai String runDir = "./manta_run" + Boolean exome = false + + File? normalBam + File? normalBamIndex File? callRegions File? 
callRegionsIndex - Boolean exome = false Int cores = 1 Int memoryGb = 4 @@ -138,16 +140,17 @@ task Somatic { } parameter_meta { + # inputs tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"} tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} - normalBam: {description: "The normal/control sample's BAM file.", category: "common"} - normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} runDir: {description: "The directory to use as run/output directory.", category: "common"} + exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} + normalBam: {description: "The normal/control sample's BAM file.", category: "common"} + normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "common"} callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} - exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} cores: {description: "The number of cores to use.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} diff --git a/minimap2.wdl b/minimap2.wdl index fb31fb7f..1b719da6 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2019 Leiden University Medical 
Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -61,7 +61,7 @@ task Indexing { } parameter_meta { - # input + # inputs useHomopolymerCompressedKmer: {description: "Use homopolymer-compressed k-mer (preferrable for pacbio).", category: "advanced"} kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"} minimizerWindowSize: {description: "Minimizer window size.", category: "advanced"} @@ -73,7 +73,7 @@ task Indexing { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - # output + # outputs indexFile: {description: "Indexed reference file."} } } @@ -137,27 +137,28 @@ task Mapping { } parameter_meta { + # inputs presetOption: {description: "This option applies multiple options at the same time.", category: "common"} kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"} + skipSelfAndDualMappings: {description: "Skip self and dual mappings (for the all-vs-all mode).", category: "advanced"} outputSam: {description: "Output in the sam format.", category: "common"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + addMDTagToSam: {description: "Adds a MD tag to the sam output file.", category: "common"} + secondaryAlignment: {description: "Whether to output secondary alignments.", category: "advanced"} + referenceFile: {description: "Reference fasta file.", category: "required"} + queryFile: {description: "Input fasta file.", category: "required"} maxIntronLength: {description: "Max intron length (effective with -xsplice; changing -r).", category: "advanced"} maxFragmentLength: {description: "Max fragment length (effective with -xsr or in the fragment mode).", category: "advanced"} - skipSelfAndDualMappings: {description: "Skip self and dual mappings (for the all-vs-all mode).", category: "advanced"} retainMaxSecondaryAlignments: {description: "Retain at most N secondary alignments.", category: "advanced"} matchingScore: {description: "Matching score.", category: "advanced"} mismatchPenalty: {description: "Mismatch penalty.", category: "advanced"} howToFindGTAG: {description: "How to find GT-AG. 
f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} - addMDTagToSam: {description: "Adds a MD tag to the sam output file.", category: "common"} - secondaryAlignment: {description: "Whether to output secondary alignments.", category: "advanced"} - referenceFile: {description: "Reference fasta file.", category: "required"} - queryFile: {description: "Input fasta file.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - # output + # outputs alignmentFile: {description: "Mapping and alignment between collections of dna sequences file."} } } diff --git a/multiqc.wdl b/multiqc.wdl index 7dcf333e..647394e9 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -22,16 +22,28 @@ version 1.0 task MultiQC { input { - # Use a string here so cromwell does not relocate an entire analysis directory + # Use a string here so cromwell does not relocate an entire + # analysis directory. Array[File] reports Boolean force = false Boolean dirs = false - Int? dirsDepth Boolean fullNames = false + String outDir = "." + Boolean dataDir = false + Boolean zipDataDir = true + Boolean export = false + Boolean flat = false + Boolean interactive = true + Boolean lint = false + Boolean pdf = false + # This must be actively enabled in my opinion. + # The tools default is to upload. + Boolean megaQCUpload = false + + Int? dirsDepth String? title String? comment String? fileName - String outDir = "." String? template String? tag String? ignore @@ -40,21 +52,15 @@ task MultiQC { File? fileList Array[String]+? exclude Array[String]+? 
module - Boolean dataDir = false String? dataFormat - Boolean zipDataDir = true - Boolean export = false - Boolean flat = false - Boolean interactive = true - Boolean lint = false - Boolean pdf = false - Boolean megaQCUpload = false # This must be actively enabled in my opinion. The tools default is to upload. File? config # A directory String? clConfig + String? memory Int timeMinutes = 2 + ceil(size(reports, "G") * 8) String dockerImage = "quay.io/biocontainers/multiqc:1.7--py_1" } + Int memoryGb = 2 + ceil(size(reports, "G")) # This is where the reports end up. It does not need to be changed by the @@ -69,8 +75,9 @@ task MultiQC { # By hashing the parent path we make sure there are no file colissions as # files from the same directory end up in the same directory, while files # from other directories get their own directory. Cromwell also uses this - # strategy. Using python's builtin hash is unique enough for these purposes. - + # strategy. Using python's builtin hash is unique enough + # for these purposes. + command { python3 < Date: Mon, 2 Nov 2020 09:17:33 +0100 Subject: [PATCH 365/902] Address travis error. --- CHANGELOG.md | 3 +++ fastqc.wdl | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 028c7400..c331112c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Bwa & bwa-mem2: Add parameter_meta for `outputHla`. ++ Multiqc: Removed WDL_AID excludes of "finished" & "dependencies" inputs. ++ Bam2fastx: Add localisation of input files to Bam2Fasta task. + isoseq3: `cores` input has been renamed to `threads` to match tool naming. + CCS: `cores` input has been renamed to `threads` to match tool naming. + CCS: Update CCS to version 5. 
diff --git a/fastqc.wdl b/fastqc.wdl index dd3dfc2e..feeeaae5 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -47,7 +47,7 @@ task Fastqc { Int threads = 1 String memory = "2G" Int timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 - String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0 + String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" Array[File]? noneArray File? noneFile From 163290340ff4f5ed0488c69d2c194dbb3428a423 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 2 Nov 2020 11:57:06 +0100 Subject: [PATCH 366/902] Add another batch of updated tasks. --- CHANGELOG.md | 4 + centrifuge.wdl | 4 +- ncbi.wdl | 51 ++++---- pbbam.wdl | 10 +- pbmm2.wdl | 13 ++- picard.wdl | 312 +++++++++++++++++++++++-------------------------- rtg.wdl | 79 ++++++------- sambamba.wdl | 57 +++++---- 8 files changed, 257 insertions(+), 273 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c331112c..f0dfaf1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Picard: Add parameter_meta to `SortSam`. ++ pbmm2: Add parameter_meta for `sample`. ++ Centrifuge: Rename output in task `KReport` to `KrakenReport` to resolve + name collision with task name. + Bwa & bwa-mem2: Add parameter_meta for `outputHla`. + Multiqc: Removed WDL_AID excludes of "finished" & "dependencies" inputs. + Bam2fastx: Add localisation of input files to Bam2Fasta task. diff --git a/centrifuge.wdl b/centrifuge.wdl index 1637abdd..07dc7f85 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -270,7 +270,7 @@ task KReport { >>> output { - File KReport = outputPrefix + "_kreport.tsv" + File KrakenReport = outputPrefix + "_kreport.tsv" } runtime { @@ -294,7 +294,7 @@ task KReport { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - KReport: {description: "File with kraken style report."} + KrakenReport: {description: "File with kraken style report."} } } diff --git a/ncbi.wdl b/ncbi.wdl index d157d902..da753bac 100644 --- a/ncbi.wdl +++ b/ncbi.wdl @@ -23,6 +23,10 @@ version 1.0 task GenomeDownload { input { String outputPath + Boolean verbose = true + Boolean debug = false + String executable = "ncbi-genome-download" + String? section = "refseq" String? format = "all" String? assemblyLevel = "all" @@ -32,11 +36,7 @@ task GenomeDownload { String? ncbiBaseUri Int? parallel Int? retries - Boolean verbose = true - Boolean debug = false String? domain = "all" - - String executable = "ncbi-genome-download" String? preCommand } @@ -58,22 +58,22 @@ task GenomeDownload { ~{true="--debug" false ="" debug } \ ~{domain} - # Check md5sums for all downloaded files + # Check md5sums for all downloaded files. for folder in $(realpath ~{outputPath})/*/*/* - do - ( - md5sums="$( - cd $folder - for file in * - do - if [[ ! $file == "MD5SUMS" ]] - then - grep $file MD5SUMS - fi - done - )" - cd $folder; echo $md5sums | md5sum -c) - done + do + ( + md5sums="$( + cd $folder + for file in * + do + if [[ ! $file == "MD5SUMS" ]] + then + grep $file MD5SUMS + fi + done + )" + cd $folder; echo $md5sums | md5sum -c) + done } output { @@ -106,7 +106,7 @@ task DownloadNtFasta{ mkdir -p ~{ntDir} rsync -av --partial rsync://ftp.ncbi.nih.gov/blast/db/FASTA/nt.gz* ~{ntDir} (cd ~{ntDir} && md5sum -c nt.gz.md5) - # Only unzip when necessary + # Only unzip when necessary. 
if ~{true='true' false='false' unzip} then zcat ~{ntDir}/nt.gz > ~{ntFilePath} @@ -132,15 +132,16 @@ task DownloadAccessionToTaxId { command { set -e -o pipefail mkdir -p ~{downloadDir} - rsync -av \ - --partial \ - rsync://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/nucl_*.accession2taxid.gz* \ - ~{downloadDir} + rsync \ + -av \ + --partial \ + rsync://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/nucl_*.accession2taxid.gz* \ + ~{downloadDir} (cd ~{downloadDir} && md5sum -c *.md5) for file in ~{downloadDir}/nucl_*.accession2taxid.gz do zcat $file | tail -n +2 | cut -f 2,3 ~{true="| gzip" false='' gzip} > \ - $file.seqtaxmap~{true='.gz' false='' gzip} + $file.seqtaxmap~{true='.gz' false='' gzip} done } diff --git a/pbbam.wdl b/pbbam.wdl index 52737a00..d271a11a 100644 --- a/pbbam.wdl +++ b/pbbam.wdl @@ -18,12 +18,14 @@ version 1.0 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. task Index { input { File bamFile + String? outputBamPath - + String memory = "2G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) String dockerImage = "quay.io/biocontainers/pbbam:1.6.0--h5b7e6e0_0" @@ -60,11 +62,9 @@ task Index { parameter_meta { # inputs bamFile: {description: "The BAM file for which an index should be made.", category: "required"} - outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", - category: "common"} + outputBamPath: {description: "The location where the BAM file should be written to. 
The index will appear alongside this link to the BAM file.", category: "common"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/pbmm2.wdl b/pbmm2.wdl index 31d4c667..5fda1c87 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -58,9 +58,10 @@ task Mapping { } parameter_meta { + # inputs presetOption: {description: "This option applies multiple options at the same time.", category: "required"} sort: {description: "Sort the output bam file.", category: "advanced"} - sample: {description: "Name of the sample"} + sample: {description: "Name of the sample.", category: "required"} referenceMMI: {description: "MMI file for the reference.", category: "required"} queryFile: {description: "BAM file with reads to align against the reference.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} @@ -68,7 +69,7 @@ task Mapping { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - # output + # outputs outputAlignmentFile: {description: "Mapped bam file."} outputIndexFile: {description: "Bam index file."} } diff --git a/picard.wdl b/picard.wdl index 49db8b8b..f1876f7b 100644 --- a/picard.wdl +++ b/picard.wdl @@ -26,8 +26,8 @@ task BedToIntervalList { File dict String outputPath = "regions.interval_list" - String memory = "4G" String javaXmx = "3G" + String memory = "4G" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -47,9 +47,9 @@ task BedToIntervalList { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -57,12 +57,10 @@ task BedToIntervalList { bedFile: {description: "A bed file.", category: "required"} dict: {description: "A sequence dict file.", category: "required"} outputPath: {description: "The location the output interval list should be written to.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -74,17 +72,19 @@ task CollectHsMetrics { File referenceFastaDict File referenceFastaFai File targets - File? baits String basename + File? baits + # Use the targets file as baits as a fallback, since often the baits # for a certain capture kit are not available. File baitsFile = select_first([baits, targets]) File targetsFile = targets - Int memoryMb = javaXmxMb + 512 Int javaXmxMb = 3072 - # Additional * 2 because picard multiple metrics reads the reference fasta twice. + Int memoryMb = javaXmxMb + 512 + # Additional * 2 because picard multiple metrics reads the + # reference fasta twice. 
Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -106,9 +106,9 @@ task CollectHsMetrics { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -116,18 +116,15 @@ task CollectHsMetrics { inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} targets: {description: "Picard interval file of the capture targets.", category: "required"} - baits: {description: "Picard interval file of the capture bait set.", category: "advanced"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} + baits: {description: "Picard interval file of the capture bait set.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -139,7 +136,6 @@ task CollectMultipleMetrics { File referenceFastaDict File referenceFastaFai String basename - Boolean collectAlignmentSummaryMetrics = true Boolean collectInsertSizeMetrics = true Boolean qualityScoreDistribution = true @@ -150,14 +146,13 @@ task CollectMultipleMetrics { Boolean collectSequencingArtifactMetrics = true Boolean collectQualityYieldMetrics = true - Int memoryMb = javaXmxMb + 512 Int javaXmxMb = 3072 + Int memoryMb = javaXmxMb + 512 # Additional * 2 because picard multiple metrics reads the reference fasta twice. 
Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } - command { set -e mkdir -p "$(dirname ~{basename})" @@ -173,8 +168,7 @@ task CollectMultipleMetrics { ~{true="PROGRAM=MeanQualityByCycle" false="" meanQualityByCycle} \ ~{true="PROGRAM=CollectBaseDistributionByCycle" false="" collectBaseDistributionByCycle} \ ~{true="PROGRAM=CollectGcBiasMetrics" false="" collectGcBiasMetrics} \ - ~{true="PROGRAM=CollectSequencingArtifactMetrics" false="" - collectSequencingArtifactMetrics} \ + ~{true="PROGRAM=CollectSequencingArtifactMetrics" false="" collectSequencingArtifactMetrics} \ ~{true="PROGRAM=CollectQualityYieldMetrics" false="" collectQualityYieldMetrics} } @@ -221,9 +215,9 @@ task CollectMultipleMetrics { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -231,30 +225,21 @@ task CollectMultipleMetrics { inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} - collectAlignmentSummaryMetrics: {description: "Equivalent to the `PROGRAM=CollectAlignmentSummaryMetrics` argument.", - category: "advanced"} - collectInsertSizeMetrics: {description: "Equivalent 
to the `PROGRAM=CollectInsertSizeMetrics` argument.", - category: "advanced"} - qualityScoreDistribution: {description: "Equivalent to the `PROGRAM=QualityScoreDistribution` argument.", - category: "advanced"} + collectAlignmentSummaryMetrics: {description: "Equivalent to the `PROGRAM=CollectAlignmentSummaryMetrics` argument.", category: "advanced"} + collectInsertSizeMetrics: {description: "Equivalent to the `PROGRAM=CollectInsertSizeMetrics` argument.", category: "advanced"} + qualityScoreDistribution: {description: "Equivalent to the `PROGRAM=QualityScoreDistribution` argument.", category: "advanced"} meanQualityByCycle: {description: "Equivalent to the `PROGRAM=MeanQualityByCycle` argument.", category: "advanced"} - collectBaseDistributionByCycle: {description: "Equivalent to the `PROGRAM=CollectBaseDistributionByCycle` argument.", - category: "advanced"} + collectBaseDistributionByCycle: {description: "Equivalent to the `PROGRAM=CollectBaseDistributionByCycle` argument.", category: "advanced"} collectGcBiasMetrics: {description: "Equivalent to the `PROGRAM=CollectGcBiasMetrics` argument.", category: "advanced"} - collectSequencingArtifactMetrics: {description: "Equivalent to the `PROGRAM=CollectSequencingArtifactMetrics` argument.", - category: "advanced"} - collectQualityYieldMetrics: {description: "Equivalent to the `PROGRAM=CollectQualityYieldMetrics` argument.", - category: "advanced"} + collectSequencingArtifactMetrics: {description: "Equivalent to the `PROGRAM=CollectSequencingArtifactMetrics` argument.", category: "advanced"} + collectQualityYieldMetrics: {description: "Equivalent to the `PROGRAM=CollectQualityYieldMetrics` argument.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -266,9 +251,9 @@ task CollectRnaSeqMetrics { String basename String strandSpecificity = "NONE" - String memory = "9G" String javaXmx = "8G" - # With 6 minutes per G there were several timeouts. + String memory = "9G" + # With 6 minutes per G there were several timeouts. Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -286,14 +271,14 @@ task CollectRnaSeqMetrics { } output { - File? chart = basename + ".RNA_Metrics.pdf" File metrics = basename + ".RNA_Metrics" + File? 
chart = basename + ".RNA_Metrics.pdf" } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -302,15 +287,11 @@ task CollectRnaSeqMetrics { inputBamIndex: {description: "The index of the input BAM file.", category: "required"} refRefflat: {description: "A refflat file containing gene annotations.", catehory: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} - strandSpecificity: {description: "Equivalent to the `STRAND_SPECIFICITY` option of picard's CollectRnaSeqMetrics.", - category: "common"} - + strandSpecificity: {description: "Equivalent to the `STRAND_SPECIFICITY` option of picard's CollectRnaSeqMetrics.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -325,8 +306,8 @@ task CollectTargetedPcrMetrics { Array[File]+ targetIntervals String basename - String memory = "4G" String javaXmx = "3G" + String memory = "4G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -352,9 +333,9 @@ task CollectTargetedPcrMetrics { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -362,21 +343,15 @@ task CollectTargetedPcrMetrics { inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - ampliconIntervals: {description: "An interval list describinig the coordinates of the amplicons sequenced.", - category: "required"} - targetIntervals: {description: "An interval list describing the coordinates of the targets sequenced.", - category: "required"} + ampliconIntervals: {description: "An interval list describinig the coordinates of the amplicons sequenced.", category: "required"} + targetIntervals: {description: "An interval list describing the coordinates of the targets sequenced.", category: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} - + javaXmx: {description: "The maximum memory 
available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -388,8 +363,8 @@ task CollectVariantCallingMetrics { File inputVCFIndex String basename - String memory = "9G" String javaXmx = "8G" + String memory = "9G" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -410,24 +385,22 @@ task CollectVariantCallingMetrics { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { # inputs dbsnp: {description: "DBSNP vcf file to use with CollectVariantCallingMetrics.", category: "required"} dbsnpIndex: {description: "Index file for the DBSNP VCF.", category: "required"} - inputVCF: {description: "Input VCF file", category: "required"} + inputVCF: {description: "Input VCF file.", category: "required"} inputVCFIndex: {description: "Index file for the input VCF.", category: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -436,8 +409,8 @@ task CreateSequenceDictionary { File inputFile String outputDir - String memory = "3G" String javaXmx = "2G" + String memory = "3G" String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -464,8 +437,8 @@ task CreateSequenceDictionary { # inputs inputFile: {description: "The input fasta file.", category: "required"} outputDir: {description: "Output directory path.", category: "required"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -473,17 +446,19 @@ task CreateSequenceDictionary { } } -# Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs +# Combine multiple recalibrated BAM files from scattered +# ApplyRecalibration runs. 
task GatherBamFiles { input { Array[File]+ inputBams Array[File]+ inputBamsIndex String outputBamPath + Boolean createMd5File = false - Int memoryMb = javaXmxMb + 512 - Int javaXmxMb = 1024 Int? compressionLevel - Boolean createMd5File = false + + Int javaXmxMb = 1024 + Int memoryMb = javaXmxMb + 512 # One minute per input gigabyte. Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" @@ -508,9 +483,9 @@ task GatherBamFiles { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -518,14 +493,12 @@ task GatherBamFiles { inputBams: {description: "The BAM files to be merged together.", category: "required"} inputBamsIndex: {description: "The indexes of the input BAM files.", category: "required"} outputBamPath: {description: "The path where the merged BAM file will be written.", caregory: "required"} - compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} createMd5File: {decription: "Whether to create an md5 file of the output BAM.", category: "advanced"} + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -535,8 +508,8 @@ task GatherVcfs { Array[File]+ inputVcfIndexes String outputVcfPath = "out.vcf.gz" - String memory = "5G" String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -555,9 +528,9 @@ task GatherVcfs { } runtime { - docker: dockerImage memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -565,17 +538,14 @@ task GatherVcfs { inputVcfs: {description: "The VCF files to be merged together.", category: "required"} inputVcfIndexes: {description: "The indexes of the input VCF files.", category: "required"} outputVcfPath: {description: "The path where the merged VCF file will be written.", caregory: "required"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -# Mark duplicate reads to avoid counting non-independent observations +# Mark duplicate reads to avoid counting non-independent observations. task MarkDuplicates { input { Array[File]+ inputBams @@ -583,31 +553,32 @@ task MarkDuplicates { String metricsPath Int compressionLevel = 1 Boolean createMd5File = false - Boolean useJdkInflater = true # Slightly faster than the intel one. - # Better results for compression level 1 (much smaller). Higher compression levels similar to intel deflater. - # NOTE: this might change in the future when the intel deflater is updated! + Boolean useJdkInflater = true # Slightly faster than the intel one. + # Better results for compression level 1 (much smaller). + # Higher compression levels similar to intel deflater. + # NOTE: this might change in the future when the intel + # deflater is updated! Boolean useJdkDeflater = true - # In GATK Best practices pipeline MarkDuplicates is given a 7G VM. + # The program default for READ_NAME_REGEX is appropriate in nearly every case. + # Sometimes we wish to supply "null" in order to turn off optical duplicate detection. + # This can be desirable if you don't mind the estimated library size + # being wrong and optical duplicate detection is taking >7 days and failing. + String? read_name_regex + + # In GATK Best practices pipeline MarkDuplicates is given a 7G VM. # https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L1040 Int javaXmxMb = 6656 # 6.5G String memoryMb = javaXmxMb + 512 Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" - - # The program default for READ_NAME_REGEX is appropriate in nearly every case. 
- # Sometimes we wish to supply "null" in order to turn off optical duplicate detection - # This can be desirable if you don't mind the estimated library size being wrong and - # optical duplicate detection is taking >7 days and failing - String? read_name_regex } # Task is assuming query-sorted input so that the Secondary and Supplementary reads get # marked correctly. This works because the output of BWA is query-grouped and therefore, # so is the output of MergeBamAlignment. While query-grouped isn't actually query-sorted, - # it's good enough for MarkDuplicates with ASSUME_SORT_ORDER="queryname" - + # it's good enough for MarkDuplicates with ASSUME_SORT_ORDER="queryname". command { set -e mkdir -p "$(dirname ~{outputBamPath})" @@ -625,7 +596,7 @@ task MarkDuplicates { ADD_PG_TAG_TO_READS=false \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} \ USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ - USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} } output { @@ -636,9 +607,9 @@ task MarkDuplicates { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -646,42 +617,39 @@ task MarkDuplicates { inputBams: {description: "The BAM files for which the duplicate reads should be marked.", category: "required"} outputBamPath: {description: "The location where the ouptut BAM file should be written.", category: "required"} metricsPath: {description: "The location where the output metrics file should be written.", category: "required"} - read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"} 
useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} - compressionLevel: {description: "The compression level at which the BAM files are written", category: "advanced"} + read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -# Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs +# Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs. task MergeVCFs { input { Array[File]+ inputVCFs Array[File]+ inputVCFsIndexes String outputVcfPath - - String memory = "5G" - String javaXmx = "4G" - Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" Int compressionLevel = 1 - Boolean useJdkInflater = true # Slightly faster than the intel one. - # Better results for compression level 1 (much smaller). 
Higher compression levels similar to intel deflater. + Boolean useJdkInflater = true # Slightly faster than the intel one. + # Better results for compression level 1 (much smaller). + # Higher compression levels similar to intel deflater. # NOTE: this might change in the future when the intel deflater is updated! Boolean useJdkDeflater = true + String javaXmx = "4G" + String memory = "5G" + Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } - # Using MergeVcfs instead of GatherVcfs so we can create indices - # See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket - + # Using MergeVcfs instead of GatherVcfs so we can create indices. + # See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket. command { set -e mkdir -p "$(dirname ~{outputVcfPath})" @@ -691,7 +659,7 @@ task MergeVCFs { OUTPUT=~{outputVcfPath} \ COMPRESSION_LEVEL=~{compressionLevel} \ USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ - USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} } output { @@ -700,9 +668,9 @@ task MergeVCFs { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -710,16 +678,13 @@ task MergeVCFs { inputVCFs: {description: "The VCF files to be merged.", category: "required"} inputVCFsIndexes: {description: "The indexes of the VCF files.", category: "required"} outputVcfPath: {description: "The location the output VCF file should be written to.", category: "required"} - - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} - compressionLevel: {description: "The compression level at which the BAM files are written", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -729,10 +694,12 @@ task SamToFastq { File inputBamIndex Boolean paired = true - String memory = "17G" String javaXmx = "16G" # High memory default to avoid crashes. + String memory = "17G" + Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" - File? NONE + + File? noneFile } String outputRead1 = basename(inputBam, "\.[bs]am") + "_R1.fastq.gz" @@ -751,13 +718,20 @@ task SamToFastq { output { File read1 = outputRead1 - File? read2 = if paired then outputRead2 else NONE - File? unpairedRead = if paired then outputUnpaired else NONE + File? 
read2 = if paired then outputRead2 else noneFile + File? unpairedRead = if paired then outputUnpaired else noneFile } runtime { - docker: dockerImage memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + meta { + WDL_AID: { + exclude: ["noneFile"] + } } } @@ -766,8 +740,8 @@ task ScatterIntervalList { File interval_list Int scatter_count - String memory = "4G" String javaXmx = "3G" + String memory = "4G" String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -790,8 +764,8 @@ task ScatterIntervalList { } runtime { - docker: dockerImage memory: memory + docker: dockerImage } } @@ -804,7 +778,7 @@ task SortSam { Int maxRecordsInRam = 500000 Int compressionLevel = 1 - # Default ram of 4 GB. Using 125001.0 to prevent an answer of + # Default ram of 4 GB. Using 125001.0 to prevent an answer of # 4.000000001 which gets rounded to 5. # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) @@ -840,13 +814,16 @@ task SortSam { } parameter_meta { - inputBam: {description: "The unsorted input BAM file", category: "required"} + # inputs + inputBam: {description: "The unsorted input BAM file.", category: "required"} outputPath: {description: "The location the output BAM file should be written to.", category: "required"} - XmxGb: {description: "The maximum memory available to picard SortSam. 
Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", - category: "advanced"} + sortByName: {description: "Sort the output file by name, default is position.", category: "advanced"} + createMd5File: {description: "Whether to create an MD5 digest for any BAM or FASTQ files created.", category: "advanced"} + maxRecordsInRam: {description: "This will specify the number of records stored in RAM before spilling to disk.", category: "advanced"} + compressionLevel: {description: "Compression level for all compressed files created.", category: "advanced"} + XmxGb: {description: "The maximum memory available to picard SortSam. Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -854,10 +831,11 @@ task SortVcf { input { Array[File]+ vcfFiles String outputVcfPath + File? 
dict - String memory = "9G" String javaXmx = "8G" + String memory = "9G" Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -879,9 +857,9 @@ task SortVcf { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -889,13 +867,10 @@ task SortVcf { vcfFiles: {description: "The VCF files to merge and sort.", category: "required"} outputVcfPath: {description: "The location the sorted VCF files should be written to.", category: "required"} dict: {description: "A sequence dictionary matching the VCF files.", category: "advanced"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -904,8 +879,9 @@ task RenameSample { File inputVcf String outputPath = "./picard/renamed.vcf" String newSampleName - String memory = "9G" + String javaXmx = "8G" + String memory = "9G" Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) String dockerImage = "quay.io/biocontainers/picard:2.19.0--0" } @@ -925,9 +901,9 @@ task RenameSample { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -935,8 +911,8 @@ task RenameSample { inputVcf: {description: "The VCF file to process.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} newSampleName: {description: "A string to replace the old sample name.", category: "required"} - memory: {description: "The memory required to run the programs", category: "advanced"} - javaXmx: {description: "The max. memory allocated for JAVA", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/rtg.wdl b/rtg.wdl index 104a5ef9..bfd32957 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -22,13 +22,14 @@ version 1.0 task Format { input { + Array[File]+ inputFiles String format = "fasta" String outputPath = "seq_data.sdf" - Array[File]+ inputFiles - String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" + String rtgMem = "8G" String memory = "9G" Int timeMinutes = 1 + ceil(size(inputFiles) * 2) + String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" } command { @@ -44,21 +45,20 @@ task Format { } runtime { - docker: dockerImage memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - format: {description: "Format of input. Allowed values are [fasta, fastq, fastq-interleaved, sam-se, sam-pe].", - category: "advanced"} - outputPath: {description: "Where the output should be placed.", category: "advanced"} + # inputs inputFiles: {description: "input sequence files. May be specified 1 or more times.", category: "required"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + format: {description: "Format of input. 
Allowed values are [fasta, fastq, fastq-interleaved, sam-se, sam-pe].", category: "advanced"} + outputPath: {description: "Where the output should be placed.", category: "advanced"} + rtgMem: {description: "The amount of memory rtg will allocate to the JVM.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - rtgMem: {description: "The amount of memory rtg will allocate to the JVM", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -68,18 +68,20 @@ task VcfEval { File baselineIndex File calls File callsIndex - File? evaluationRegions - File? bedRegions + Boolean squashPloidy = false + String outputMode = "split" String outputDir = "output/" File template Boolean allRecords = false Boolean decompose = false Boolean refOverlap = false + + File? evaluationRegions + File? bedRegions String? sample - Boolean squashPloidy = false - String outputMode = "split" - Int threads = 1 # tool default is number of cores in the system 😱 + String rtgMem = "8G" + Int threads = 1 # Tool default is number of cores in the system 😱. 
String memory = "9G" Int timeMinutes = 1 + ceil(size([baseline, calls], "G") * 5) String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" @@ -132,39 +134,32 @@ task VcfEval { } runtime { - docker: dockerImage cpu: threads memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - baseline: {description: "VCF file containing baseline variants", category: "required"} - baselineIndex: {description: "The baseline's VCF index", category: "required"} - calls: {description: "VCF file containing called variants", category: "required"} - callsIndex: {description: "The call's VCF index", category: "required"} - outputDir: {description: "Directory for output", category: "advanced"} - bedRegions: {description: "if set, only read VCF records that overlap the ranges contained in the specified BED file", category: "advanced"} - evaluationRegions: {description: "if set, evaluate within regions contained in the supplied BED file, allowing transborder matches. To be used for truth-set high-confidence regions or other regions of interest where region boundary effects should be minimized", - category: "advanced"} - template: {description: "SDF of the reference genome the variants are called against", category: "required"} - allRecords: {description: "use all records regardless of FILTER status (Default is to only process records where FILTER is \".\" or \"PASS\")", - category: "common"} - decompose: {description: "decompose complex variants into smaller constituents to allow partial credit", category: "common"} - refOverlap: {description: "allow alleles to overlap where bases of either allele are same-as-ref (Default is to only allow VCF anchor base overlap)", - category: "common"} - sample: {description: "the name of the sample to select. Use , to select different sample names for baseline and calls. 
(Required when using multi-sample VCF files)", - category: "common"} - squashPloidy: {description: "treat heterozygous genotypes as homozygous ALT in both baseline and calls, to allow matches that ignore zygosity differences", - category: "common"} - outputMode: {description: "output reporting mode. Allowed values are [split, annotate, combine, ga4gh, roc-only] (Default is split)", - category: "advanced"} - threads: {description: "Number of threads. Default is 1", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - rtgMem: {description: "The amount of memory rtg will allocate to the JVM", category: "advanced"} + # inputs + baseline: {description: "VCF file containing baseline variants.", category: "required"} + baselineIndex: {description: "The baseline's VCF index.", category: "required"} + calls: {description: "VCF file containing called variants.", category: "required"} + callsIndex: {description: "The call's VCF index.", category: "required"} + squashPloidy: {description: "treat heterozygous genotypes as homozygous ALT in both baseline and calls, to allow matches that ignore zygosity differences.", category: "common"} + outputMode: {description: "output reporting mode. 
Allowed values are [split, annotate, combine, ga4gh, roc-only] (Default is split).", category: "advanced"} + outputDir: {description: "Directory for output.", category: "advanced"} + template: {description: "SDF of the reference genome the variants are called against.", category: "required"} + allRecords: {description: "use all records regardless of FILTER status (Default is to only process records where FILTER is \".\" or \"PASS\").", category: "common"} + decompose: {description: "decompose complex variants into smaller constituents to allow partial credit.", category: "common"} + refOverlap: {description: "allow alleles to overlap where bases of either allele are same-as-ref (Default is to only allow VCF anchor base overlap).", category: "common"} + sample: {description: "the name of the sample to select. Use , to select different sample names for baseline and calls. (Required when using multi-sample VCF files).", category: "common"} + bedRegions: {description: "if set, only read VCF records that overlap the ranges contained in the specified BED file.", category: "advanced"} + evaluationRegions: {description: "if set, evaluate within regions contained in the supplied BED file, allowing transborder matches. To be used for truth-set high-confidence regions or other regions of interest where region boundary effects should be minimized.", category: "advanced"} + rtgMem: {description: "The amount of memory rtg will allocate to the JVM.", category: "advanced"} + threads: {description: "Number of threads. Default is 1.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } - diff --git a/sambamba.wdl b/sambamba.wdl index cd8da21e..df5ab4d1 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -20,29 +20,31 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - task Markdup { input { Array[File] inputBams String outputPath - # Sambamba scales like this: 1 thread is fully utilized (1). 2 threads 1.8 utilized. 3 -> 2.4, 4-> 2.7. - # 2 threads reduces wall clock time by more than 40%. - Int threads = 2 Int compressionLevel = 1 - Int? hashTableSize - Int? overFlowListSize - # sortBufferSize and ioBufferSize taken from markdup defaults as of sambamba 0.7.1 + # sortBufferSize and ioBufferSize taken from markdup defaults as of sambamba 0.7.1. Int sortBufferSize = 2048 Int ioBufferSize = 128 - Boolean removeDuplicates = false + Boolean removeDuplicates = false + Int? hashTableSize + Int? overFlowListSize + + # Sambamba scales like this: 1 thread is fully utilized (1). + # 2 threads 1.8 utilized. 3 -> 2.4, 4-> 2.7. + # 2 threads reduces wall clock time by more than 40%. + Int threads = 2 # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size. # Added 1024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize - String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" # Time minute calculation does not work well for higher number of threads. 
Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) / threads + String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } + String bamIndexPath = sub(outputPath, "\.bam$", ".bai") command { @@ -57,7 +59,7 @@ task Markdup { ~{"--sort-buffer-size " + sortBufferSize} \ ~{"--io-buffer-size " + ioBufferSize} \ ~{sep=' ' inputBams} ~{outputPath} - # sambamba creates an index for us + # sambamba creates an index for us. mv ~{outputPath}.bai ~{bamIndexPath} } @@ -67,8 +69,8 @@ task Markdup { } runtime { - memory: "~{memoryMb}M" cpu: threads + memory: "~{memoryMb}M" time_minutes: timeMinutes docker: dockerImage } @@ -78,17 +80,19 @@ task Markdup { inputBams: {description: "The input BAM files.", category: "required"} outputPath: {description: "Output directory path + output file.", category: "required"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} - memoryMb: {description: "The amount of memory available to the job in megabytes.", category: "advanced"} - removeDuplicates: {description: "Whether to remove the duplicates (instead of only marking them).", category: "advanced"} - hashTableSize: {description: "Sets sambamba's hash table size", category: "advanced"} - overFlowListSize: {description: "Sets sambamba's overflow list size", category: "advanced"} - sortBufferSize: {description: "The amount of mb allocated to the sort buffer", category: "advanced"} + sortBufferSize: {description: "The amount of mb allocated to the sort buffer.", category: "advanced"} ioBufferSize: {description: "The amount of mb allocated to each IO buffer. Sambamba uses two IO buffers.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + removeDuplicates: {description: "Whether to remove the duplicates (instead of only marking them).", category: "advanced"} + hashTableSize: {description: "Sets sambamba's hash table size.", category: "advanced"} + overFlowListSize: {description: "Sets sambamba's overflow list size.", category: "advanced"} threads: {description: "The number of threads that will be used for this task.", category: "advanced"} + memoryMb: {description: "The amount of memory available to the job in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + # outputs outputBam: {description: "Sorted BAM file."} + outputBamIndex: {description: "Sorted BAM file index."} } } @@ -98,14 +102,15 @@ task Sort { String outputPath = basename(inputBam, "\.bam") + ".sorted.bam" Boolean sortByName = false Int compressionLevel = 1 - Int threads = 1 + Int memoryPerThreadGb = 4 + Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb - String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) + String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } - # Select first needed as outputPath is optional input. (bug in cromwell) + # Select first needed as outputPath is optional input (bug in cromwell). String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") command { @@ -118,7 +123,7 @@ task Sort { -m ~{memoryPerThreadGb}G \ -o ~{outputPath} \ ~{inputBam} - # sambamba creates an index for us + # sambamba creates an index for us. 
mv ~{outputPath}.bai ~{bamIndexPath} } @@ -140,12 +145,14 @@ task Sort { outputPath: {description: "Output directory path + output file.", category: "required"} sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} - memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} - memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes.", category: "advanced"} threads: {description: "The number of threads that will be used for this task.", category: "advanced"} + memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + # outputs outputBam: {description: "Sorted BAM file."} + outputBamIndex: {description: "Sorted BAM file index."} } -} \ No newline at end of file +} From f81a99e864af4a567a33e0850dfd1f0672d60a96 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 2 Nov 2020 12:16:45 +0100 Subject: [PATCH 367/902] Update layout samtools.wdl. 
--- samtools.wdl | 92 +++++++++++++++++++++++++++------------------------- 1 file changed, 48 insertions(+), 44 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 9e415b0e..496cf233 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -55,8 +55,7 @@ task BgzipAndIndex { outputDir: {description: "The directory in which the output will be placed.", category: "required"} type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -104,11 +103,12 @@ task Fastq { String outputRead1 String? outputRead2 String? outputRead0 + Boolean appendReadNumber = false + Boolean outputQuality = false + Int? includeFilter Int? excludeFilter Int? excludeSpecificFilter - Boolean appendReadNumber = false - Boolean outputQuality = false Int? compressionLevel Int threads = 1 @@ -151,16 +151,16 @@ task Fastq { outputRead1: {description: "The location the reads (first reads for pairs, in case of paired-end sequencing) should be written to.", category: "required"} outputRead2: {description: "The location the second reads from pairs should be written to.", category: "common"} outputRead0: {description: "The location the unpaired reads should be written to (in case of paired-end sequenicng).", category: "advanced"} - includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`", category: "advanced"} - excludeFilter: {description: "Exclude reads with ONE OR MORE of these flags. 
Corresponds to `-F`", category: "advanced"} - excludeSpecificFilter: {description: "Exclude reads with ALL of these flags. Corresponds to `-G`", category: "advanced"} - appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. Corresponds to `-n/N`", category: "advanced"} + appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. Corresponds to `-n/N`.", category: "advanced"} outputQuality: {description: "Equivalent to samtools fastq's `-O` flag.", category: "advanced"} + includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`.", category: "advanced"} + excludeFilter: {description: "Exclude reads with ONE OR MORE of these flags. Corresponds to `-F`.", category: "advanced"} + excludeSpecificFilter: {description: "Exclude reads with ALL of these flags. Corresponds to `-G`.", category: "advanced"} + compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -168,6 +168,7 @@ task FilterShortReadsBam { input { File bamFile String outputPathBam + String memory = "1G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" @@ -196,6 +197,7 @@ task FilterShortReadsBam { } parameter_meta { + # inputs bamFile: {description: "The bam file to process.", category: "required"} outputPathBam: {description: "The filtered bam file.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} @@ -236,15 +238,16 @@ task Flagstat { outputPath: {description: "The location the ouput should be written to.", category: "required"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } task Index { input { File bamFile + String? outputBamPath + String memory = "2G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" @@ -281,12 +284,10 @@ task Index { parameter_meta { # inputs bamFile: {description: "The BAM file for which an index should be made.", category: "required"} - outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", - category: "common"} + outputBamPath: {description: "The location where the BAM file should be written to. 
The index will appear alongside this link to the BAM file.", category: "common"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -319,8 +320,7 @@ task Markdup { inputBam: {description: "The BAM file to be processed.", category: "required"} outputBamPath: {description: "The location of the output BAM file.", category: "required"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -329,12 +329,13 @@ task Merge { Array[File]+ bamFiles String outputBamPath = "merged.bam" Boolean force = true - Int threads = 1 - Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) + Int threads = 1 String memory = "4G" + Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } + String indexPath = sub(outputBamPath, "\.bam$",".bai") # Samtools uses additional threads for merge. 
@@ -355,21 +356,20 @@ task Merge { runtime { cpu: threads - docker: dockerImage memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { # inputs bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} - threads: {description: "Number of threads to use.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + threads: {description: "Number of threads to use.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -379,14 +379,15 @@ task Sort { String outputPath = basename(inputBam, "\.bam") + ".sorted.bam" Boolean sortByName = false Int compressionLevel = 1 - Int threads = 1 + Int memoryPerThreadGb = 4 + Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } - # Select first needed as outputPath is optional input. (bug in cromwell) + # Select first needed as outputPath is optional input (bug in cromwell). 
String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") command { @@ -410,10 +411,10 @@ task Sort { } runtime { - cpu: 1 + cpu: threads memory: "~{memoryGb}G" - docker: dockerImage time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -422,14 +423,15 @@ task Sort { outputPath: {description: "Output directory path + output file.", category: "required"} sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} - memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} - memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes.", category: "advanced"} threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} + memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs outputBam: {description: "Sorted BAM file."} + outputBamIndex: {description "Sorted BAM file index."} } } @@ -438,10 +440,13 @@ task Tabix { File inputFile String outputFilePath = "indexed.vcf.gz" String type = "vcf" + Int timeMinutes = 1 + ceil(size(inputFile, "G") * 2) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } - # FIXME: It is better to do the indexing on VCF creation. Not in a separate task. With file localization this gets hairy fast. + + # FIXME: It is better to do the indexing on VCF creation. + # Not in a separate task. With file localization this gets hairy fast. command { set -e mkdir -p "$(dirname ~{outputFilePath})" @@ -459,27 +464,26 @@ task Tabix { runtime { time_minutes: timeMinutes - docker: dockerImage + docker: dockerImage } parameter_meta { # inputs inputFile: {description: "The file to be indexed.", category: "required"} - outputFilePath: {description: "The location where the file should be written to. The index will appear alongside this link to the file.", - category: "common"} + outputFilePath: {description: "The location where the file should be written to. The index will appear alongside this link to the file.", category: "common"} type: {description: "The type of file (eg. vcf or bed) to be indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } task View { input { File inFile - File? 
referenceFasta String outputFileName = "view.bam" Boolean uncompressedBamOutput = false + + File? referenceFasta Int? includeFilter Int? excludeFilter Int? excludeSpecificFilter @@ -490,9 +494,10 @@ task View { Int timeMinutes = 1 + ceil(size(inFile, "G") * 5) String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } + String outputIndexPath = basename(outputFileName) + ".bai" - # Always output to bam and output header + # Always output to bam and output header. command { set -e mkdir -p "$(dirname ~{outputFileName})" @@ -524,9 +529,9 @@ task View { parameter_meta { # inputs inFile: {description: "A BAM, SAM or CRAM file.", category: "required"} - referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} outputFileName: {description: "The location the output BAM file should be written.", category: "common"} uncompressedBamOutput: {description: "Equivalent to samtools view's `-u` flag.", category: "advanced"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} includeFilter: {description: "Equivalent to samtools view's `-f` option.", category: "advanced"} excludeFilter: {description: "Equivalent to samtools view's `-F` option.", category: "advanced"} excludeSpecificFilter: {description: "Equivalent to samtools view's `-G` option.", category: "advanced"} @@ -534,7 +539,6 @@ task View { threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From d101e77cf3211079a7b7ca50c0203ffea811919b Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 2 Nov 2020 15:38:52 +0100 Subject: [PATCH 368/902] Add last set of updates. --- .github/PULL_REQUEST_TEMPLATE.md | 3 +- CHANGELOG.md | 2 + pacbio.wdl | 89 +++++++++++++++++++++++++ samtools.wdl | 2 +- seqtk.wdl | 9 +-- smoove.wdl | 9 +-- somaticseq.wdl | 89 ++++++++++++------------- spades.wdl | 12 ++-- star.wdl | 20 +++--- strelka.wdl | 34 +++++----- stringtie.wdl | 18 ++--- survivor.wdl | 19 +++--- talon.wdl | 10 +-- transcriptclean.wdl | 21 +++--- umi-tools.wdl | 41 ++++++------ unicycler.wdl | 5 +- vardict.wdl | 41 ++++++------ vt.wdl | 20 +++--- whatshap.wdl | 111 ++++++++++++++++--------------- wisestork.wdl | 44 ++++++------ 20 files changed, 351 insertions(+), 248 deletions(-) create mode 100644 pacbio.wdl diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 199344f5..1d52f502 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,4 +1,3 @@ - ### Checklist -- [ ] Pull request details were added to CHANGELOG.md +- [ ] Pull request details were added to CHANGELOG.md. - [ ] `parameter_meta` for each task is up to date. diff --git a/CHANGELOG.md b/CHANGELOG.md index f0dfaf1f..e7d7fed7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Samtools: `Sort` task now has `threads` in runtime instead of `1`. + Picard: Add parameter_meta to `SortSam`. + pbmm2: Add parameter_meta for `sample`. + Centrifuge: Rename output in task `KReport` to `KrakenReport` to resolve @@ -20,6 +21,7 @@ version 5.0.0-dev + Bam2fastx: Add localisation of input files to Bam2Fasta task. + isoseq3: `cores` input has been renamed to `threads` to match tool naming. 
+ CCS: `cores` input has been renamed to `threads` to match tool naming. ++ Add PacBio preprocessing specific tasks `mergePacBio` & `ccsChunks`. + CCS: Update CCS to version 5. + deepvariant: Add task for DeepVariant. + gatk: Make intervals optional for GenotypeGVCFs. diff --git a/pacbio.wdl b/pacbio.wdl new file mode 100644 index 00000000..01f6d4fd --- /dev/null +++ b/pacbio.wdl @@ -0,0 +1,89 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task mergePacBio { + input { + Array[File]+ reports + String mergedReport + + String memory = "4G" + String dockerImage = "lumc/pacbio-merge:0.2" + } + + command { + set -e + mkdir -p $(dirname ~{mergedReport}) + pacbio_merge \ + --reports ~{sep=" " reports} \ + --json-output ~{mergedReport} + } + + runtime { + memory: memory + docker: dockerImage + } + + output { + File MergedReport = mergedReport + } + + parameter_meta { + # inputs + reports: {description: "The PacBio report files to merge.", category: "required"} + mergedReport: {description: "The location the merged PacBio report file should be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + +task ccsChunks { + input { + Int chunkCount + + String memory = "4G" + String dockerImage = "python:3.7-slim" + } + + command { + set -e + python <' "modified_strelka.vcf" > ~{outputVCFName} } @@ -425,10 +424,10 @@ task ModifyStrelka { } parameter_meta { + # inputs strelkaVCF: {description: "A vcf file as produced by strelka.", category: "required"} outputVCFName: {description: "The location the output VCF file should be written to.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/spades.wdl b/spades.wdl index 204dbfea..7cc16d21 100644 --- a/spades.wdl +++ b/spades.wdl @@ -22,10 +22,11 @@ version 1.0 task Spades { input { - String outputDir - String? preCommand File read1 File? read2 + String outputDir + + String? preCommand File? interlacedReads File? sangerReads File? pacbioReads @@ -44,12 +45,13 @@ task Spades { Boolean? disableGzipOutput Boolean? disableRepeatResolution File? dataset - Int threads = 1 - Int memoryGb = 16 File? tmpDir String? k Float? covCutoff Int? phredOffset + + Int threads = 1 + Int memoryGb = 16 } command { @@ -100,4 +102,4 @@ task Spades { cpu: threads memory: "~{memoryGb}G" } -} \ No newline at end of file +} diff --git a/star.wdl b/star.wdl index 3d0e2eb0..68193fcd 100644 --- a/star.wdl +++ b/star.wdl @@ -24,6 +24,7 @@ task GenomeGenerate { input { String genomeDir = "STAR_index" File referenceFasta + File? referenceGtf Int? sjdbOverhang @@ -61,8 +62,10 @@ task GenomeGenerate { File? sjdbListFromGtfOut = "~{genomeDir}/sjdbList.fromGTF.out.tab" File? sjdbListOut = "~{genomeDir}/sjdbList.out.tab" File? 
transcriptInfo = "~{genomeDir}/transcriptInfo.tab" - Array[File] starIndex = select_all([chrLength, chrNameLength, chrName, chrStart, genome, genomeParameters, - sa, saIndex, exonGeTrInfo, exonInfo, geneInfo, sjdbInfo, sjdbListFromGtfOut, + Array[File] starIndex = select_all([chrLength, chrNameLength, chrName, + chrStart, genome, genomeParameters, + sa, saIndex, exonGeTrInfo, exonInfo, + geneInfo, sjdbInfo, sjdbListFromGtfOut, sjdbListOut, transcriptInfo]) } @@ -74,16 +77,15 @@ task GenomeGenerate { } parameter_meta { + # inputs genomeDir: {description:"The directory the STAR index should be written to.", categroy: "common"} referenceFasta: {description: "The reference Fasta file.", category: "required"} referenceGtf: {description: "The reference GTF file.", category: "common"} sjdbOverhang: {description: "Equivalent to STAR's `--sjdbOverhang` option.", category: "advanced"} - threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -95,6 +97,8 @@ task Star { String outFileNamePrefix String outSAMtype = "BAM SortedByCoordinate" String readFilesCommand = "zcat" + Int outBAMcompression = 1 + Int? outFilterScoreMin Float? outFilterScoreMinOverLread Int? outFilterMatchNmin @@ -103,7 +107,6 @@ task Star { String? twopassMode = "Basic" Array[String]? outSAMattrRGline String? outSAMunmapped = "Within KeepPairs" - Int outBAMcompression = 1 Int? 
limitBAMsortRAM Int runThreadN = 4 @@ -119,7 +122,7 @@ task Star { # So we solve it with an optional memory string and using select_first # in the runtime section. - #TODO Could be extended for all possible output extensions + #TODO: Could be extended for all possible output extensions. Map[String, String] samOutputNames = {"BAM SortedByCoordinate": "sortedByCoord.out.bam"} command { @@ -157,12 +160,14 @@ task Star { } parameter_meta { + # inputs inputR1: {description: "The first-/single-end FastQ files.", category: "required"} inputR2: {description: "The second-end FastQ files (in the same order as the first-end files).", category: "common"} indexFiles: {description: "The star index files.", category: "required"} outFileNamePrefix: {description: "The prefix for the output files. May include directories.", category: "required"} outSAMtype: {description: "The type of alignment file to be produced. Currently only `BAM SortedByCoordinate` is supported.", category: "advanced"} readFilesCommand: {description: "Equivalent to star's `--readFilesCommand` option.", category: "advanced"} + outBAMcompression: {description: "The compression level of the output BAM.", category: "advanced"} outFilterScoreMin: {description: "Equivalent to star's `--outFilterScoreMin` option.", category: "advanced"} outFilterScoreMinOverLread: {description: "Equivalent to star's `--outFilterScoreMinOverLread` option.", category: "advanced"} outFilterMatchNmin: {description: "Equivalent to star's `--outFilterMatchNmin` option.", category: "advanced"} @@ -174,7 +179,6 @@ task Star { limitBAMsortRAM: {description: "Equivalent to star's `--limitBAMsortRAM` option.", category: "advanced"} runThreadN: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - outBAMcompression: {description: "The compression level of the output BAM.", category: "advanced"} timeMinutes: {description: "The maximum amount of 
time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/strelka.wdl b/strelka.wdl index 50c38b55..f4b9888b 100644 --- a/strelka.wdl +++ b/strelka.wdl @@ -29,11 +29,12 @@ task Germline { Array[File]+ indexes File referenceFasta File referenceFastaFai - File? callRegions - File? callRegionsIndex Boolean exome = false Boolean rna = false + File? callRegions + File? callRegionsIndex + Int cores = 1 Int memoryGb = 4 Int timeMinutes = 90 @@ -61,28 +62,27 @@ task Germline { } runtime { - docker: dockerImage cpu: cores - time_minutes: timeMinutes memory: "~{memoryGb}G" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs runDir: {description: "The directory to use as run/output directory.", category: "common"} bams: {description: "The input BAM files.", category: "required"} indexes: {description: "The indexes for the input BAM files.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} - callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} rna: {description: "Whether or not the data is from RNA sequencing.", category: "common"} - + callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} + callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} cores: {description: "The number of cores to use.", 
category: "advanced"} memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -95,11 +95,12 @@ task Somatic { File tumorBamIndex File referenceFasta File referenceFastaFai + Boolean exome = false + File? callRegions File? callRegionsIndex File? indelCandidatesVcf File? indelCandidatesVcfIndex - Boolean exome = false Int cores = 1 Int memoryGb = 4 @@ -133,13 +134,14 @@ task Somatic { } runtime { - docker: dockerImage cpu: cores - time_minutes: timeMinutes memory: "~{memoryGb}G" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs runDir: {description: "The directory to use as run/output directory.", category: "common"} normalBam: {description: "The normal/control sample's BAM file.", category: "required"} normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "required"} @@ -147,17 +149,15 @@ task Somatic { tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} callRegionsIndex: {description: "The index of the bed file which indicates the regions 
to operate on.", category: "common"} indelCandidatesVcf: {description: "An indel candidates VCF file from manta.", category: "advanced"} indelCandidatesVcfIndex: {description: "The index for the indel candidates VCF file.", category: "advanced"} - exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} - cores: {description: "The number of cores to use.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } meta { @@ -165,4 +165,4 @@ task Somatic { exclude: ["doNotDefineThis"] } } -} \ No newline at end of file +} diff --git a/stringtie.wdl b/stringtie.wdl index 5ed62dea..fff4140c 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -24,9 +24,10 @@ task Stringtie { input { File bam File bamIndex - File? referenceGtf Boolean skipNovelTranscripts = false String assembledTranscriptsFile + + File? referenceGtf Boolean? firstStranded Boolean? secondStranded String? 
geneAbundanceFile @@ -64,19 +65,19 @@ task Stringtie { } parameter_meta { + # inputs bam: {description: "The input BAM file.", category: "required"} bamIndex: {description: "The input BAM file's index.", category: "required"} - referenceGtf: {description: "A reference GTF file to be used as guide.", category: "common"} skipNovelTranscripts: {description: "Whether new transcripts should be assembled or not.", category: "common"} assembledTranscriptsFile: {description: "Where the output of the assembly should be written.", category: "required"} + referenceGtf: {description: "A reference GTF file to be used as guide.", category: "common"} firstStranded: {description: "Equivalent to the --rf flag of stringtie.", category: "required"} secondStranded: {description: "Equivalent to the --fr flag of stringtie.", category: "required"} geneAbundanceFile: {description: "Where the abundance file should be written.", category: "common"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -84,13 +85,14 @@ task Merge { input { Array[File]+ gtfFiles String outputGtfPath + Boolean keepMergedTranscriptsWithRetainedIntrons = false + File? guideGtf Int? minimumLength Float? minimumCoverage Float? minimumFPKM Float? minimumTPM Float? minimumIsoformFraction - Boolean keepMergedTranscriptsWithRetainedIntrons = false String? 
label String memory = "10G" @@ -125,19 +127,19 @@ task Merge { } parameter_meta { + # inputs gtfFiles: {description: "The GTF files produced by stringtie.", category: "required"} outputGtfPath: {description: "Where the output should be written.", category: "required"} + keepMergedTranscriptsWithRetainedIntrons: {description: "Equivalent to the -i flag of 'stringtie --merge'.", category: "advanced"} guideGtf: {description: "Equivalent to the -G option of 'stringtie --merge'.", category: "advanced"} minimumLength: {description: "Equivalent to the -m option of 'stringtie --merge'.", category: "advanced"} minimumCoverage: {description: "Equivalent to the -c option of 'stringtie --merge'.", category: "advanced"} minimumFPKM: {description: "Equivalent to the -F option of 'stringtie --merge'.", category: "advanced"} minimumTPM: {description: "Equivalent to the -T option of 'stringtie --merge'.", category: "advanced"} minimumIsoformFraction: {description: "Equivalent to the -f option of 'stringtie --merge'.", category: "advanced"} - keepMergedTranscriptsWithRetainedIntrons: {description: "Equivalent to the -i flag of 'stringtie --merge'.", category: "advanced"} label: {description: "Equivalent to the -l option of 'stringtie --merge'.", category: "advanced"} memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/survivor.wdl b/survivor.wdl index b9583009..c7b31058 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2018 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -32,6 +30,7 @@ task Merge { Boolean distanceBySvSize = false Int minSize = 30 String outputPath = "./survivor/merged.vcf" + String memory = "24G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" @@ -64,15 +63,15 @@ task Merge { parameter_meta { # inputs - filePaths: {description: "An array of VCF files (predictions) to be merged by SURVIVOR", category: "required"} - breakpointDistance: {description: "The distance between pairwise breakpoints between SVs", category: "advanced"} - suppVecs: {description: "The minimum number of SV callers to support the merging", category: "advanced"} - svType: {description: "A boolean to include the type SV to be merged", category: "advanced"} - strandType: {description: "A boolean to include strand type of an SV to be merged", category: "advanced"} - distanceBySvSize: {description: "A boolean to predict the pairwise distance between the SVs based on their size", category: "advanced"} - minSize: {description: "The mimimum size of SV to be merged", category: "advanced"} + filePaths: {description: "An array of VCF files (predictions) to be merged by SURVIVOR.", category: "required"} + breakpointDistance: {description: "The distance between pairwise breakpoints between SVs.", category: "advanced"} + suppVecs: {description: "The minimum number of SV callers to support the merging.", category: "advanced"} + svType: {description: "A boolean to include the type SV to be merged.", category: "advanced"} + strandType: {description: "A boolean to include strand type of an SV to be merged.", category: 
"advanced"} + distanceBySvSize: {description: "A boolean to predict the pairwise distance between the SVs based on their size.", category: "advanced"} + minSize: {description: "The mimimum size of SV to be merged.", category: "advanced"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - memory: {description: "The memory required to run the programs", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/talon.wdl b/talon.wdl index c11ab9e0..61f5eb4a 100644 --- a/talon.wdl +++ b/talon.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2019 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE diff --git a/transcriptclean.wdl b/transcriptclean.wdl index 79661307..efdd95f4 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2019 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -54,10 +54,10 @@ task GetSJsFromGtf { parameter_meta { # inputs - gtfFile: {description: "Input gtf file", category: "required"} - genomeFile: {description: "Reference genome", category: "required"} - minIntronSize: {description: "Minimum size of intron to consider a junction.", category: "advanced"} + gtfFile: {description: "Input gtf file.", category: "required"} + genomeFile: {description: "Reference genome.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + minIntronSize: {description: "Minimum size of intron to consider a junction.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -97,7 +97,7 @@ task GetTranscriptCleanStats { parameter_meta { # inputs - inputSam: {description: "Output sam file from transcriptclean", category: "required"} + inputSam: {description: "Output sam file from transcriptclean.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -189,8 +189,7 @@ task TranscriptClean { cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs fastaFile: {description: "Fasta file containing corrected reads."} diff --git a/umi-tools.wdl b/umi-tools.wdl index c5f3b145..7b0a3991 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2017 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2017 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -26,9 +26,10 @@ task Extract { File? read2 String bcPattern String? bcPattern2 - Boolean threePrime = false String read1Output = "umi_extracted_R1.fastq.gz" String? 
read2Output = "umi_extracted_R2.fastq.gz" + Boolean threePrime = false + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" } @@ -50,21 +51,21 @@ task Extract { } runtime { - docker: dockerImage time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs read1: {description: "The first/single-end fastq file.", category: "required"} read2: {description: "The second-end fastq file.", category: "common"} bcPattern: {description: "The pattern to be used for UMI extraction. See the umi_tools docs for more information.", category: "required"} bcPattern2: {description: "The pattern to be used for UMI extraction in the second-end reads. See the umi_tools docs for more information.", category: "advanced"} - threePrime: {description: "Whether or not the UMI's are at the reads' 3' end. If false the UMIs are extracted from the 5' end.", category: "advanced"} read1Output: {description: "The location to write the first/single-end output fastq file to.", category: "advanced"} read2Output: {description: "The location to write the second-end output fastq file to.", category: "advanced"} + threePrime: {description: "Whether or not the UMI's are at the reads' 3' end. If false the UMIs are extracted from the 5' end.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -72,15 +73,15 @@ task Dedup { input { File inputBam File inputBamIndex - String? umiSeparator String outputBamPath - String? 
statsPrefix Boolean paired = true + String? umiSeparator + String? statsPrefix + String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) - - # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9) + # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9). String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" } @@ -107,21 +108,21 @@ task Dedup { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBam: {description: "The input BAM file.", categrory: "required"} inputBamIndex: {description: "The index for the ipnut BAM file.", cateogry: "required"} outputBamPath: {description: "The location to write the output BAM file to.", category: "required"} - statsPrefix: {description: "The prefix for the stats files.", category: "advanced"} - umiSeparator: {description: "Seperator used for UMIs in the read names.", category: "advanced"} paired: {description: "Whether or not the data is paired.", category: "common"} + umiSeparator: {description: "Seperator used for UMIs in the read names.", category: "advanced"} + statsPrefix: {description: "The prefix for the stats files.", category: "advanced"} memory: {description: "The amount of memory required for the task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/unicycler.wdl b/unicycler.wdl index fc393603..938d0c7e 100644 --- a/unicycler.wdl +++ b/unicycler.wdl @@ -22,12 +22,13 @@ version 1.0 task Unicycler { input { + String out + String? preCommand File? short1 File? short2 File? unpaired File? long - String out Int? verbosity Int? minFastaLength Int? keep @@ -125,4 +126,4 @@ task Unicycler { cpu: threads memory: memory } -} \ No newline at end of file +} diff --git a/vardict.wdl b/vardict.wdl index 92beb32e..fc37c9ef 100644 --- a/vardict.wdl +++ b/vardict.wdl @@ -27,29 +27,28 @@ task VarDict { String tumorSampleName File tumorBam File tumorBamIndex - String? normalSampleName - File? normalBam - File? normalBamIndex File referenceFasta File referenceFastaFai File bedFile String outputVcf - - Int chromosomeColumn = 1 - Int startColumn = 2 - Int endColumn = 3 - Int geneColumn = 4 - Boolean outputCandidateSomaticOnly = true Boolean outputAllVariantsAtSamePosition = true Float mappingQuality = 20 Int minimumTotalDepth = 8 Int minimumVariantDepth = 4 Float minimumAlleleFrequency = 0.02 + Int chromosomeColumn = 1 + Int startColumn = 2 + Int endColumn = 3 + Int geneColumn = 4 + + String? normalSampleName + File? normalBam + File? 
normalBamIndex + String javaXmx = "16G" Int threads = 1 String memory = "18G" - String javaXmx = "16G" Int timeMinutes = 300 String dockerImage = "quay.io/biocontainers/vardict-java:1.5.8--1" } @@ -93,33 +92,31 @@ task VarDict { } parameter_meta { + # inputs tumorSampleName: {description: "The name of the tumor/case sample.", category: "required"} tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"} tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} - normalSampleName: {description: "The name of the normal/control sample.", category: "common"} - normalBam: {description: "The normal/control sample's BAM file.", category: "common"} - normalBamIndex: {description: "The normal/control sample's BAM file.", category: "common"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} bedFile: {description: "A bed file describing the regions to operate on. 
These regions must be below 1e6 bases in size.", category: "required"} outputVcf: {description: "The location to write the output VCF file to.", category: "required"} - chromosomeColumn: {description: "Equivalent to vardict-java's `-c` option.", category: "advanced"} - startColumn: {description: "Equivalent to vardict-java's `-S` option.", category: "advanced"} - endColumn: {description: "Equivalent to vardict-java's `-E` option.", category: "advanced"} - geneColumn: {description: "Equivalent to vardict-java's `-g` option.", category: "advanced"} outputCandidateSomaticOnly: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-M` flag.", category: "advanced"} outputAllVariantsAtSamePosition: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-A` flag.", category: "advanced"} mappingQuality: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-Q` option.", category: "advanced"} minimumTotalDepth: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-d` option.", category: "advanced"} minimumVariantDepth: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-v` option.", category: "advanced"} minimumAlleleFrequency: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-f` option.", category: "advanced"} - + chromosomeColumn: {description: "Equivalent to vardict-java's `-c` option.", category: "advanced"} + startColumn: {description: "Equivalent to vardict-java's `-S` option.", category: "advanced"} + endColumn: {description: "Equivalent to vardict-java's `-E` option.", category: "advanced"} + geneColumn: {description: "Equivalent to vardict-java's `-g` option.", category: "advanced"} + normalSampleName: {description: "The name of the normal/control sample.", category: "common"} + normalBam: {description: "The normal/control sample's BAM file.", category: "common"} + normalBamIndex: {description: "The normal/control sample's BAM file.", category: "common"} + 
javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/vt.wdl b/vt.wdl index 99cc1318..95585ff2 100644 --- a/vt.wdl +++ b/vt.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -28,9 +28,10 @@ task Normalize { File referenceFastaFai Boolean ignoreMaskedRef = false String outputPath = "./vt/normalized_decomposed.vcf" - String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" + String memory = "4G" Int timeMinutes = 30 + String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" } command { @@ -56,13 +57,12 @@ task Normalize { # inputs inputVCF: {description: "The VCF file to process.", category: "required"} inputVCFIndex: {description: "The index of the VCF file to be processed.", category: "required"} - outputPath: {description: "The location the output VCF file should be written.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs", category: "advanced"} - memory: {description: "The memory required to run the programs", category: "advanced"} + ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs.", category: "advanced"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } - diff --git a/whatshap.wdl b/whatshap.wdl index 93624590..5c69400a 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -20,10 +20,14 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - task Phase { input { String outputVCF + File vcf + File vcfIndex + File phaseInput + File phaseInputIndex + File? reference File? referenceIndex String? tag @@ -33,20 +37,15 @@ task Phase { String? chromosome String? threshold String? ped - File vcf - File vcfIndex - File phaseInput - File phaseInputIndex String memory = "4G" Int timeMinutes = 120 - # Whatshap 1.0, tabix 0.2.5 + # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" } command { set -e - whatshap phase \ ~{vcf} \ ~{phaseInput} \ @@ -69,24 +68,27 @@ task Phase { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs outputVCF: {description: "Output VCF file. Add .gz to the file name to get compressed output. If omitted, use standard output.", category: "common"} - reference: {description: "Reference file. Provide this to detect alleles through re-alignment. 
If no index (.fai) exists, it will be created", category: "common"} - tag: {description: "Store phasing information with PS tag (standardized) or HP tag (used by GATK ReadBackedPhasing) (default: {description: PS)", category: "common"} - algorithm: {description: "Phasing algorithm to use (default: {description: whatshap)", category: "advanced"} - indels: {description: "Also phase indels (default: {description: do not phase indels)", category: "common"} + vcf: {description: "VCF or BCF file with variants to be phased (can be gzip-compressed).", category: "required"} + vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased.", category: "required"} + phaseInput: {description: "BAM, CRAM, VCF or BCF file(s) with phase information, either through sequencing reads (BAM, CRAM) or through phased blocks (VCF, BCF).", category: "required"} + phaseInputIndex: {description: "Index of BAM, CRAM, VCF or BCF file(s) with phase information.", category: "required"} + reference: {description: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created.", category: "common"} + referenceIndex: {description: "Index of reference file.", category: "common"} + tag: {description: "Store phasing information with PS tag (standardized) or HP tag (used by GATK ReadBackedPhasing) (default: {description: PS).", category: "common"} + algorithm: {description: "Phasing algorithm to use (default: {description: whatshap).", category: "advanced"} + indels: {description: "Also phase indels (default: {description: do not phase indels).", category: "common"} sample: {description: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times.", category: "common"} chromosome: {description: "Name of chromosome to phase. If not given, all chromosomes in the input VCF are phased. 
Can be used multiple times.", category: "common"} threshold: {description: "The threshold of the ratio between the probabilities that a pair of reads come from the same haplotype and different haplotypes in the read merging model (default: {description: 1000000).", category: "advanced"} ped: {description: "Use pedigree information in PED file to improve phasing (switches to PedMEC algorithm). Columns 2, 3, 4 must refer to child, mother, and father sample names as used in the VCF and BAM/CRAM. Other columns are ignored.", category: "advanced"} - vcf: {description: "VCF or BCF file with variants to be phased (can be gzip-compressed)", category: "required"} - vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased", category: "required"} - phaseInput: {description: "BAM, CRAM, VCF or BCF file(s) with phase information, either through sequencing reads (BAM, CRAM) or through phased blocks (VCF, BCF)", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -95,16 +97,17 @@ task Phase { task Stats { input { + File vcf + String? gtf String? sample String? tsv String? blockList String? chromosome - File vcf String memory = "4G" Int timeMinutes = 120 - # Whatshap 1.0, tabix 0.2.5 + # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" } @@ -125,18 +128,19 @@ task Stats { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - gtf: "Write phased blocks to GTF file." - sample: "Name of the sample to process. 
If not given, use first sample found in VCF." - tsv: "Filename to write statistics to (tab-separated)." - blockList: "Filename to write list of all blocks to (one block per line)." - chromosome: "Name of chromosome to process. If not given, all chromosomes in the input VCF are considered." - vcf: "Phased VCF file" + # inputs + vcf: {description: "Phased VCF file.", category: "required"} + gtf: {description: "Write phased blocks to GTF file.", category: "common"} + sample: {description: "Name of the sample to process. If not given, use first sample found in VCF.", category: "common"} + tsv: {description: "Filename to write statistics to (tab-separated).", category: "common"} + blockList: {description: "Filename to write list of all blocks to (one block per line).", category: "advanced"} + chromosome: {description: "Name of chromosome to process. If not given, all chromosomes in the input VCF are considered.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -145,57 +149,58 @@ task Stats { task Haplotag { input { + File vcf + File vcfIndex + File alignments + File alignmentsIndex String outputFile + File? reference File? referenceFastaIndex String? regions String? sample - File vcf - File vcfIndex - File alignments - File alignmentsIndex String memory = "4G" Int timeMinutes = 120 - # Whatshap 1.0, tabix 0.2.5 + # Whatshap 1.0, tabix 0.2.5. 
String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" } command { set -e - whatshap haplotag \ - ~{vcf} \ - ~{alignments} \ - ~{if defined(outputFile) then ("--output " + '"' + outputFile+ '"') else ""} \ - ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ - ~{if defined(regions) then ("--regions " + '"' + regions + '"') else ""} \ - ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} - - python3 -c "import pysam; pysam.index('~{outputFile}')" + ~{vcf} \ + ~{alignments} \ + ~{if defined(outputFile) then ("--output " + '"' + outputFile+ '"') else ""} \ + ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ + ~{if defined(regions) then ("--regions " + '"' + regions + '"') else ""} \ + ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} + + python3 -c "import pysam; pysam.index('~{outputFile}')" } output { - File bam = outputFile - File bamIndex = outputFile + ".bai" + File bam = outputFile + File bamIndex = outputFile + ".bai" } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - outputFile: "Output file. If omitted, use standard output." - reference: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created." - referenceFastaIndex: "Index for the reference file." - regions: "Specify region(s) of interest to limit the tagging to reads/variants overlapping those regions. You can specify a space-separated list of regions in the form of chrom:start-end, chrom (consider entire chromosome), or chrom:start (consider region from this start to end of chromosome)." - sample: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times." 
- vcf: "VCF file with phased variants (must be gzip-compressed and indexed)." - vcfIndex: "Index for the VCF or BCF file with variants to be phased." - alignments: "File (BAM/CRAM) with read alignments to be tagged by haplotype." - alignmentsIndex: "Index for the alignment file." + # inputs + vcf: {description: "VCF file with phased variants (must be gzip-compressed and indexed).", category: "required"} + vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased.", category: "required"} + alignments: {description: "File (BAM/CRAM) with read alignments to be tagged by haplotype.", category: "required"} + alignmentsIndex: {description: "Index for the alignment file.", category: "required"} + outputFile: {description: "Output file. If omitted, use standard output.", category: "required"} + reference: {description: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created.", category: "common"} + referenceFastaIndex: {description: "Index for the reference file.", category: "common"} + regions: {description: "Specify region(s) of interest to limit the tagging to reads/variants overlapping those regions. You can specify a space-separated list of regions in the form of chrom:start-end, chrom (consider entire chromosome), or chrom:start (consider region from this start to end of chromosome).", category: "advanced"} + sample: {description: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/wisestork.wdl b/wisestork.wdl index 0fd812b1..6be32168 100644 --- a/wisestork.wdl +++ b/wisestork.wdl @@ -22,13 +22,15 @@ version 1.0 task Count { input { - Int? binSize - File reference - File referenceIndex - File? binFile File inputBam File inputBamIndex + File reference + File referenceIndex String outputBed = "output.bed" + + Int? binSize + File? binFile + String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -54,15 +56,17 @@ task Count { task GcCorrect { input { - Int? binSize File reference File referenceIndex - File? binFile File inputBed String outputBed = "output.bed" + + Int? binSize + File? binFile Float? fracN Int? iter Float? fracLowess + String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -91,13 +95,16 @@ task GcCorrect { task Newref { input { - Int? binSize File reference File referenceIndex - File? binFile Array[File]+ inputBeds String outputBed = "output.bed" + + Int? binSize + File? binFile Int? nBins + + Int memory = 2 + ceil(length(inputBeds) * 0.15) String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -106,36 +113,36 @@ task Newref { mkdir -p $(dirname ~{outputBed}) wisestork newref \ ~{"--binsize " + binSize} \ - --reference ~{reference} \ - ~{"--bin-file " + binFile} \ - --output ~{outputBed} \ - -I ~{sep=" -I " inputBeds} \ - ~{"--n-bins " + nBins} + --reference ~{reference} \ + ~{"--bin-file " + binFile} \ + --output ~{outputBed} \ + -I ~{sep=" -I " inputBeds} \ + ~{"--n-bins " + nBins} } output { File bedFile = outputBed } - Int memory = 2 + ceil(length(inputBeds) * 0.15) - runtime { - docker: dockerImage memory: "~{memory}G" + docker: dockerImage } } task Zscore { input { - Int? binSize File reference File referenceIndex - File? 
binFile File inputBed File inputBedIndex File dictionaryFile File dictionaryFileIndex String outputBed = "output.bed" + + Int? binSize + File? binFile + String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -159,4 +166,3 @@ task Zscore { docker: dockerImage } } - From f34613058333fbc3a523ef513fdc6026cdd87378 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 2 Nov 2020 17:21:20 +0100 Subject: [PATCH 369/902] Update lima to match isoseq3 and ccs changes. --- CHANGELOG.md | 1 + lima.wdl | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e7d7fed7..9bff5f3c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ version 5.0.0-dev + Bwa & bwa-mem2: Add parameter_meta for `outputHla`. + Multiqc: Removed WDL_AID excludes of "finished" & "dependencies" inputs. + Bam2fastx: Add localisation of input files to Bam2Fasta task. ++ Lima: `cores` input has been renamed to `threads` to match tool naming. + isoseq3: `cores` input has been renamed to `threads` to match tool naming. + CCS: `cores` input has been renamed to `threads` to match tool naming. + Add PacBio preprocessing specific tasks `mergePacBio` & `ccsChunks`. 
diff --git a/lima.wdl b/lima.wdl index 1da4ef5e..33b2328b 100644 --- a/lima.wdl +++ b/lima.wdl @@ -48,7 +48,7 @@ task Lima { File barcodeFile String outputPrefix - Int cores = 2 + Int threads = 2 String memory = "2G" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/lima:1.11.0--0" @@ -82,7 +82,7 @@ task Lima { --guess-min-count ~{guessMinCount} \ ~{true="--peek-guess" false="" peekGuess} \ --log-level ~{logLevel} \ - --num-threads ~{cores} \ + --num-threads ~{threads} \ ~{"--log-file " + outputPrefix + ".stderr.log"} \ ~{inputBamFile} \ ~{barcodeFile} \ @@ -110,7 +110,7 @@ task Lima { } runtime { - cpu: cores + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -143,7 +143,7 @@ task Lima { inputBamFile: {description: "Bam input file.", category: "required"} barcodeFile: {description: "Barcode/primer fasta file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - cores: {description: "The number of cores to be used.", category: "advanced"} + threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From b96ec320ded2fec077f358460376bba1582337ac Mon Sep 17 00:00:00 2001 From: Jasper Date: Tue, 3 Nov 2020 17:21:20 +0100 Subject: [PATCH 370/902] Update bwa.wdl. Co-authored-by: Davy Cats --- bwa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index 0f09f7a9..ee01957e 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -33,8 +33,8 @@ task Mem { String? readgroup Int? sortThreads + Int? 
memoryGb - Int threads = 4 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10 From adf58a85569ca3335874b5cf55bf86933aacbb8e Mon Sep 17 00:00:00 2001 From: Jasper Date: Tue, 3 Nov 2020 17:22:19 +0100 Subject: [PATCH 371/902] Update bwa-mem2.wdl. Co-authored-by: Davy Cats --- bwa-mem2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index 34cd38a6..89a48fbd 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -33,8 +33,8 @@ task Mem { String? readgroup Int? sortThreads + Int? memoryGb - Int threads = 4 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10 From a5aa0fef74bbadb4ea1562ebf65e860975dc3fbe Mon Sep 17 00:00:00 2001 From: Jasper Date: Tue, 3 Nov 2020 17:22:56 +0100 Subject: [PATCH 372/902] Update bwa-mem2.wdl --- bwa-mem2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index 89a48fbd..b4ca877a 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -37,7 +37,7 @@ task Mem { Int? memoryGb Int threads = 4 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) - # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10 + # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-6a15c99309c82b345497d24489bee67bbb76c2f6:1c9c3227b9bf825a8dc9726a25701aa23c0b1f12-0" } From d35543c91eba6179b6738f9ac3eb412ded0f60a6 Mon Sep 17 00:00:00 2001 From: Jasper Date: Tue, 3 Nov 2020 17:23:12 +0100 Subject: [PATCH 373/902] Update bwa.wdl --- bwa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index ee01957e..f4061729 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -37,7 +37,7 @@ task Mem { Int? 
memoryGb Int threads = 4 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) - # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10 + # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } From 38514b1db4049d1127f03e4c888027c99c2b9bcf Mon Sep 17 00:00:00 2001 From: Jasper Date: Tue, 3 Nov 2020 17:25:29 +0100 Subject: [PATCH 374/902] Update bam2fastx.wdl --- bam2fastx.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 2ad08581..2ae22a57 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -58,7 +58,7 @@ task Bam2Fasta { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{sep=" " bam} + $bamFiles } output { From 8a02fc35c76674d7b2b7e1d4b9addaaaea58e9ff Mon Sep 17 00:00:00 2001 From: Jasper Date: Wed, 4 Nov 2020 08:49:03 +0100 Subject: [PATCH 375/902] Update bwa-mem2.wdl --- bwa-mem2.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index b4ca877a..4566e68c 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -34,8 +34,8 @@ task Mem { String? readgroup Int? sortThreads - Int? memoryGb Int threads = 4 + Int? memoryGb Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10. 
String dockerImage = "quay.io/biocontainers/mulled-v2-6a15c99309c82b345497d24489bee67bbb76c2f6:1c9c3227b9bf825a8dc9726a25701aa23c0b1f12-0" @@ -101,8 +101,8 @@ task Mem { compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} readgroup: {description: "A readgroup identifier.", category: "common"} sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} - memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} threads: {description: "The number of threads to use for alignment.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From c8e043006f744f23155d0fba00ebec962bf5c910 Mon Sep 17 00:00:00 2001 From: Jasper Date: Wed, 4 Nov 2020 08:49:50 +0100 Subject: [PATCH 376/902] Update bwa.wdl --- bwa.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index f4061729..e87fd82a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -33,9 +33,9 @@ task Mem { String? readgroup Int? sortThreads - - Int? memoryGb + Int threads = 4 + Int? memoryGb Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. 
String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" @@ -96,8 +96,8 @@ task Mem { compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} readgroup: {description: "A readgroup identifier.", category: "common"} sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} - memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} threads: {description: "The number of threads to use for alignment.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 4a2f3366cb5f0cd57bfab8da01369c29c6a35063 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 5 Nov 2020 16:19:31 +0100 Subject: [PATCH 377/902] add tasks for amber and cobalt, group tasks from hmftools in one file --- gripss.wdl | 122 ----------------------------------------------------- sage.wdl | 100 ------------------------------------------- 2 files changed, 222 deletions(-) delete mode 100644 gripss.wdl delete mode 100644 sage.wdl diff --git a/gripss.wdl b/gripss.wdl deleted file mode 100644 index c9a8f27d..00000000 --- a/gripss.wdl +++ /dev/null @@ -1,122 +0,0 @@ -version 1.0 - -# Copyright (c) 2020 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the 
Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -task ApplicationKt { - input { - File inputVcf - String outputPath = "gripss.vcf.gz" - File referenceFasta - File referenceFastaFai - File referenceFastaDict - File breakpointHotspot - File breakendPon - File breakpointPon - - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" - } - - command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ - com.hartwig.hmftools.gripss.GripssApplicationKt \ - -ref_genome ~{referenceFasta} \ - -breakpoint_hotspot ~{breakpointHotspot} \ - -breakend_pon ~{breakendPon} \ - -breakpoint_pon ~{breakpointPon} \ - -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - } - - runtime { - memory: memory - time_minutes: timeMinutes # !UnknownRuntimeKey - docker: dockerImage - } - - parameter_meta { - inputVcf: {description: "The input VCF.", category: "required"} - outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The 
sequence dictionary associated with the reference fasta file.", - category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} - breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} - breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} - -task HardFilterApplicationKt { - input { - File inputVcf - String outputPath = "gripss_hard_filter.vcf.gz" - - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" - } - - command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ - com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ - -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - } - - runtime { - memory: memory - time_minutes: timeMinutes # !UnknownRuntimeKey - docker: dockerImage - } - - parameter_meta { - inputVcf: {description: "The input VCF.", category: "required"} - outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} 
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} \ No newline at end of file diff --git a/sage.wdl b/sage.wdl deleted file mode 100644 index ab42bee8..00000000 --- a/sage.wdl +++ /dev/null @@ -1,100 +0,0 @@ -version 1.0 - -# Copyright (c) 2020 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -task Sage { - input { - String tumorName - File tumorBam - File tumorBamIndex - String? normalName - File? normalBam - File? 
normalBamIndex - File referenceFasta - File referenceFastaDict - File referenceFastaFai - File hotspots - File panelBed - File highConfidenceBed - Boolean hg38 = false - String outputPath = "./sage.vcf.gz" - - Int threads = 2 - String javaXmx = "32G" - String memory = "33G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--2" - } - - command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-sage-2.2-2/sage.jar \ - com.hartwig.hmftools.sage.SageApplication \ - -tumor ~{tumorName} \ - -tumor_bam ~{tumorBam} \ - ~{"-reference " + normalName} \ - ~{"-reference_bam " + normalBam} \ - -ref_genome ~{referenceFasta} \ - -hotspots ~{hotspots} \ - -panel_bed ~{panelBed} \ - -high_confidence_bed ~{highConfidenceBed} \ - -assembly ~{true="hg38" false="hg19" hg38} \ - -threads ~{threads} \ - -out ~{outputPath} - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - # There is some plots as well, but in the current container the labels in the plots are just series of `□`s. - # This seems to be a systemic issue with R generated plots in biocontainers... 
- } - - runtime { - time_minutes: timeMinutes # !UnknownRuntimeKey - cpu: threads - docker: dockerImage - memory: memory - } - - parameter_meta { - tumorName: {description: "The name of the tumor sample.", category: "required"} - tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} - tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} - normalName: {description: "The name of the normal/reference sample.", category: "common"} - normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} - panelBed: {description: "A bed file describing coding regions to search for in frame indels.", category: "required"} - highConfidenceBed: {description: "A bed files describing high confidence mapping regions.", category: "required"} - - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} From 333f052f344b331591797bccbe45028c6882b770 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 6 Nov 2020 11:44:09 +0100 Subject: [PATCH 378/902] Update first set of parameter_meta. --- CHANGELOG.md | 1 + TO-DO.md | 13 +++++++ bcftools.wdl | 15 ++++++++ bedtools.wdl | 32 ++++++++++++++++ biopet/bamstats.wdl | 11 +++--- biopet/biopet.wdl | 85 ++++++++++++++++++++++------------------- biopet/sampleconfig.wdl | 21 +++++----- biopet/seqstat.wdl | 9 +++-- biowdl.wdl | 3 ++ bowtie.wdl | 4 ++ chunked-scatter.wdl | 6 +++ 11 files changed, 143 insertions(+), 57 deletions(-) create mode 100644 TO-DO.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bff5f3c..b7a8741f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Complete `parameter_meta` for tasks missing the outputs. + Samtools: `Sort` task now has `threads` in runtime instead of `1`. + Picard: Add parameter_meta to `SortSam`. + pbmm2: Add parameter_meta for `sample`. diff --git a/TO-DO.md b/TO-DO.md new file mode 100644 index 00000000..cc76a5d6 --- /dev/null +++ b/TO-DO.md @@ -0,0 +1,13 @@ +#TO DO +## Requires parameter_meta: +* biopet.wdl: `ExtractAdaptersFastqc`. + +## Duplicate tasks: +* + +## Out of date with new cluster & parameter_meta: +* bamstats.wdl: `Generate`. +* biopet.wdl: `BaseCounter`, `FastqSplitter`, `FastqSync`, + `ValidateAnnotation`, `ValidateFastq`, `ValidateVcf`, `VcfStats`. +* sampleconfig.wdl: `SampleConfig`, `SampleConfigCromwellArrays`, `CaseControl`. +* seqstat.wdl: `Generate`. 
diff --git a/bcftools.wdl b/bcftools.wdl index 41825747..28380dea 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -118,6 +118,10 @@ task Annotate { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Annotated VCF file."} + outputVcfIndex: {description: "Index of the annotated VCF file."} } } @@ -165,6 +169,10 @@ task Sort { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Sorted VCF file."} + outputVcfIndex: {description: "Index of sorted VCF file."} } } @@ -272,6 +280,9 @@ task Stats { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + stats: {description: "Text file stats which is suitable for machine processing and can be plotted using plot-vcfstats."} } } @@ -316,5 +327,9 @@ task View { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "VCF file."} + outputVcfIndex: {description: "Index of VCF file."} } } diff --git a/bedtools.wdl b/bedtools.wdl index b7a03c17..3dbf93cb 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -60,6 +60,9 @@ task Complement { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + complementBed: {description: "All intervals in a genome that are not covered by at least one interval in the input file."} } } @@ -95,6 +98,9 @@ task Merge { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + mergedBed: {description: "Merged bed file."} } } @@ -132,6 +138,9 @@ task MergeBedFiles { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + mergedBed: {description: "Merged bed file."} } } @@ -179,6 +188,26 @@ task Sort { time_minutes: timeMinutes docker: dockerImage } + + parameter_meta { + # inputs + inputBed: {description: "The bed to sort.", category: "required"} + sizeA: {description: "Sort by feature size in ascending order.", category: "common"} + sizeD: {description: "Sort by feature size in descending order.", category: "common"} + chrThenSizeA: {description: "Sort by chromosome (asc), then by feature size (asc).", category: "common"} + chrThenSizeD: {description: "Sort by chromosome (asc), then by feature size (desc).", category: "common"} + chrThenScoreA: {description: "Sort by chromosome (asc), then by score (asc).", category: "common"} + chrThenScoreD: {description: "Sort by chromosome (asc), then by score (desc).", category: "common"} + outputBed: {description: "The path to write the output to.", category: "advanced"} + genome: {description: "Define sort order by order of tab-delimited file with chromosome names in the first column.", category: "advanced"} + faidx: {description: "Define sort order by order of tab-delimited file with chromosome names in the first column. 
Sort by specified chromosome order.", category: "advanced"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + sortedBed: {description: "The sorted bed file."} + } } task Intersect { @@ -226,5 +255,8 @@ task Intersect { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + intersectedBed: {description: "The intersected bed file."} } } diff --git a/biopet/bamstats.wdl b/biopet/bamstats.wdl index d71355d3..d01bc10c 100644 --- a/biopet/bamstats.wdl +++ b/biopet/bamstats.wdl @@ -24,18 +24,19 @@ import "../common.wdl" as common task Generate { input { - String? preCommand - File? toolJar IndexedBamFile bam - File? bedFile Boolean scatterMode = false Boolean onlyUnmapped = false Boolean tsvOutputs = false String outputDir + + String? preCommand + File? toolJar + File? bedFile Reference? reference - String memory = "9G" String javaXmx = "8G" + String memory = "9G" } File referenceFasta = if defined(reference) then select_first([reference]).fasta else "" @@ -66,4 +67,4 @@ task Generate { runtime { memory: memory } -} \ No newline at end of file +} diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index 89319409..07f51e67 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -24,15 +24,16 @@ import "../common.wdl" task BaseCounter { input { - String? preCommand - File? 
toolJar IndexedBamFile bam File refFlat String outputDir String prefix - String memory = "5G" + String? preCommand + File? toolJar + String javaXmx = "4G" + String memory = "5G" } String toolCommand = if defined(toolJar) @@ -98,16 +99,17 @@ task ExtractAdaptersFastqc { String outputDir String adapterOutputFilePath = outputDir + "/adapter.list" String contamsOutputFilePath = outputDir + "/contaminations.list" + Boolean? skipContams File? knownContamFile File? knownAdapterFile Float? adapterCutoff Boolean? outputAsFasta - String memory = "9G" String javaXmx = "8G" - String dockerImage = "quay.io/biocontainers/biopet-extractadaptersfastqc:0.2--1" + String memory = "9G" Int timeMinutes = 5 + String dockerImage = "quay.io/biocontainers/biopet-extractadaptersfastqc:0.2--1" } command { @@ -133,20 +135,21 @@ task ExtractAdaptersFastqc { runtime { memory: memory - docker: dockerImage time_minutes: timeMinutes + docker: dockerImage } } task FastqSplitter { input { - String? preCommand File inputFastq Array[String]+ outputPaths + + String? preCommand File? toolJar - String memory = "5G" String javaXmx = "4G" + String memory = "5G" String dockerImage = "quay.io/biocontainers/biopet-fastqsplitter:0.1--2" } @@ -170,15 +173,16 @@ task FastqSplitter { task FastqSync { input { - String? preCommand FastqPair refFastq FastqPair inputFastq String out1path String out2path + + String? preCommand File? toolJar - String memory = "5G" String javaXmx = "4G" + String memory = "5G" } String toolCommand = if defined(toolJar) @@ -200,8 +204,8 @@ task FastqSync { output { FastqPair out1 = object { - R1: out1path, - R2: out2path + R1: out1path, + R2: out2path } } @@ -215,14 +219,15 @@ task ScatterRegions { File referenceFasta File referenceFastaDict Int scatterSizeMillions = 1000 + Boolean notSplitContigs = false + Int? scatterSize File? regions - Boolean notSplitContigs = false File? bamFile File? 
bamIndex - String memory = "1G" String javaXmx = "500M" + String memory = "1G" Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/biopet-scatterregions:0.2--0" } @@ -264,41 +269,40 @@ task ScatterRegions { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} + notSplitContigs: {description: "Equivalent to biopet scatterregions' `--notSplitContigs` flag.", category: "advanced"} scatterSize: {description: "Overrides scatterSizeMillions with a smaller value if set.", category: "advanced"} regions: {description: "The regions to be scattered.", category: "advanced"} - notSplitContigs: {description: "Equivalent to biopet scatterregions' `--notSplitContigs` flag.", - category: "advanced"} - bamFile: {description: "Equivalent to biopet scatterregions' `--bamfile` option.", - category: "advanced"} + bamFile: {description: "Equivalent to biopet scatterregions' `--bamfile` option.", category: "advanced"} bamIndex: {description: "The index for the bamfile given through bamFile.", category: "advanced"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + scatters: {description: "Smaller scatter regions of equal size."} } } task ValidateAnnotation { input { + Reference reference + File? refRefflat File? gtfFile - Reference reference - String memory = "4G" String javaXmx = "3G" + String memory = "4G" String dockerImage = "quay.io/biocontainers/biopet-validateannotation:0.1--0" } @@ -323,8 +327,9 @@ task ValidateFastq { input { File read1 File? read2 - String memory = "4G" + String javaXmx = "3G" + String memory = "4G" String dockerImage = "quay.io/biocontainers/biopet-validatefastq:0.1.1--1" } @@ -348,8 +353,9 @@ task ValidateVcf { input { IndexedVcfFile vcf Reference reference - String memory = "4G" + String javaXmx = "3G" + String memory = "4G" String dockerImage = "quay.io/biocontainers/biopet-validatevcf:0.1--0" } @@ -374,12 +380,6 @@ task VcfStats { IndexedVcfFile vcf Reference reference String outputDir - File? intervals - Array[String]+? infoTags - Array[String]+? genotypeTags - Int? sampleToSampleMinDepth - Int? binSize - Int? maxContigsInSingleJob Boolean writeBinStats = false Int localThreads = 1 Boolean notWriteContigStats = false @@ -387,13 +387,20 @@ task VcfStats { Boolean skipGenotype = false Boolean skipSampleDistributions = false Boolean skipSampleCompare = false + + File? intervals + Array[String]+? infoTags + Array[String]+? genotypeTags + Int? sampleToSampleMinDepth + Int? binSize + Int? maxContigsInSingleJob String? sparkMaster Int? sparkExecutorMemory Array[String]+? 
sparkConfigValues - String dockerImage = "quay.io/biocontainers/biopet-vcfstats:1.2--0" - String memory = "5G" String javaXmx = "4G" + String memory = "5G" + String dockerImage = "quay.io/biocontainers/biopet-vcfstats:1.2--0" } command { diff --git a/biopet/sampleconfig.wdl b/biopet/sampleconfig.wdl index 2b36952b..f3955658 100644 --- a/biopet/sampleconfig.wdl +++ b/biopet/sampleconfig.wdl @@ -24,18 +24,19 @@ import "../common.wdl" as common task SampleConfig { input { - File? toolJar - String? preCommand Array[File]+ inputFiles String keyFilePath + + File? toolJar + String? preCommand String? sample String? library String? readgroup String? jsonOutputPath String? tsvOutputPath - String memory = "17G" String javaXmx = "16G" + String memory = "17G" } String toolCommand = if defined(toolJar) @@ -69,13 +70,14 @@ task SampleConfig { task SampleConfigCromwellArrays { input { - File? toolJar - String? preCommand Array[File]+ inputFiles String outputPath - String memory = "5G" + File? toolJar + String? preCommand + String javaXmx = "4G" + String memory = "5G" } String toolCommand = if defined(toolJar) @@ -102,16 +104,17 @@ task SampleConfigCromwellArrays { task CaseControl { input { - File? toolJar - String? preCommand Array[File]+ inputFiles Array[File]+ inputIndexFiles Array[File]+ sampleConfigs String outputPath String controlTag = "control" - String memory = "5G" + File? toolJar + String? preCommand + String javaXmx = "4G" + String memory = "5G" } String toolCommand = if defined(toolJar) diff --git a/biopet/seqstat.wdl b/biopet/seqstat.wdl index e3a55ec3..c2eb5866 100644 --- a/biopet/seqstat.wdl +++ b/biopet/seqstat.wdl @@ -24,16 +24,17 @@ import "../common.wdl" as common task Generate { input { - String? preCommand - File? toolJar FastqPair fastq String outputFile String sample String library String readgroup - String memory = "5G" + String? preCommand + File? 
toolJar + String javaXmx = "4G" + String memory = "5G" } String toolCommand = if defined(toolJar) @@ -60,4 +61,4 @@ task Generate { runtime { memory: memory } -} \ No newline at end of file +} diff --git a/biowdl.wdl b/biowdl.wdl index 8a1f9dfd..06b1d756 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -68,5 +68,8 @@ task InputConverter { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + json: {description: "JSON file version of the input sample sheet."} } } diff --git a/bowtie.wdl b/bowtie.wdl index 7fb1b614..87210dcd 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -100,5 +100,9 @@ task Bowtie { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "Output alignment file."} + outputBamIndex: {description: "Index of output alignment file."} } } diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 844d6990..fba1af5a 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -68,6 +68,9 @@ task ChunkedScatter { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + scatters: {description: "Overlapping chunks of a given size in new bed files."} } } @@ -118,5 +121,8 @@ task ScatterRegions { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + scatters: {description: "Bed file where the contigs add up approximately to the given scatter size."} } } From 9d68eb40b045b859cb2619b0f1cec1d2f7437f2e Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 6 Nov 2020 12:08:06 +0100 Subject: [PATCH 379/902] Update more tasks. --- .github/PULL_REQUEST_TEMPLATE.md | 3 ++- TO-DO.md | 19 ++++++++++++++----- clever.wdl | 5 ++++- collect-columns.wdl | 3 +++ common.wdl | 6 ++++++ 5 files changed, 29 insertions(+), 7 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 1d52f502..3b4ec9ac 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,3 +1,4 @@ ### Checklist - [ ] Pull request details were added to CHANGELOG.md. -- [ ] `parameter_meta` for each task is up to date. +- [ ] Documentation was updated (if required). +- [ ] `parameter_meta` was added/updated (if required). diff --git a/TO-DO.md b/TO-DO.md index cc76a5d6..7a18bb33 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -1,13 +1,22 @@ #TO DO ## Requires parameter_meta: -* biopet.wdl: `ExtractAdaptersFastqc`. +* biopet.wdl: `ExtractAdaptersFastqc` ## Duplicate tasks: * ## Out of date with new cluster & parameter_meta: -* bamstats.wdl: `Generate`. 
+* bamstats.wdl: `Generate` * biopet.wdl: `BaseCounter`, `FastqSplitter`, `FastqSync`, - `ValidateAnnotation`, `ValidateFastq`, `ValidateVcf`, `VcfStats`. -* sampleconfig.wdl: `SampleConfig`, `SampleConfigCromwellArrays`, `CaseControl`. -* seqstat.wdl: `Generate`. + `ValidateAnnotation`, `ValidateFastq`, `ValidateVcf`, `VcfStats` +* sampleconfig.wdl: `SampleConfig`, `SampleConfigCromwellArrays`, `CaseControl` +* seqstat.wdl: `Generate` +* common.wdl: `AppendToStringArray`, `CheckFileMD5`, `ConcatenateTextFiles`, + `Copy`, `CreateLink`, `MapMd5`, `StringArrayMd5` + +## Imports other tasks: +* bamstats.wdl +* biopet.wdl +* sampleconfig.wdl +* seqstat.wdl +* clever.wdl diff --git a/clever.wdl b/clever.wdl index 75e889b3..186be514 100644 --- a/clever.wdl +++ b/clever.wdl @@ -80,6 +80,9 @@ task Mateclever { memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + matecleverVcf: {description: "VCF with additional mateclever results."} } } @@ -132,6 +135,6 @@ task Prediction { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - predictions: {description: "The predicted deletions (VCF) from clever.", category: "advanced"} + predictions: {description: "The predicted deletions (VCF) from clever."} } } diff --git a/collect-columns.wdl b/collect-columns.wdl index 67db6179..3d65c7e7 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -83,5 +83,8 @@ task CollectColumns { memoryGb: {description: "The maximum amount of memory the job will need in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputTable: {description: "All input columns combined into one table."} } } diff --git a/common.wdl b/common.wdl index b3878bb6..66bdb99c 100644 --- a/common.wdl +++ b/common.wdl @@ -207,6 +207,9 @@ task TextToFile { outputFile: {description: "The name of the output file.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + out: {description: "File containing input text."} } } @@ -251,6 +254,9 @@ task YamlToJson { memory: {description: "The maximum amount of memory the job will need.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + json: {description: "JSON file version of input YAML."} } } From 3123947972f8a4cb288f96e539e143b40e3e136a Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 6 Nov 2020 13:04:35 +0100 Subject: [PATCH 380/902] Update even more tasks. --- CPAT.wdl | 3 +++ TO-DO.md | 12 ++++++------ biopet/biopet.wdl | 23 +++++++++++++++++++++++ cutadapt.wdl | 14 ++++++++++++++ 4 files changed, 46 insertions(+), 6 deletions(-) diff --git a/CPAT.wdl b/CPAT.wdl index d97031dc..afb67853 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -76,6 +76,9 @@ task CPAT { stopCodons: {description: "Equivalent to CPAT's `--stop` option.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outFile: {description: "CPAT logistic regression model."} } } diff --git a/TO-DO.md b/TO-DO.md index 7a18bb33..be118b70 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -1,10 +1,4 @@ #TO DO -## Requires parameter_meta: -* biopet.wdl: `ExtractAdaptersFastqc` - -## Duplicate tasks: -* - ## Out of date with new cluster & parameter_meta: * bamstats.wdl: `Generate` * biopet.wdl: `BaseCounter`, `FastqSplitter`, `FastqSync`, @@ -20,3 +14,9 @@ * sampleconfig.wdl * seqstat.wdl * clever.wdl + +## Requires parameter_meta: +* + +## Duplicate tasks: +* diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index 07f51e67..e6619e09 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -138,6 +138,29 @@ task ExtractAdaptersFastqc { time_minutes: timeMinutes docker: dockerImage } + + parameter_meta { + # inputs + inputFile: {description: "Input fastq file.", category: "required"} + outputDir: {description: "The path to which the output should be written.", 
category: "required"} + adapterOutputFilePath: {description: "Output file for adapters, if not supplied output will go to stdout.", category: "common"} + contamsOutputFilePath: {description: "Output file for contaminations, if not supplied output will go to stdout.", category: "common"} + skipContams: {description: "If this is set only the adapters block is used, otherwise contaminations is also used.", category: "advanced"} + knownContamFile: {description: "This file should contain the known contaminations from fastqc.", category: "advanced"} + knownAdapterFile: {description: "This file should contain the known adapters from fastqc.", category: "advanced"} + adapterCutoff: {description: "The fraction of the adapters in a read should be above this fraction, default is 0.001.", category: "advanced"} + outputAsFasta: {description: "Output in fasta format, default only sequences.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + adapterOutputFile: {description: "Output file with adapters."} + contamsOutputFile: {description: "Output file with contaminations."} + adapterList: {description: "List of adapters."} + contamsList: {description: "List of contaminations."} + } } task FastqSplitter { diff --git a/cutadapt.wdl b/cutadapt.wdl index 74f57912..8d409c3d 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -235,5 +235,19 @@ task Cutadapt { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + cutRead1: {description: ""} + report: {description: ""} + cutRead2: {description: ""} + tooLongOutput: {description: ""} + tooShortOutput: {description: ""} + untrimmedOutput: {description: ""} + tooLongPairedOutput: {description: ""} + tooShortPairedOutput: {description: ""} + untrimmedPairedOutput: {description: ""} + infoFile: {description: ""} + restFile: {description: ""} + wildcardFile: {description: ""} } } From e76c0c02f410eb8b8f9b09b9cbccb125930d4c35 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 6 Nov 2020 13:50:40 +0100 Subject: [PATCH 381/902] Upload more tasks. --- cutadapt.wdl | 24 ++++++++++++------------ deepvariant.wdl | 7 +++++++ delly.wdl | 3 +++ fastqc.wdl | 12 ++++++++++++ 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index 8d409c3d..b2dbdec0 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -237,17 +237,17 @@ task Cutadapt { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - cutRead1: {description: ""} - report: {description: ""} - cutRead2: {description: ""} - tooLongOutput: {description: ""} - tooShortOutput: {description: ""} - untrimmedOutput: {description: ""} - tooLongPairedOutput: {description: ""} - tooShortPairedOutput: {description: ""} - untrimmedPairedOutput: {description: ""} - infoFile: {description: ""} - restFile: {description: ""} - wildcardFile: {description: ""} + cutRead1: {description: "Trimmed read one."} + report: {description: "Per-adapter statistics file."} + cutRead2: {description: "Trimmed read two in pair."} + tooLongOutput: {description: "Reads that are too long according to -M."} + tooShortOutput: {description: "Reads that are too short according to -m."} + untrimmedOutput: {description: "All reads without adapters (instead of the regular output file)."} + tooLongPairedOutput: {description: "Second reads in a pair."} + tooShortPairedOutput: {description: "Second reads in a pair."} + untrimmedPairedOutput: {description: "The second reads in a pair that were not trimmed."} + infoFile: {description: "Detailed information about where adapters were found in each read."} + restFile: {description: "The rest file."} + wildcardFile: {description: "The wildcard file."} } } diff --git a/deepvariant.wdl b/deepvariant.wdl index 8b08e111..f71a1c88 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -90,5 +90,12 @@ task RunDeepVariant { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVCF: {description: "Output VCF file."} + outputVCFIndex: {description: "Index of output VCF file."} + outputVCFStatsReport: {description: "Statistics file."} + outputGVCF: {description: "GVCF version of VCF file(s)."} + outputGVCFIndex: {description: "Index of GVCF file(s)."} } } diff --git a/delly.wdl b/delly.wdl index ffe9023a..bf00ed36 100644 --- a/delly.wdl +++ b/delly.wdl @@ -62,5 +62,8 @@ task CallSV { memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + dellyBcf: {description: "File containing structural variants."} } } diff --git a/fastqc.wdl b/fastqc.wdl index feeeaae5..7ca0baa8 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -125,6 +125,13 @@ task Fastqc { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + htmlReport: {description: ""} + reportZip: {description: ""} + summary: {description: ""} + rawReport: {description: ""} + images: {description: ""} } meta { @@ -167,5 +174,10 @@ task GetConfiguration { # inputs timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + adapterList: {description: ""} + contaminantList: {description: ""} + limits: {description: ""} } } From 091c3e313077aff989a43e13052625330646ad47 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 6 Nov 2020 17:12:40 +0100 Subject: [PATCH 382/902] Upload another batch. --- TO-DO.md | 2 ++ fastqc.wdl | 16 ++++++++-------- gatk.wdl | 45 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 8 deletions(-) diff --git a/TO-DO.md b/TO-DO.md index be118b70..8c1723b6 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -7,6 +7,8 @@ * seqstat.wdl: `Generate` * common.wdl: `AppendToStringArray`, `CheckFileMD5`, `ConcatenateTextFiles`, `Copy`, `CreateLink`, `MapMd5`, `StringArrayMd5` +* fastqsplitter.wdl: `Fastqsplitter` +* flash.wdl: `Flash` ## Imports other tasks: * bamstats.wdl diff --git a/fastqc.wdl b/fastqc.wdl index 7ca0baa8..973eeed9 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -127,11 +127,11 @@ task Fastqc { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - htmlReport: {description: ""} - reportZip: {description: ""} - summary: {description: ""} - rawReport: {description: ""} - images: {description: ""} + htmlReport: {description: "HTML report file."} + reportZip: {description: "Source data file."} + summary: {description: "Summary file."} + rawReport: {description: "Raw report file."} + images: {description: "Images in report file."} } meta { @@ -176,8 +176,8 @@ task GetConfiguration { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - adapterList: {description: ""} - contaminantList: {description: ""} - limits: {description: ""} + adapterList: {description: "List of adapters found."} + contaminantList: {description: "List of contaminants found."} + limits: {description: "Limits file."} } } diff --git a/gatk.wdl b/gatk.wdl index 7aa2915c..d13c1175 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -78,6 +78,9 @@ task AnnotateIntervals { javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + annotatedIntervals: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a sequence dictionary, a row specifying the column headers for the contained annotations, and the corresponding entry rows."} } } @@ -145,6 +148,11 @@ task ApplyBQSR { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + recalibratedBam: {description: "A BAM file containing the recalibrated read data."} + recalibratedBamIndex: {description: "Index of recalibrated BAM file."} + recalibratedBamMd5: {description: "MD5 of recalibrated BAM file."} } } @@ -211,6 +219,9 @@ task BaseRecalibrator { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + recalibrationReport: {description: "A GATK Report file with many tables."} } } @@ -255,6 +266,10 @@ task CalculateContamination { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + contaminationTable: {description: "Table with fractions of reads from cross-sample contamination."} + mafTumorSegments: {description: "Tumor segments table."} } } @@ -297,6 +312,10 @@ task CallCopyRatioSegments { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + calledSegments: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a read group sample name, a sequence dictionary, a row specifying the column headers contained in CalledCopyRatioSegmentCollection.CalledCopyRatioSegmentTableColumn, and the corresponding entry rows."} + calledSegmentsIgv: {description: "This is a tab-separated values (TSV) file with CBS-format column headers and the corresponding entry rows that can be plotted using IGV."} } } @@ -353,6 +372,9 @@ task CollectAllelicCounts { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + allelicCounts: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a read group sample name, a sequence dictionary, a row specifying the column headers contained in AllelicCountCollection.AllelicCountTableColumn, and the corresponding entry rows."} } } @@ -410,6 +432,9 @@ task CollectReadCounts { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + counts: {description: "Read counts at specified intervals."} } } @@ -464,6 +489,10 @@ task CombineGVCFs { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "A combined multi-sample gVCF."} + outputVcfIndex: {description: "Index of the output file."} } } @@ -535,6 +564,10 @@ task CombineVariants { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + combinedVcf: {description: "Combined VCF file."} + combinedVcfIndex: {description: "Index of combined VCF file."} } } @@ -580,6 +613,9 @@ task CreateReadCountPanelOfNormals { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + PON: {description: "Panel-of-normals file."} } } @@ -630,6 +666,10 @@ task DenoiseReadCounts { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + standardizedCopyRatios: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a read group sample name, a sequence dictionary, a row specifying the column headers contained in CopyRatioCollection.CopyRatioTableColumn, and the corresponding entry rows."} + denoisedCopyRatios: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a read group sample name, a sequence dictionary, a row specifying the column headers contained in CopyRatioCollection.CopyRatioTableColumn, and the corresponding entry rows."} } } @@ -700,6 +740,11 @@ task FilterMutectCalls { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + filteredVcf: {description: ""} + filteredVcfIndex: {description: ""} + filteringStats: {description: ""} } } From eeff6ce5e37f75aa508fec3bf9ba38bede23dd17 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 9 Nov 2020 11:01:08 +0100 Subject: [PATCH 383/902] Update gatk with outputs. 
--- gatk.wdl | 80 +++++++++++++++++++++++++++++++++++++++++++++++--- gffcompare.wdl | 9 ++++++ 2 files changed, 85 insertions(+), 4 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index d13c1175..82244caa 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -581,7 +581,8 @@ task CreateReadCountPanelOfNormals { String javaXmx = "7G" String memory = "8G" Int timeMinutes = 5 - String dockerImage = "broadinstitute/gatk:4.1.8.0" # The biocontainer causes a spark related error for some reason... + # The biocontainer causes a spark related error for some reason. + String dockerImage = "broadinstitute/gatk:4.1.8.0" } command { @@ -742,9 +743,9 @@ task FilterMutectCalls { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - filteredVcf: {description: ""} - filteredVcfIndex: {description: ""} - filteringStats: {description: ""} + filteredVcf: {description: "VCF file with filtered variants from a Mutect2 VCF callset."} + filteredVcfIndex: {description: "Index of output VCF file."} + filteringStats: {description: "The output filtering stats file."} } } @@ -787,6 +788,9 @@ task GatherBqsrReports { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBQSRreport: {description: "Single file with scattered BQSR recalibration reports gathered into one."} } } @@ -840,6 +844,9 @@ task GenomicsDBImport { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + genomicsDbTarArchive: {description: "Imported VCFs to GenomicsDB file."} } } @@ -907,6 +914,10 @@ task GenotypeGVCFs { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVCF: {description: "A final VCF in which all samples have been jointly genotyped. "} + outputVCFIndex: {description: "Index of final VCF file."} } } @@ -959,6 +970,9 @@ task GetPileupSummaries { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + pileups: {description: "Pileup metrics for inferring contamination."} } } @@ -1047,6 +1061,10 @@ task HaplotypeCaller { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVCF: {description: "Raw, unfiltered SNP and indel calls."} + outputVCFIndex: {description: "Index of output VCF."} } } @@ -1085,6 +1103,9 @@ task LearnReadOrientationModel { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + artifactPriorsTable: {description: "Maximum likelihood estimates of artifact prior probabilities in the orientation bias mixture model filter."} } } @@ -1123,6 +1144,9 @@ task MergeStats { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + mergedStats: {description: "Merged stats from scattered Mutect2 runs."} } } @@ -1190,6 +1214,19 @@ task ModelSegments { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + hetrozygousAllelicCounts: {description: "Allelic-counts file containing the counts at sites genotyped as heterozygous in the case sample."} + copyRatioSegments: {description: "It contains the segments from the .modelFinal.seg file converted to a format suitable for input to CallCopyRatioSegments."} + copyRatioCBS: {description: "The posterior medians of the log2 copy ratio."} + alleleFractionCBS: {description: "Minor-allele fraction."} + unsmoothedModeledSegments: {description: "The initial modeled-segments result before segmentation smoothing."} + unsmoothedCopyRatioParameters: {description: "The initial copy-ratio-model global-parameter result before segmentation smoothing"} + unsmoothedAlleleFractionParameters: {description: "The initial allele-fraction-model global-parameter result before segmentation smoothing."} + modeledSegments: {description: "The final modeled-segments result after segmentation smoothing."} + copyRatioParameters: {description: "The final copy-ratio-model global-parameter result after segmentation smoothing"} + alleleFractionParameters: {description: "The final allele-fraction-model global-parameter result after segmentation smoothing."} + normalHetrozygousAllelicCounts: {description: "Allelic-counts file containing the counts at sites genotyped as heterozygous in the matched-normal sample."} } } @@ -1268,6 +1305,12 @@ task 
MuTect2 { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + vcfFile: {description: "Somatic SNVs and indels called via local assembly of haplotypes."} + vcfFileIndex: {description: "Index for Mutect2 VCF."} + f1r2File: {description: "Contains information that can then be passed to LearnReadOrientationModel, which generate an artifact prior table for each tumor sample for FilterMutectCalls to use."} + stats: {description: "Stats file."} } } @@ -1327,6 +1370,14 @@ task PlotDenoisedCopyRatios { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + denoisedCopyRatiosPlot: {description: "Plot showing the entire range of standardized and denoised copy ratios."} + standardizedMedianAbsoluteDeviation: {description: "Standardized median absolute deviation copy ratios."} + denoisedMedianAbsoluteDeviation: {description: "Denoised median absolute deviation copy ratios."} + deltaMedianAbsoluteDeviation: {description: "The change between `standardizedMedianAbsoluteDeviation` & `denoisedMedianAbsoluteDeviation`."} + deltaScaledMedianAbsoluteDeviation: {description: "The change between `standardizedMedianAbsoluteDeviation` & `denoisedMedianAbsoluteDeviation` scaled by standardized MAD."} + denoisedCopyRatiosLimitedPlot: {description: "Plot showing the standardized and denoised copy ratios limited to ratios within [0, 4]."} } } @@ -1384,6 +1435,9 @@ task PlotModeledSegments { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + modeledSegmentsPlot: {description: "This plot shows the input denoised copy ratios and/or alternate-allele fractions as points, as well as box plots for the available posteriors in each segment."} } } @@ -1443,6 +1497,9 @@ task PreprocessIntervals { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + intervalList: {description: "Preprocessed Picard interval-list file."} } } @@ -1501,6 +1558,10 @@ task SelectVariants { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "A new VCF file containing the selected subset of variants."} + outputVcfIndex: {description: "Index of the new output VCF file."} } } @@ -1555,6 +1616,10 @@ task SplitNCigarReads { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + bam: {description: "BAM file with reads split at N CIGAR elements and CIGAR strings updated."} + bamIndex: {description: "Index of output BAM file."} } } @@ -1639,6 +1704,9 @@ task VariantEval { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + table: {description: "Evaluation tables detailing the results of the eval modules which were applied."} } } @@ -1696,5 +1764,9 @@ task VariantFiltration { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + filteredVcf: {description: "A filtered VCF in which passing variants are annotated as PASS and failing variants are annotated with the name(s) of the filter(s) they failed. "} + filteredVcfIndex: {description: "Index of filtered VCF."} } } diff --git a/gffcompare.wdl b/gffcompare.wdl index 8bd53091..f8f0ade0 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -142,6 +142,15 @@ task GffCompare { namePrefix: {description: "Equivalent to gffcompare's `-p` option.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + annotated: {description: ""} + loci: {description: ""} + stats: {description: ""} + tracking: {description: ""} + allFiles: {description: ""} + redundant: {description: ""} + missedIntrons: {description: ""} } meta { From 47aab9c501eb8c3a80250c4d993d0b2e5614ae16 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 9 Nov 2020 11:33:45 +0100 Subject: [PATCH 384/902] Upload some more tasks. 
--- gffcompare.wdl | 12 ++++++------ gffread.wdl | 6 ++++++ gridss.wdl | 6 ++++++ 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/gffcompare.wdl b/gffcompare.wdl index f8f0ade0..221c4907 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -144,13 +144,13 @@ task GffCompare { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - annotated: {description: ""} + annotated: {description: "Annotated GTF file."} loci: {description: ""} - stats: {description: ""} - tracking: {description: ""} - allFiles: {description: ""} - redundant: {description: ""} - missedIntrons: {description: ""} + stats: {description: "Various statistics related to the “accuracy” (or a measure of agreement) of the input transcripts when compared to reference annotation data."} + tracking: {description: "File matching transcripts up between samples."} + allFiles: {description: "A collection of all outputs files."} + redundant: {description: "File containing "duplicate"/"redundant" transcripts."} + missedIntrons: {description: "File denoting missed introns."} } meta { diff --git a/gffread.wdl b/gffread.wdl index 343011e9..66230989 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -80,5 +80,11 @@ task GffRead { filteredGffPath: {description: "The location the filtered GFF should be written to.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + exonsFasta: {description: "Fasta file containing exons."} + CDSFasta: {description: "Fasta file containing CDS's."} + proteinFasta: {description: "Fasta file containing proteins."} + filteredGff: {description: "Filtered GFF file."} } } diff --git a/gridss.wdl b/gridss.wdl index 9499be5e..65579fd9 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -81,5 +81,11 @@ task GRIDSS { jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling.",category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + vcf: {description: ""} + vcfIndex: {description: ""} + assembly: {description: ""} + assemblyIndex: {description: ""} } } From 68d98441faddc47b8060d42864e11df7907bc0e6 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 9 Nov 2020 14:28:07 +0100 Subject: [PATCH 385/902] Update more tasks. 
--- LICENSE | 6 ++---- README.md | 16 ++++++---------- TO-DO.md | 9 +++++++++ gffcompare.wdl | 2 +- gridss.wdl | 8 ++++---- hisat2.wdl | 4 ++++ htseq.wdl | 3 +++ manta.wdl | 7 +++++++ requirements-test.txt | 11 ++++++++++- 9 files changed, 46 insertions(+), 20 deletions(-) diff --git a/LICENSE b/LICENSE index 37eeade5..b1f2b679 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,3 @@ -MIT License - Copyright (c) 2017 Leiden University Medical Center Permission is hereby granted, free of charge, to any person obtaining a copy @@ -9,8 +7,8 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, diff --git a/README.md b/README.md index 246e3814..4bc58367 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,21 @@ # Tasks - This repository contains the WDL task definitions used in the various [Biowdl](https://github.com/biowdl) workflows and pipelines. - ## Documentation - -Documentation for this workflow can be found +Documentation for this repository can be found [here](https://biowdl.github.io/tasks/). ## About -These tasks are part of [Biowdl](https://github.com/biowdl) -developed by [the SASC team](http://sasc.lumc.nl/). +These workflows are part of [Biowdl](https://github.com/biowdl) +developed by the SASC team at [Leiden University Medical Center](https://www.lumc.nl/). ## Contact -

-For any question related to these tasks, please use the +For any question related to Expression-Quantification, please use the github issue tracker -or contact - the SASC team directly at: +or contact the SASC team directly at: + sasc@lumc.nl.

diff --git a/TO-DO.md b/TO-DO.md index 8c1723b6..69f359bd 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -1,4 +1,12 @@ #TO DO +This file describes WDL files and tasks within those files which need +more specific attention than just adding outputs to the parameter_meta. + +Some tasks have not been updated to match the new SLURM requirements and are +missing a parameter_meta section. + +Some tasks are importing other WDL files. + ## Out of date with new cluster & parameter_meta: * bamstats.wdl: `Generate` * biopet.wdl: `BaseCounter`, `FastqSplitter`, `FastqSync`, @@ -9,6 +17,7 @@ `Copy`, `CreateLink`, `MapMd5`, `StringArrayMd5` * fastqsplitter.wdl: `Fastqsplitter` * flash.wdl: `Flash` +* macs2.wdl: `PeakCalling` ## Imports other tasks: * bamstats.wdl diff --git a/gffcompare.wdl b/gffcompare.wdl index 221c4907..4b0d6d22 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -149,7 +149,7 @@ task GffCompare { stats: {description: "Various statistics related to the “accuracy” (or a measure of agreement) of the input transcripts when compared to reference annotation data."} tracking: {description: "File matching transcripts up between samples."} allFiles: {description: "A collection of all outputs files."} - redundant: {description: "File containing "duplicate"/"redundant" transcripts."} + redundant: {description: "File containing duplicate/redundant transcripts."} missedIntrons: {description: "File denoting missed introns."} } diff --git a/gridss.wdl b/gridss.wdl index 65579fd9..5f48afac 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -83,9 +83,9 @@ task GRIDSS { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - vcf: {description: ""} - vcfIndex: {description: ""} - assembly: {description: ""} - assemblyIndex: {description: ""} + vcf: {description: "VCF file including variant allele fractions."} + vcfIndex: {description: "Index of output VCF."} + assembly: {description: "The GRIDSS assembly BAM."} + assemblyIndex: {description: "Index of output BAM file."} } } diff --git a/hisat2.wdl b/hisat2.wdl index b52bf70f..a2c0777c 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -106,5 +106,9 @@ task Hisat2 { memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + bamFile: {description: "Output BAM file."} + summaryFile: {description: "Alignment summary file."} } } diff --git a/htseq.wdl b/htseq.wdl index cf527535..dfa3fcf2 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -78,5 +78,8 @@ task HTSeqCount { memory: {description: "The amount of memory the job requires in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + counts: {description: "Count table based on input BAM file."} } } diff --git a/manta.wdl b/manta.wdl index a7b7cf38..4b7ea264 100644 --- a/manta.wdl +++ b/manta.wdl @@ -79,6 +79,10 @@ task Germline { memoryGb: {description: "The memory required to run the manta", category: "required"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + mantaVCF: {description: ""} + mantaVCFindex: {description: ""} } } @@ -155,5 +159,8 @@ task Somatic { memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + } } diff --git a/requirements-test.txt b/requirements-test.txt index f074413b..0b01d193 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,2 +1,11 @@ +# These are the programs used for testing these biowdl tasks. +# These requirements can be installed with conda with the bioconda channel +# activated. +# For more information on how to set up conda with bioconda channel see: +# http://bioconda.github.io/#install-conda +# This file can be installed with "conda install --file requirements-test.txt". + cromwell -miniwdl \ No newline at end of file +womtool +miniwdl +wdl-aid From 20ee22a6b9b2063cd900426b54549ba98d9f60d3 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 9 Nov 2020 14:32:17 +0100 Subject: [PATCH 386/902] Update README. 
--- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4bc58367..9d682de7 100644 --- a/README.md +++ b/README.md @@ -7,13 +7,13 @@ Documentation for this repository can be found [here](https://biowdl.github.io/tasks/). ## About -These workflows are part of [Biowdl](https://github.com/biowdl) +These tasks are part of [Biowdl](https://github.com/biowdl) developed by the SASC team at [Leiden University Medical Center](https://www.lumc.nl/). ## Contact

-For any question related to Expression-Quantification, please use the +For any question related to these tasks, please use the github issue tracker or contact the SASC team directly at: From c9e62bf9a8b562ddee8ecd2ae12aa8784ed1d4ce Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 9 Nov 2020 17:32:07 +0100 Subject: [PATCH 387/902] Update tasks. --- manta.wdl | 13 ++++++++++--- multiqc.wdl | 4 ++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/manta.wdl b/manta.wdl index 4b7ea264..1c949af2 100644 --- a/manta.wdl +++ b/manta.wdl @@ -81,8 +81,8 @@ task Germline { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - mantaVCF: {description: ""} - mantaVCFindex: {description: ""} + mantaVCF: {description: "SVs and indels scored and genotyped under a diploid model for the set of samples in a joint diploid sample analysis or for the normal sample in a tumor/normal subtraction analysis."} + mantaVCFindex: {description: "Index of output mantaVCF."} } } @@ -161,6 +161,13 @@ task Somatic { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - + candidateSmallIndelsVcf: {description: "Subset of the candidateSV.vcf.gz file containing only simple insertion and deletion variants less than the minimum scored variant size."} + candidateSmallIndelsVcfIndex: {description: "Index of output VCF file candidateSmallIndelsVcf."} + candidateSVVcf: {description: "Unscored SV and indel candidates."} + candidatSVVcfIndex: {description: "Index of output VCF file candidateSVVcf."} + tumorSVVcf: {description: "Subset of the candidateSV.vcf.gz file after removing redundant candidates and small indels less than the minimum scored variant size."} + tumorSVVcfIndex: {description: "Index of output VCF file tumorSVVcf."} + diploidSV: {description: "SVs and indels scored and genotyped under a diploid model for the set of samples in a joint diploid sample analysis or for the normal sample in a tumor/normal subtraction analysis."} + diploidSVindex: {description: "Index of output VCF file diploidSV."} } } diff --git a/multiqc.wdl b/multiqc.wdl index 647394e9..1d248dd6 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -177,5 +177,9 @@ task MultiQC { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + multiqcReport: {description: ""} + multiqcDataDirZip: {description: ""} } } From 100d8add0f092f9396be00cce03491cf0fffa654 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 10 Nov 2020 09:52:53 +0100 Subject: [PATCH 388/902] Add optional gvcf index input --- deepvariant.wdl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index 8b08e111..6a2b70b3 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -28,11 +28,11 @@ task RunDeepVariant { File inputBamIndex String modelType String outputVcf - String? postprocessVariantsExtraArgs File? customizedModel Int? numShards String? outputGVcf + String? outputGVcfIndex File? regions String? sampleName Boolean? VCFStatsReport = true @@ -44,6 +44,7 @@ task RunDeepVariant { command { set -e + /opt/deepvariant/bin/run_deepvariant \ --ref ~{referenceFasta} \ --reads ~{inputBam} \ @@ -66,10 +67,10 @@ task RunDeepVariant { output { File outputVCF = outputVcf - File outputVCFIndex = outputVCF + ".tbi" + File outputVCFIndex = outputVcf + ".tbi" Array[File] outputVCFStatsReport = glob("*.visual_report.html") File? outputGVCF = outputGVcf - File? outputGVCFIndex = outputGVcf + ".tbi" + File? outputGVCFIndex = outputGVcfIndex } parameter_meta { @@ -84,6 +85,7 @@ task RunDeepVariant { customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. If not set, the default for each --model_type will be used.", category: "advanced"} numShards: {description: "Number of shards for make_examples step.", category: "common"} outputGVcf: {description: "Path where we should write gVCF file.", category: "common"} + outputGVcfIndex: {description: "Path to where the gVCF index file will be written. 
This is needed as a workaround, set it to outputGVcf+.tbi.", category: "common"} regions: {description: "List of regions we want to process, in BED/BEDPE format.", category: "advanced"} sampleName: {description: "Sample name to use instead of the sample name from the input reads BAM (SM tag in the header).", category: "common"} VCFStatsReport: {description: "Output a visual report (HTML) of statistics about the output VCF.", category: "common"} From 4836726ee8677c83d7cc1a1175be85435ab695bc Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 10 Nov 2020 09:53:28 +0100 Subject: [PATCH 389/902] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bff5f3c..48d98036 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ DeepVariant: Add an optional input for the gvcf index. + Samtools: `Sort` task now has `threads` in runtime instead of `1`. + Picard: Add parameter_meta to `SortSam`. + pbmm2: Add parameter_meta for `sample`. From aae72ec8459f36a3beb813c824553b56b1a820f5 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 10 Nov 2020 13:09:12 +0100 Subject: [PATCH 390/902] Add more updates. --- TO-DO.md | 2 ++ deepvariant.wdl | 3 +-- multiqc.wdl | 4 ++-- pacbio.wdl | 8 +++++++- pbbam.wdl | 4 ++++ picard.wdl | 9 +++++++++ 6 files changed, 25 insertions(+), 5 deletions(-) diff --git a/TO-DO.md b/TO-DO.md index 69f359bd..7368005a 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -18,6 +18,8 @@ Some tasks are importing other WDL files. 
* fastqsplitter.wdl: `Fastqsplitter` * flash.wdl: `Flash` * macs2.wdl: `PeakCalling` +* ncbi.wdl: `GenomeDownload`, `DownloadNtFasta`, `DownloadAccessionToTaxId` +* ## Imports other tasks: * bamstats.wdl diff --git a/deepvariant.wdl b/deepvariant.wdl index 618200aa..28aee813 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -44,7 +44,6 @@ task RunDeepVariant { command { set -e - /opt/deepvariant/bin/run_deepvariant \ --ref ~{referenceFasta} \ --reads ~{inputBam} \ @@ -85,7 +84,7 @@ task RunDeepVariant { customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. If not set, the default for each --model_type will be used.", category: "advanced"} numShards: {description: "Number of shards for make_examples step.", category: "common"} outputGVcf: {description: "Path where we should write gVCF file.", category: "common"} - outputGVcfIndex: {description: "Path to where the gVCF index file will be written. This is needed as a workaround, set it to outputGVcf+.tbi.", category: "common"} + outputGVcfIndex: {description: "Path to where the gVCF index file will be written. This is needed as a workaround, set it to `outputGVcf + '.tbi'`.", category: "common"} regions: {description: "List of regions we want to process, in BED/BEDPE format.", category: "advanced"} sampleName: {description: "Sample name to use instead of the sample name from the input reads BAM (SM tag in the header).", category: "common"} VCFStatsReport: {description: "Output a visual report (HTML) of statistics about the output VCF.", category: "common"} diff --git a/multiqc.wdl b/multiqc.wdl index 1d248dd6..405c0a0b 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -179,7 +179,7 @@ task MultiQC { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - multiqcReport: {description: ""} - multiqcDataDirZip: {description: ""} + multiqcReport: {description: "Results from bioinformatics analyses across many samples in a single report."} + multiqcDataDirZip: {description: "The parsed data directory compressed with zip."} } } diff --git a/pacbio.wdl b/pacbio.wdl index 01f6d4fd..df0343d9 100644 --- a/pacbio.wdl +++ b/pacbio.wdl @@ -43,7 +43,7 @@ task mergePacBio { } output { - File MergedReport = mergedReport + File outputMergedReport = mergedReport } parameter_meta { @@ -52,6 +52,9 @@ task mergePacBio { mergedReport: {description: "The location the merged PacBio report file should be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputMergedReport: {description: "The PacBio reports merged into one."} } } @@ -85,5 +88,8 @@ task ccsChunks { chunkCount: {description: "The number of chunks to create.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + chunks: {description: "The chunks created based on `chunkCount`."} } } diff --git a/pbbam.wdl b/pbbam.wdl index d271a11a..d893e64d 100644 --- a/pbbam.wdl +++ b/pbbam.wdl @@ -66,5 +66,9 @@ task Index { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + indexedBam: {description: "The original input BAM file."} + index: {description: "The index of the input BAM file."} } } diff --git a/picard.wdl b/picard.wdl index f1876f7b..d288f0e5 100644 --- a/picard.wdl +++ b/picard.wdl @@ -61,6 +61,9 @@ task BedToIntervalList { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + intervalList: {description: "Picard Interval List from a BED file."} } } @@ -125,6 +128,9 @@ task CollectHsMetrics { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + HsMetrics: {description: "Hybrid-selection (HS) metrics for the input BAM file."} } } @@ -240,6 +246,9 @@ task CollectMultipleMetrics { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + alignmentSummary: {description: ""} } } From f111c363b74ec64ee7ba06db7a7ad2b3f3ada05a Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 10 Nov 2020 17:00:31 +0100 Subject: [PATCH 391/902] Update style. --- CHANGELOG.md | 13 +++++-------- README.md | 16 ++++++++-------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 836af4ac..0b668ab1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,6 @@ Changelog ========== -For any question related to these tasks, please use the -github issue tracker +For any question related to Tasks, please use the +github issue tracker or contact the SASC team directly at: - + sasc@lumc.nl.

From b633bd790ee89de61a1673092b9d98cb4006d91e Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 10 Nov 2020 17:06:03 +0100 Subject: [PATCH 392/902] Update more tasks. --- picard.wdl | 31 +++++++++++++++++++++++++++++++ rtg.wdl | 6 ++++++ 2 files changed, 37 insertions(+) diff --git a/picard.wdl b/picard.wdl index d288f0e5..fd278958 100644 --- a/picard.wdl +++ b/picard.wdl @@ -249,6 +249,24 @@ task CollectMultipleMetrics { # outputs alignmentSummary: {description: ""} + baitBiasDetail: {description: ""} + baitBiasSummary: {description: ""} + baseDistributionByCycle: {description: ""} + baseDistributionByCyclePdf: {description: ""} + errorSummary: {description: ""} + gcBiasDetail: {description: ""} + gcBiasPdf: {description: ""} + gcBiasSummary: {description: ""} + insertSizeHistogramPdf: {description: ""} + insertSize: {description: ""} + preAdapterDetail: {description: ""} + preAdapterSummary: {description: ""} + qualityByCycle: {description: ""} + qualityByCyclePdf: {description: ""} + qualityDistribution: {description: ""} + qualityDistributionPdf: {description: ""} + qualityYield: {description: ""} + allStats: {description: ""} } } @@ -301,6 +319,10 @@ task CollectRnaSeqMetrics { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + metrics: {description: ""} + chart: {description: ""} } } @@ -361,6 +383,11 @@ task CollectTargetedPcrMetrics { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + perTargetCoverage: {description: ""} + perBaseCoverage: {description: ""} + metrics: {description: ""} } } @@ -410,6 +437,10 @@ task CollectVariantCallingMetrics { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + details: {description: ""} + summary: {description: ""} } } diff --git a/rtg.wdl b/rtg.wdl index bfd32957..a6f8da52 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -59,6 +59,9 @@ task Format { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + sdf: {description: ""} } } @@ -161,5 +164,8 @@ task VcfEval { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + } } From 8c42b1e1de607623fa00863472c9570158e6e495 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 10 Nov 2020 17:49:13 +0100 Subject: [PATCH 393/902] Update more tasks. 
--- TO-DO.md | 2 +- rtg.wdl | 18 +++++++++++++++--- smoove.wdl | 3 +++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/TO-DO.md b/TO-DO.md index 7368005a..e9824dfb 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -19,7 +19,7 @@ Some tasks are importing other WDL files. * flash.wdl: `Flash` * macs2.wdl: `PeakCalling` * ncbi.wdl: `GenomeDownload`, `DownloadNtFasta`, `DownloadAccessionToTaxId` -* +* seqtk.wdl: `Sample` ## Imports other tasks: * bamstats.wdl diff --git a/rtg.wdl b/rtg.wdl index a6f8da52..0e86ce3f 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -52,7 +52,7 @@ task Format { parameter_meta { # inputs - inputFiles: {description: "input sequence files. May be specified 1 or more times.", category: "required"} + inputFiles: {description: "Input sequence files. May be specified 1 or more times.", category: "required"} format: {description: "Format of input. Allowed values are [fasta, fastq, fastq-interleaved, sam-se, sam-pe].", category: "advanced"} outputPath: {description: "Where the output should be placed.", category: "advanced"} rtgMem: {description: "The amount of memory rtg will allocate to the JVM.", category: "advanced"} @@ -61,7 +61,7 @@ task Format { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - sdf: {description: ""} + sdf: {description: "RTG Sequence Data File (SDF) format version of the input file(s)."} } } @@ -161,6 +166,18 @@ task VcfEval { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - + falseNegativesVcf: {description: "Variants from the baseline VCF which were not correctly called."} + falseNegativesVcfIndex: {description: "Index of the output VCF file `falseNegativesVcf`."} + falsePositivesVcf: {description: "Variants from the calls VCF which do not agree with baseline variants."} + falsePositivesVcfIndex: {description: "Index of the output VCF file `falsePositivesVcf`."} + summary: {description: "Summary statistic file."} + truePositivesBaselineVcf: {description: "Variants from the baseline VCF which agree with variants in the calls VCF."} + truePositivesBaselineVcfIndex: {description: "Index of the output VCF file `truePositivesBaselineVcf`."} + truePositivesVcf: {description: "Variants from the calls VCF which agree with variants in the baseline VCF."} + truePositivesVcfIndex: {description: "Index of the output VCF file `truePositivesVcf`."} + nonSnpRoc: {description: "ROC data derived from those variants which were not represented as SNPs."} + phasing: {description: "Phasing file."} + weightedRoc: {description: "ROC data derived from all analyzed call variants, regardless of their representation."} + allStats: {description: "All output files combined in an array."} } } diff --git a/smoove.wdl b/smoove.wdl index 244c2cac..cafc6b08 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -65,5 +65,8 @@ task Call { memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + smooveVcf: {description: "Calls and genotyping of structural variants in VCF file." 
} } From 3eeef3f777e4863f9da50e51f9ed0699578d28c2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 10:01:21 +0100 Subject: [PATCH 394/902] Update 2 new tasks. --- samtools.wdl | 35 +++++++++++++++++++++++++++++++++++ somaticseq.wdl | 6 ++++++ 2 files changed, 41 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index e274cf58..fcd996c7 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -56,6 +56,10 @@ task BgzipAndIndex { type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + compressed: {description: ""} + index: {description: ""} } } @@ -161,6 +165,11 @@ task Fastq { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + read1: {description: ""} + read2: {description: ""} + read0: {description: ""} } } @@ -203,6 +212,10 @@ task FilterShortReadsBam { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + filteredBam: {description: ""} + filteredBamIndex: {description: ""} } } @@ -239,6 +252,9 @@ task Flagstat { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + flagstat: {description: ""} } } @@ -288,6 +304,10 @@ task Index { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + indexedBam: {description: ""} + index: {description: ""} } } @@ -321,6 +341,9 @@ task Markdup { outputBamPath: {description: "The location of the output BAM file.", category: "required"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: ""} } } @@ -370,6 +393,10 @@ task Merge { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: ""} + outputBamIndex: {description: ""} } } @@ -474,6 +501,10 @@ task Tabix { type: {description: "The type of file (eg. vcf or bed) to be indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + indexedFile: {description: ""} + index: {description: ""} } } @@ -540,5 +571,9 @@ task View { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: ""} + outputBamIndex: {description: ""} } } diff --git a/somaticseq.wdl b/somaticseq.wdl index 1c73fc58..0cd944c6 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -123,6 +123,12 @@ task ParallelPaired { threads: {description: "The number of threads to use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + indels: {description: ""} + snvs: {description: ""} + ensembleIndels: {description: ""} + ensembleSNV: {description: ""} } } From e2461ff107d8d070c063ea47a782929e95bebb80 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 10:27:49 +0100 Subject: [PATCH 395/902] Fix travis error. 
--- smoove.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index cafc6b08..82079b2f 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -67,6 +67,6 @@ task Call { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - smooveVcf: {description: "Calls and genotyping of structural variants in VCF file." + smooveVcf: {description: "Calls and genotyping of structural variants in VCF file."} } } From 7dff854e906a14db3f69647b5f35596a9687d703 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 13:45:43 +0100 Subject: [PATCH 396/902] Update even more outputs. --- TO-DO.md | 11 ++++++----- somaticseq.wdl | 25 +++++++++++++++++++++++++ star.wdl | 22 ++++++++++++++++++++++ strelka.wdl | 10 ++++++++++ stringtie.wdl | 7 +++++++ survivor.wdl | 3 +++ umi-tools.wdl | 11 +++++++++++ vardict.wdl | 3 +++ vt.wdl | 3 +++ 9 files changed, 90 insertions(+), 5 deletions(-) diff --git a/TO-DO.md b/TO-DO.md index e9824dfb..b54d995c 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -20,6 +20,9 @@ Some tasks are importing other WDL files. * macs2.wdl: `PeakCalling` * ncbi.wdl: `GenomeDownload`, `DownloadNtFasta`, `DownloadAccessionToTaxId` * seqtk.wdl: `Sample` +* spades.wdl: `Spades` +* unicycler.wdl: `Unicycler` + ## Imports other tasks: * bamstats.wdl @@ -27,9 +30,7 @@ Some tasks are importing other WDL files. 
* sampleconfig.wdl * seqstat.wdl * clever.wdl +* strelka.wdl -## Requires parameter_meta: -* - -## Duplicate tasks: -* +## Requires input from others: +* somaticseq.wdl diff --git a/somaticseq.wdl b/somaticseq.wdl index 0cd944c6..07103ef9 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -234,6 +234,14 @@ task ParallelPairedTrain { threads: {description: "The number of threads to use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + consensusIndels: {description: ""} + consensusSNV: {description: ""} + ensembleIndels: {description: ""} + ensembleSNV: {description: ""} + ensembleIndelsClassifier: {description: ""} + ensembleSNVClassifier: {description: ""} } } @@ -317,6 +325,12 @@ task ParallelSingle { threads: {description: "The number of threads to use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + indels: {description: ""} + snvs: {description: ""} + ensembleIndels: {description: ""} + ensembleSNV: {description: ""} } } @@ -399,6 +413,14 @@ task ParallelSingleTrain { threads: {description: "The number of threads to use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + consensusIndels: {description: ""} + consensusSNV: {description: ""} + ensembleIndels: {description: ""} + ensembleSNV: {description: ""} + ensembleIndelsClassifier: {description: ""} + ensembleSNVClassifier: {description: ""} } } @@ -435,5 +457,8 @@ task ModifyStrelka { outputVCFName: {description: "The location the output VCF file should be written to.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: ""} } } diff --git a/star.wdl b/star.wdl index 68193fcd..aa1fd608 100644 --- a/star.wdl +++ b/star.wdl @@ -86,6 +86,24 @@ task GenomeGenerate { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + chrLength: {description: "Text chromosome lengths file."} + chrNameLength: {description: "Text chromosome name lengths file."} + chrName: {description: "Text chromosome names file."} + chrStart: {description: "Chromosome start sites file."} + genome: {description: "Binary genome sequence file."} + genomeParameters: {description: "Genome parameters file."} + sa: {description: "Suffix arrays file."} + saIndex: {description: "Index file of suffix arrays."} + exonGeTrInfo: {description: "Exon, gene and transcript information file."} + exonInfo: {description: "Exon information file."} + geneInfo: {description: "Gene information file."} + sjdbInfo: {description: "Splice junctions coordinates file."} + sjdbListFromGtfOut: {description: "Splice junctions from input GTF file."} + sjdbListOut: {description: "Splice junction list file."} + transcriptInfo: {description: "Transcripts information file."} + starIndex: {description: "A collection of all STAR index files."} } } @@ -181,6 +199,10 @@ task Star { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + bamFile: {description: "Alignment file."} + logFinalOut: {description: "Log information file."} } } diff --git a/strelka.wdl b/strelka.wdl index f4b9888b..be08e386 100644 --- a/strelka.wdl +++ b/strelka.wdl @@ -83,6 +83,10 @@ task Germline { memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + variants: {description: "Output VCF file."} + variantsIndex: {description: "Index of output VCF file."} } } @@ -158,6 +162,12 @@ task Somatic { memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + indelsVcf: {description: "VCF containing INDELS."} + indelsIndex: {description: "Index of output `indelsVcf`."} + variants: {description: "VCF containing variants."} + variantsIndex: {description: "Index of output `variants`."} } meta { diff --git a/stringtie.wdl b/stringtie.wdl index fff4140c..05df05c6 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -78,6 +78,10 @@ task Stringtie { memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + assembledTranscripts: {description: "GTF file containing the assembled transcripts."} + geneAbundance: {description: "Gene abundances in tab-delimited format."} } } @@ -141,5 +145,8 @@ task Merge { memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + mergedGtfFile: {description: "A merged GTF file from a set of GTF files."} } } diff --git a/survivor.wdl b/survivor.wdl index c7b31058..8b0360d8 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -74,5 +74,8 @@ task Merge { memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + mergedVcf: {description: "All the vcf files specified in fileList merged."} } } diff --git a/umi-tools.wdl b/umi-tools.wdl index 7b0a3991..6524d656 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -66,6 +66,10 @@ task Extract { threePrime: {description: "Whether or not the UMI's are at the reads' 3' end. If false the UMIs are extracted from the 5' end.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + extractedRead1: {description: "First read with UMI extracted to read name."} + extractedRead2: {description: "Second read with UMI extracted to read name."} } } @@ -124,5 +128,12 @@ task Dedup { memory: {description: "The amount of memory required for the task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + deduppedBam: {description: "Deduplicated BAM file."} + deduppedBamIndex: {description: "Index of the deduplicated BAM file."} + editDistance: {description: "Report of the (binned) average edit distance between the UMIs at each position."} + umiStats: {description: "UMI-level summary statistics."} + positionStats: {description: "The counts for unique combinations of UMI and position."} } } diff --git a/vardict.wdl b/vardict.wdl index fc37c9ef..1c20e51c 100644 --- a/vardict.wdl +++ b/vardict.wdl @@ -118,5 +118,8 @@ task VarDict { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + vcfFile: {description: "Output VCF file."} } } diff --git a/vt.wdl b/vt.wdl index 95585ff2..94414050 100644 --- a/vt.wdl +++ b/vt.wdl @@ -64,5 +64,8 @@ task Normalize { memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Output VCF file."} } } From 7d76ed6c3e0bfa5ab679deb54ef24da0955d1ed0 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 14:45:04 +0100 Subject: [PATCH 397/902] Update TO-DO. --- TO-DO.md | 2 +- whatshap.wdl | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/TO-DO.md b/TO-DO.md index b54d995c..9a7db355 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -22,7 +22,7 @@ Some tasks are importing other WDL files. * seqtk.wdl: `Sample` * spades.wdl: `Spades` * unicycler.wdl: `Unicycler` - +* wisestork.wdl: `Count`, `GcCorrect`, `Newref`, `Zscore` ## Imports other tasks: * bamstats.wdl diff --git a/whatshap.wdl b/whatshap.wdl index 5c69400a..7307ce7c 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -92,6 +92,10 @@ task Phase { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + phasedVCF: {description: "VCF file containing phased variants."} + phasedVCFIndex: {description: "Index of phased VCF file."} } } @@ -144,6 +148,11 @@ task Stats { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + phasedGTF: {description: "Phasing statistics for a single VCF file."} + phasedTSV: {description: "Statistics in a tab-separated value format."} + phasedBlockList: {description: "List of the total number of phase sets/blocks."} } } @@ -204,5 +213,9 @@ task Haplotag { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + bam: {description: "BAM file containing tagged reads for haplotype."} + bamIndex: {description: "Index of the tagged BAM file."} } } From 7dab07f86c611fdc26fe5863ae6eb5d155be430f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 16:46:21 +0100 Subject: [PATCH 398/902] Update picard. --- TO-DO.md | 1 + picard.wdl | 54 +++++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/TO-DO.md b/TO-DO.md index 9a7db355..9216bc0c 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -23,6 +23,7 @@ Some tasks are importing other WDL files. 
* spades.wdl: `Spades` * unicycler.wdl: `Unicycler` * wisestork.wdl: `Count`, `GcCorrect`, `Newref`, `Zscore` +* picard.wdl: `ScatterIntervalList` ## Imports other tasks: * bamstats.wdl diff --git a/picard.wdl b/picard.wdl index fd278958..2005fe28 100644 --- a/picard.wdl +++ b/picard.wdl @@ -321,8 +321,8 @@ task CollectRnaSeqMetrics { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - metrics: {description: ""} - chart: {description: ""} + metrics: {description: "Metrics describing the distribution of bases within the transcripts."} + chart: {description: "Plot of normalized position vs. coverage."} } } @@ -385,9 +385,9 @@ task CollectTargetedPcrMetrics { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - perTargetCoverage: {description: ""} - perBaseCoverage: {description: ""} - metrics: {description: ""} + perTargetCoverage: {description: "Per target coverage information."} + perBaseCoverage: {description: "Per base coverage information to."} + metrics: {description: "File containing metrics."} } } @@ -539,6 +539,11 @@ task GatherBamFiles { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: ""} + outputBamIndex: {description: ""} + outputBamMd5: {description: ""} } } @@ -582,6 +587,9 @@ task GatherVcfs { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: ""} } } @@ -666,6 +674,12 @@ task MarkDuplicates { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: ""} + outputBamIndex: {description: ""} + outputBamMd5: {description: ""} + metricsFile: {description: ""} } } @@ -725,6 +739,10 @@ task MergeVCFs { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Multiple variant files combined into a single variant file."} + outputVcfIndex: {description: "Index of `outputVcf`."} } } @@ -768,6 +786,21 @@ task SamToFastq { docker: dockerImage } + parameter_meta { + # inputs + inputBam: {description: "Input BAM file to extract reads from.", category: "required"} + inputBamIndex: {description: "Input BAM index file.", category: "required"} + paired: {description: "Set to false when input data is single-end.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + read1: {description: "Fastq file containing reads from the first pair."} + read2: {description: "Fastq file containing reads from the second pair."} + unpairedRead: {description: "Fastq file containing unpaired reads."} + meta { WDL_AID: { exclude: ["noneFile"] @@ -864,6 +897,10 @@ task SortSam { XmxGb: {description: "The maximum memory available to picard SortSam. Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "Sorted BAM file."} + outputBamIndex: {description: "Index of sorted BAM file."} } } @@ -911,6 +948,10 @@ task SortVcf { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Sorted VCF file(s)."} + outputVcfIndex: {description: "Index(es) of sort(ed) VCF file(s)."} } } @@ -955,5 +996,8 @@ task RenameSample { memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + renamedVcf: {description: "New VCF with renamed sample."} } } From b22cc9f9ba8e20e4685005bede66fe0dc129ccd8 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 17:02:49 +0100 Subject: [PATCH 399/902] Fix travis error. 
--- picard.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/picard.wdl b/picard.wdl index 2005fe28..cc2634f0 100644 --- a/picard.wdl +++ b/picard.wdl @@ -800,6 +800,7 @@ task SamToFastq { read1: {description: "Fastq file containing reads from the first pair."} read2: {description: "Fastq file containing reads from the second pair."} unpairedRead: {description: "Fastq file containing unpaired reads."} + } meta { WDL_AID: { From ccc38727ddf49a3cebb566fadf7145675b0eafa2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 17:19:50 +0100 Subject: [PATCH 400/902] Update samtools. --- samtools.wdl | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index fcd996c7..0aecf4ee 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -58,8 +58,8 @@ task BgzipAndIndex { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - compressed: {description: ""} - index: {description: ""} + compressed: {description: "Compressed input file."} + index: {description: "Index of the compressed input file."} } } @@ -167,9 +167,9 @@ task Fastq { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - read1: {description: ""} - read2: {description: ""} - read0: {description: ""} + read1: {description: "Reads with the READ1 FLAG set."} + read2: {description: "Reads with the READ2 FLAG set."} + read0: {description: "Reads with either READ1 FLAG or READ2 flag set."} } } @@ -214,8 +214,8 @@ task FilterShortReadsBam { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - filteredBam: {description: ""} - filteredBamIndex: {description: ""} + filteredBam: {description: "BAM file filtered for short reads."} + filteredBamIndex: {description: "Index of filtered BAM file."} } } @@ -254,7 +254,7 @@ task Flagstat { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - flagstat: {description: ""} + flagstat: {description: "The number of alignments for each FLAG type."} } } @@ -306,8 +306,8 @@ task Index { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - indexedBam: {description: ""} - index: {description: ""} + indexedBam: {description: "BAM file that was indexed."} + index: {description: "Index of the input BAM file."} } } @@ -343,7 +343,7 @@ task Markdup { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputBam: {description: ""} + outputBam: {description: "BAM file with duplicate alignments marked."} } } @@ -395,8 +395,8 @@ task Merge { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputBam: {description: ""} - outputBamIndex: {description: ""} + outputBam: {description: "Multiple BAM files merged into one."} + outputBamIndex: {description: "Index of the merged BAM file."} } } @@ -503,8 +503,8 @@ task Tabix { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - indexedFile: {description: ""} - index: {description: ""} + indexedFile: {description: "Indexed input file."} + index: {description: "Index of the input file."} } } @@ -573,7 +573,7 @@ task View { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputBam: {description: ""} - outputBamIndex: {description: ""} + outputBam: {description: "Processed input file."} + outputBamIndex: {description: "Index of the processed input file."} } } From 4c56f143264390a79319c7c85e3dcca7732fb0f2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 17:24:47 +0100 Subject: [PATCH 401/902] Update picard. --- picard.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/picard.wdl b/picard.wdl index cc2634f0..20fd1f95 100644 --- a/picard.wdl +++ b/picard.wdl @@ -541,9 +541,9 @@ task GatherBamFiles { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputBam: {description: ""} - outputBamIndex: {description: ""} - outputBamMd5: {description: ""} + outputBam: {description: "Concatenated BAM files."} + outputBamIndex: {description: "Index of the output `outputBam`."} + outputBamMd5: {description: "MD5 of the output `outputBam`."} } } @@ -589,7 +589,7 @@ task GatherVcfs { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputVcf: {description: ""} + outputVcf: {description: "Multiple VCF files gathered into one file."} } } From 19610fe328fbfee31e922684663d9a190e631194 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Thu, 12 Nov 2020 12:03:18 +0100 Subject: [PATCH 402/902] Update smoove.wdl --- smoove.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index 82079b2f..e5c5348f 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -67,6 +67,6 @@ task Call { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - smooveVcf: {description: "Calls and genotyping of structural variants in VCF file."} + smooveVcf: {description: "Calls of structural variants in VCF file."} } } From 7aea19d5feeab4aa5ff5a035216157d375dad116 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Thu, 12 Nov 2020 12:11:18 +0100 Subject: [PATCH 403/902] Update vt.wdl --- vt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vt.wdl b/vt.wdl index 94414050..85077dae 100644 --- a/vt.wdl +++ b/vt.wdl @@ -66,6 +66,6 @@ task Normalize { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputVcf: {description: "Output VCF file."} + outputVcf: {description: "Normalized & decomposed VCF file."} } } From c3255755087999b129670fda036bfbe4fe6771d6 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 12 Nov 2020 12:13:56 +0100 Subject: [PATCH 404/902] Update gffcompare. 
--- biopet/bamstats.wdl | 70 ----- biopet/biopet.wdl | 552 ---------------------------------------- biopet/sampleconfig.wdl | 143 ----------- biopet/seqstat.wdl | 64 ----- gffcompare.wdl | 6 +- 5 files changed, 3 insertions(+), 832 deletions(-) delete mode 100644 biopet/bamstats.wdl delete mode 100644 biopet/biopet.wdl delete mode 100644 biopet/sampleconfig.wdl delete mode 100644 biopet/seqstat.wdl diff --git a/biopet/bamstats.wdl b/biopet/bamstats.wdl deleted file mode 100644 index d01bc10c..00000000 --- a/biopet/bamstats.wdl +++ /dev/null @@ -1,70 +0,0 @@ -version 1.0 - -# Copyright (c) 2017 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import "../common.wdl" as common - -task Generate { - input { - IndexedBamFile bam - Boolean scatterMode = false - Boolean onlyUnmapped = false - Boolean tsvOutputs = false - String outputDir - - String? preCommand - File? toolJar - File? bedFile - Reference? 
reference - - String javaXmx = "8G" - String memory = "9G" - } - - File referenceFasta = if defined(reference) then select_first([reference]).fasta else "" - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-bamstats -Xmx~{javaXmx}" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p ~{outputDir} - ~{toolCommand} Generate \ - --bam ~{bam.file} \ - ~{"--bedFile " + bedFile} \ - ~{true="--reference" false="" defined(reference)} ~{referenceFasta} \ - ~{true="--onlyUnmapped" false="" onlyUnmapped} \ - ~{true="--scatterMode" false="" scatterMode} \ - ~{true="--tsvOutputs" false="" tsvOutputs} \ - --outputDir ~{outputDir} - } - - output { - File json = outputDir + "/bamstats.json" - File summaryJson = outputDir + "/bamstats.summary.json" - } - - runtime { - memory: memory - } -} diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl deleted file mode 100644 index e6619e09..00000000 --- a/biopet/biopet.wdl +++ /dev/null @@ -1,552 +0,0 @@ -version 1.0 - -# Copyright (c) 2017 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import "../common.wdl" - -task BaseCounter { - input { - IndexedBamFile bam - File refFlat - String outputDir - String prefix - - String? preCommand - File? toolJar - - String javaXmx = "4G" - String memory = "5G" - } - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-basecounter -Xmx~{javaXmx}" - - command { - set -e -o pipefail - mkdir -p ~{outputDir} - ~{preCommand} - ~{toolCommand} \ - -b ~{bam.file} \ - -r ~{refFlat} \ - -o ~{outputDir} \ - -p ~{prefix} - } - - output { - File exonAntisense = outputDir + "/" + prefix + ".base.exon.antisense.counts" - File exon = outputDir + "/" + prefix + ".base.exon.counts" - File exonMergeAntisense = outputDir + "/" + prefix + ".base.exon.merge.antisense.counts" - File exonMerge = outputDir + "/" + prefix + ".base.exon.merge.counts" - File exonMergeSense = outputDir + "/" + prefix + ".base.exon.merge.sense.counts" - File exonSense = outputDir + "/" + prefix + ".base.exon.sense.counts" - File geneAntisense = outputDir + "/" + prefix + ".base.gene.antisense.counts" - File gene = outputDir + "/" + prefix + ".base.gene.counts" - File geneExonicAntisense = outputDir + "/" + prefix + ".base.gene.exonic.antisense.counts" - File geneExonic = outputDir + "/" + prefix + ".base.gene.exonic.counts" - File geneExonicSense = outputDir + "/" + prefix + ".base.gene.exonic.sense.counts" - File geneIntronicAntisense = outputDir + "/" + prefix + ".base.gene.intronic.antisense.counts" - File geneIntronic = outputDir + "/" + prefix + ".base.gene.intronic.counts" - File geneIntronicSense = outputDir + "/" + prefix + ".base.gene.intronic.sense.counts" - File geneSense = outputDir + "/" + prefix + ".base.gene.sense.counts" - 
File intronAntisense = outputDir + "/" + prefix + ".base.intron.antisense.counts" - File intron = outputDir + "/" + prefix + ".base.intron.counts" - File intronMergeAntisense = outputDir + "/" + prefix + ".base.intron.merge.antisense.counts" - File intronMerge = outputDir + "/" + prefix + ".base.intron.merge.counts" - File intronMergeSense = outputDir + "/" + prefix + ".base.intron.merge.sense.counts" - File intronSense = outputDir + "/" + prefix + ".base.intron.sense.counts" - File metaExonsNonStranded = outputDir + "/" + prefix + ".base.metaexons.non_stranded.counts" - File metaExonsStrandedAntisense = outputDir + "/" + prefix + ".base.metaexons.stranded.antisense.counts" - File metaExonsStranded = outputDir + "/" + prefix + ".base.metaexons.stranded.counts" - File metaExonsStrandedSense = outputDir + "/" + prefix + ".base.metaexons.stranded.sense.counts" - File transcriptAntisense = outputDir + "/" + prefix + ".base.transcript.antisense.counts" - File transcript = outputDir + "/" + prefix + ".base.transcript.counts" - File transcriptExonicAntisense = outputDir + "/" + prefix + ".base.transcript.exonic.antisense.counts" - File transcriptExonic = outputDir + "/" + prefix + ".base.transcript.exonic.counts" - File transcriptExonicSense = outputDir + "/" + prefix + ".base.transcript.exonic.sense.counts" - File transcriptIntronicAntisense = outputDir + "/" + prefix + ".base.transcript.intronic.antisense.counts" - File transcriptIntronic = outputDir + "/" + prefix + ".base.transcript.intronic.counts" - File transcriptIntronicSense = outputDir + "/" + prefix + ".base.transcript.intronic.sense.counts" - File transcriptSense = outputDir + "/" + prefix + ".base.transcript.sense.counts" - } - - runtime { - memory: memory - } -} - -task ExtractAdaptersFastqc { - input { - File inputFile - String outputDir - String adapterOutputFilePath = outputDir + "/adapter.list" - String contamsOutputFilePath = outputDir + "/contaminations.list" - - Boolean? skipContams - File? 
knownContamFile - File? knownAdapterFile - Float? adapterCutoff - Boolean? outputAsFasta - - String javaXmx = "8G" - String memory = "9G" - Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/biopet-extractadaptersfastqc:0.2--1" - } - - command { - set -e - mkdir -p ~{outputDir} - biopet-extractadaptersfastqc -Xmx~{javaXmx} \ - --inputFile ~{inputFile} \ - ~{"--adapterOutputFile " + adapterOutputFilePath } \ - ~{"--contamsOutputFile " + contamsOutputFilePath } \ - ~{"--knownContamFile " + knownContamFile} \ - ~{"--knownAdapterFile " + knownAdapterFile} \ - ~{"--adapterCutoff " + adapterCutoff} \ - ~{true="--skipContams" false="" skipContams} \ - ~{true="--outputAsFasta" false="" outputAsFasta} - } - - output { - File adapterOutputFile = adapterOutputFilePath - File contamsOutputFile = contamsOutputFilePath - Array[String] adapterList = read_lines(adapterOutputFile) - Array[String] contamsList = read_lines(contamsOutputFile) - } - - runtime { - memory: memory - time_minutes: timeMinutes - docker: dockerImage - } - - parameter_meta { - # inputs - inputFile: {description: "Input fastq file.", category: "required"} - outputDir: {description: "The path to which the output should be written.", category: "required"} - adapterOutputFilePath: {description: "Output file for adapters, if not supplied output will go to stdout.", category: "common"} - contamsOutputFilePath: {description: "Output file for contaminations, if not supplied output will go to stdout.", category: "common"} - skipContams: {description: "If this is set only the adapters block is used, other wise contaminations is also used.", category: "advanced"} - knownContamFile: {description: "This file should contain the known contaminations from fastqc.", category: ""advanced} - knownAdapterFile: {description: "This file should contain the known adapters from fastqc.", category: "advanced"} - adapterCutoff: {description: "The fraction of the adapters in a read should be above this fraction, default 
is 0.001.", category: "advanced"} - outputAsFasta: {description: "Output in fasta format, default only sequences.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - - # outputs - adapterOutputFile: {description: "Output file with adapters."} - contamsOutputFile: {description: "Output file with contaminations."} - adapterList: {description: "List of adapters."} - contamsList: {description: "List of contaminations."} - } -} - -task FastqSplitter { - input { - File inputFastq - Array[String]+ outputPaths - - String? preCommand - File? toolJar - - String javaXmx = "4G" - String memory = "5G" - String dockerImage = "quay.io/biocontainers/biopet-fastqsplitter:0.1--2" - } - - command { - set -e - mkdir -p $(dirname ~{sep=') $(dirname ' outputPaths}) - biopet-fastqsplitter -Xmx~{javaXmx} \ - -I ~{inputFastq} \ - -o ~{sep=' -o ' outputPaths} - } - - output { - Array[File] chunks = outputPaths - } - - runtime { - memory: memory - docker: dockerImage - } -} - -task FastqSync { - input { - FastqPair refFastq - FastqPair inputFastq - String out1path - String out2path - - String? preCommand - File? 
toolJar - - String javaXmx = "4G" - String memory = "5G" - } - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-fastqsync -Xmx~{javaXmx}" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p $(dirname ~{out1path}) $(dirname ~{out2path}) - ~{toolCommand} \ - --in1 ~{inputFastq.R1} \ - --in2 ~{inputFastq.R2} \ - --ref1 ~{refFastq.R1} \ - --ref2 ~{refFastq.R2} \ - --out1 ~{out1path} \ - --out2 ~{out2path} - } - - output { - FastqPair out1 = object { - R1: out1path, - R2: out2path - } - } - - runtime { - memory: memory - } -} - -task ScatterRegions { - input { - File referenceFasta - File referenceFastaDict - Int scatterSizeMillions = 1000 - Boolean notSplitContigs = false - - Int? scatterSize - File? regions - File? bamFile - File? bamIndex - - String javaXmx = "500M" - String memory = "1G" - Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/biopet-scatterregions:0.2--0" - } - - # OutDirPath must be defined here because the glob process relies on - # linking. This path must be in the containers filesystem, otherwise the - # linking does not work. - String outputDirPath = "scatters" - String finalSize = if defined(scatterSize) then "~{scatterSize}" else "~{scatterSizeMillions}000000" - - command <<< - set -e -o pipefail - mkdir -p ~{outputDirPath} - biopet-scatterregions -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -R ~{referenceFasta} \ - -o ~{outputDirPath} \ - ~{"-s " + finalSize} \ - ~{"-L " + regions} \ - ~{"--bamFile " + bamFile} \ - ~{true="--notSplitContigs" false="" notSplitContigs} - - # Glob messes with order of scatters (10 comes before 1), which causes - # problems at gatherGvcfs - # Therefore we reorder the scatters with python. 
- python << CODE - import os - scatters = os.listdir("~{outputDirPath}") - splitext = [ x.split(".") for x in scatters] - splitnum = [x.split("-") + [y] for x,y in splitext] - ordered = sorted(splitnum, key=lambda x: int(x[1])) - merged = ["~{outputDirPath}/{}-{}.{}".format(x[0],x[1],x[2]) for x in ordered] - for x in merged: - print(x) - CODE - >>> - - output { - Array[File] scatters = read_lines(stdout()) - } - - runtime { - memory: memory - time_minutes: timeMinutes - docker: dockerImage - } - - parameter_meta { - # inputs - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} - scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} - notSplitContigs: {description: "Equivalent to biopet scatterregions' `--notSplitContigs` flag.", category: "advanced"} - scatterSize: {description: "Overrides scatterSizeMillions with a smaller value if set.", category: "advanced"} - regions: {description: "The regions to be scattered.", category: "advanced"} - bamFile: {description: "Equivalent to biopet scatterregions' `--bamfile` option.", category: "advanced"} - bamIndex: {description: "The index for the bamfile given through bamFile.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - - # outputs - scatters: {description: "Smaller scatter regions of equal size."} - } -} - -task ValidateAnnotation { - input { - Reference reference - - File? refRefflat - File? gtfFile - - String javaXmx = "3G" - String memory = "4G" - String dockerImage = "quay.io/biocontainers/biopet-validateannotation:0.1--0" - } - - command { - biopet-validateannotation -Xmx~{javaXmx} \ - ~{"-r " + refRefflat} \ - ~{"-g " + gtfFile} \ - -R ~{reference.fasta} - } - - output { - File stderr = stderr() - } - - runtime { - memory: memory - docker: dockerImage - } -} - -task ValidateFastq { - input { - File read1 - File? read2 - - String javaXmx = "3G" - String memory = "4G" - String dockerImage = "quay.io/biocontainers/biopet-validatefastq:0.1.1--1" - } - - command { - biopet-validatefastq -Xmx~{javaXmx} \ - --fastq1 ~{read1} \ - ~{"--fastq2 " + read2} - } - - output { - File stderr = stderr() - } - - runtime { - memory: memory - docker: dockerImage - } -} - -task ValidateVcf { - input { - IndexedVcfFile vcf - Reference reference - - String javaXmx = "3G" - String memory = "4G" - String dockerImage = "quay.io/biocontainers/biopet-validatevcf:0.1--0" - } - - command { - biopet-validatevcf -Xmx~{javaXmx} \ - -i ~{vcf.file} \ - -R ~{reference.fasta} - } - - output { - File stderr = stderr() - } - - runtime { - memory: memory - docker: dockerImage - } -} - -task VcfStats { - input { - IndexedVcfFile vcf - Reference reference - String outputDir - Boolean writeBinStats = false - Int localThreads = 1 - Boolean notWriteContigStats = false - Boolean skipGeneral = false - Boolean skipGenotype = false - Boolean skipSampleDistributions = false - Boolean skipSampleCompare = false - - File? intervals - Array[String]+? infoTags - Array[String]+? genotypeTags - Int? sampleToSampleMinDepth - Int? binSize - Int? maxContigsInSingleJob - String? sparkMaster - Int? 
sparkExecutorMemory - Array[String]+? sparkConfigValues - - String javaXmx = "4G" - String memory = "5G" - String dockerImage = "quay.io/biocontainers/biopet-vcfstats:1.2--0" - } - - command { - set -e - mkdir -p ~{outputDir} - biopet-vcfstats -Xmx~{javaXmx} \ - -I ~{vcf.file} \ - -R ~{reference.fasta} \ - -o ~{outputDir} \ - -t ~{localThreads} \ - ~{"--intervals " + intervals} \ - ~{true="--infoTag" false="" defined(infoTags)} ~{sep=" --infoTag " infoTags} \ - ~{true="--genotypeTag" false="" defined(genotypeTags)} ~{sep=" --genotypeTag " - genotypeTags} \ - ~{"--sampleToSampleMinDepth " + sampleToSampleMinDepth} \ - ~{"--binSize " + binSize} \ - ~{"--maxContigsInSingleJob " + maxContigsInSingleJob} \ - ~{true="--writeBinStats" false="" writeBinStats} \ - ~{true="--notWriteContigStats" false="" notWriteContigStats} \ - ~{true="--skipGeneral" false="" skipGeneral} \ - ~{true="--skipGenotype" false="" skipGenotype} \ - ~{true="--skipSampleDistributions" false="" skipSampleDistributions} \ - ~{true="--skipSampleCompare" false="" skipSampleCompare} \ - ~{"--sparkMaster " + sparkMaster} \ - ~{"--sparkExecutorMemory " + sparkExecutorMemory} \ - ~{true="--sparkConfigValue" false="" defined(sparkConfigValues)} ~{ - sep=" --sparkConfigValue" sparkConfigValues} - } - - output { - File? general = outputDir + "/general.tsv" - File? genotype = outputDir + "/genotype.tsv" - File? sampleDistributionAvailableAggregate = outputDir + - "/sample_distributions/Available.aggregate.tsv" - File? sampleDistributionAvailable = outputDir + "/sample_distributions/Available.tsv" - File? sampleDistributionCalledAggregate = outputDir + - "/sample_distributions/Called.aggregate.tsv" - File? sampleDistributionCalled = outputDir + "/sample_distributions/Called.tsv" - File? sampleDistributionFilteredAggregate = outputDir + - "/sample_distributions/Filtered.aggregate.tsv" - File? sampleDistributionFiltered = outputDir + "/sample_distributions/Filtered.tsv" - File? 
sampleDistributionHetAggregate = outputDir + "/sample_distributions/Het.aggregate.tsv" - File? sampleDistributionHetNoNRefAggregate = outputDir + - "/sample_distributions/HetNonRef.aggregate.tsv" - File? sampleDistributionHetNonRef = outputDir + "/sample_distributions/HetNonRef.tsv" - File? sampleDistributionHet = outputDir + "/sample_distributions/Het.tsv" - File? sampleDistributionHomAggregate = outputDir + "/sample_distributions/Hom.aggregate.tsv" - File? sampleDistributionHomRefAggregate = outputDir + - "/sample_distributions/HomRef.aggregate.tsv" - File? sampleDistributionHomRef = outputDir + "/sample_distributions/HomRef.tsv" - File? sampleDistributionHom = outputDir + "/sample_distributions/Hom.tsv" - File? sampleDistributionHomVarAggregate = outputDir + - "/sample_distributions/HomVar.aggregate.tsv" - File? sampleDistributionHomVar = outputDir + "/sample_distributions/HomVar.tsv" - File? sampleDistributionMixedAggregate = outputDir + - "/sample_distributions/Mixed.aggregate.tsv" - File? sampleDistributionMixed = outputDir + "/sample_distributions/Mixed.tsv" - File? sampleDistributionNoCallAggregate = outputDir + - "/sample_distributions/NoCall.aggregate.tsv" - File? sampleDistributionNoCall = outputDir + "/sample_distributions/NoCall.tsv" - File? sampleDistributionNonInformativeAggregate = outputDir + - "/sample_distributions/NonInformative.aggregate.tsv" - File? sampleDistributionNonInformative = outputDir + - "/sample_distributions/NonInformative.tsv" - File? sampleDistributionToalAggregate = outputDir + - "/sample_distributions/Total.aggregate.tsv" - File? sampleDistributionTotal = outputDir + "/sample_distributions/Total.tsv" - File? sampleDistributionVariantAggregate = outputDir + - "/sample_distributions/Variant.aggregate.tsv" - File? sampleDistributionVariant = outputDir + "/sample_distributions/Variant.tsv" - File? sampleCompareAlleleAbs = outputDir + "/sample_compare/allele.abs.tsv" - File? 
sampleCompareAlleleNonRefAbs = outputDir + "/sample_compare/allele.non_ref.abs.tsv" - File? sampleCompareAlleleRefAbs = outputDir + "/sample_compare/allele.ref.abs.tsv" - File? sampleCompareAlleleRel = outputDir + "/sample_compare/allele.rel.tsv" - File? sampleCompareGenotypeAbs = outputDir + "/sample_compare/genotype.abs.tsv" - File? sampleCompareGenotypeNonRefAbs = outputDir + - "/sample_compare/genotype.non_ref.abs.tsv" - File? sampleCompareGenotypeRefAbs = outputDir + "/sample_compare/genotype.ref.abs.tsv" - File? sampleCompareGenotypeRel = outputDir + "/sample_compare/genotype.rel.tsv" - # A glob is easier, but duplicates all the outputs - Array[File] allStats = select_all([ - general, - genotype, - sampleDistributionAvailableAggregate, - sampleDistributionAvailable, - sampleDistributionCalledAggregate, - sampleDistributionCalled, - sampleDistributionFilteredAggregate, - sampleDistributionFiltered, - sampleDistributionHetAggregate, - sampleDistributionHetNoNRefAggregate, - sampleDistributionHetNonRef, - sampleDistributionHet, - sampleDistributionHomAggregate, - sampleDistributionHomRefAggregate, - sampleDistributionHomRef, - sampleDistributionHom, - sampleDistributionHomVarAggregate, - sampleDistributionHomVar, - sampleDistributionMixedAggregate, - sampleDistributionMixed, - sampleDistributionNoCallAggregate, - sampleDistributionNoCall, - sampleDistributionNonInformativeAggregate, - sampleDistributionNonInformative, - sampleDistributionToalAggregate, - sampleDistributionTotal, - sampleDistributionVariantAggregate, - sampleDistributionVariant, - sampleCompareAlleleAbs, - sampleCompareAlleleNonRefAbs, - sampleCompareAlleleRefAbs, - sampleCompareAlleleRel, - sampleCompareGenotypeAbs, - sampleCompareGenotypeNonRefAbs, - sampleCompareGenotypeRefAbs, - sampleCompareGenotypeRel - ]) - } - - runtime { - cpu: localThreads - memory: memory - docker: dockerImage - } -} diff --git a/biopet/sampleconfig.wdl b/biopet/sampleconfig.wdl deleted file mode 100644 index 
f3955658..00000000 --- a/biopet/sampleconfig.wdl +++ /dev/null @@ -1,143 +0,0 @@ -version 1.0 - -# Copyright (c) 2017 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import "../common.wdl" as common - -task SampleConfig { - input { - Array[File]+ inputFiles - String keyFilePath - - File? toolJar - String? preCommand - String? sample - String? library - String? readgroup - String? jsonOutputPath - String? tsvOutputPath - - String javaXmx = "16G" - String memory = "17G" - } - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-sampleconfig -Xmx~{javaXmx}" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p . 
~{"$(dirname " + jsonOutputPath + ")"} ~{"$(dirname " + tsvOutputPath + ")"} - ~{toolCommand} \ - -i ~{sep="-i " inputFiles} \ - ~{"--sample " + sample} \ - ~{"--library " + library} \ - ~{"--readgroup " + readgroup} \ - ~{"--jsonOutput " + jsonOutputPath} \ - ~{"--tsvOutput " + tsvOutputPath} \ - > ~{keyFilePath} - } - - output { - File keysFile = keyFilePath - File? jsonOutput = jsonOutputPath - File? tsvOutput = tsvOutputPath - } - - runtime { - memory: memory - } -} - -task SampleConfigCromwellArrays { - input { - Array[File]+ inputFiles - String outputPath - - File? toolJar - String? preCommand - - String javaXmx = "4G" - String memory = "5G" - } - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-sampleconfig -Xmx~{javaXmx}" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p $(dirname ~{outputPath}) - ~{toolCommand} CromwellArrays \ - -i ~{sep="-i " inputFiles} \ - ~{"-o " + outputPath} - } - - output { - File outputFile = outputPath - } - - runtime { - memory: memory - } -} - -task CaseControl { - input { - Array[File]+ inputFiles - Array[File]+ inputIndexFiles - Array[File]+ sampleConfigs - String outputPath - String controlTag = "control" - - File? toolJar - String? 
preCommand - - String javaXmx = "4G" - String memory = "5G" - } - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-sampleconfig -Xmx~{javaXmx}" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p $(dirname ~{outputPath}) - ~{toolCommand} CaseControl \ - -i ~{sep=" -i " inputFiles} \ - -s ~{sep=" -s " sampleConfigs} \ - ~{"-o " + outputPath} \ - ~{"--controlTag " + controlTag} - } - - output { - File outputFile = outputPath - CaseControls caseControls = read_json(outputFile) - } - - runtime { - memory: memory - } -} diff --git a/biopet/seqstat.wdl b/biopet/seqstat.wdl deleted file mode 100644 index c2eb5866..00000000 --- a/biopet/seqstat.wdl +++ /dev/null @@ -1,64 +0,0 @@ -version 1.0 - -# Copyright (c) 2017 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -import "../common.wdl" as common - -task Generate { - input { - FastqPair fastq - String outputFile - String sample - String library - String readgroup - - String? preCommand - File? toolJar - - String javaXmx = "4G" - String memory = "5G" - } - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-seqstat -Xmx~{javaXmx}" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p $(dirname ~{outputFile}) - ~{toolCommand} Generate \ - --fastqR1 ~{fastq.R1} \ - ~{"--fastqR2 " + fastq.R2} \ - --output ~{outputFile} \ - ~{"--sample " + sample} \ - ~{"--library " + library } \ - ~{"--readgroup " + readgroup } - } - - output { - File json = outputFile - } - - runtime { - memory: memory - } -} diff --git a/gffcompare.wdl b/gffcompare.wdl index 4b0d6d22..50cab8a6 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -145,10 +145,10 @@ task GffCompare { # outputs annotated: {description: "Annotated GTF file."} - loci: {description: ""} + loci: {description: "File describing the processed loci."} stats: {description: "Various statistics related to the “accuracy” (or a measure of agreement) of the input transcripts when compared to reference annotation data."} - tracking: {description: "File matching transcripts up between samples."} - allFiles: {description: "A collection of all outputs files."} + tracking: {description: "File matching up transcripts between samples."} + allFiles: {description: "A collection of all output files."} redundant: {description: "File containing duplicate/redundant transcripts."} missedIntrons: {description: "File denoting missed introns."} } From 33166e5795cc175bf8384d8b34ff2bcb4776fbfb Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 12 Nov 2020 12:57:36 +0100 Subject: [PATCH 405/902] Update TO-DO.md. 
--- TO-DO.md | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/TO-DO.md b/TO-DO.md index 9216bc0c..be125abe 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -1,4 +1,4 @@ -#TO DO +#TO-DO This file describes WDL files and tasks within those files which need more specific attention than just adding outputs to the parameter_meta. @@ -8,11 +8,6 @@ missing a parameter_meta section. Some tasks are importing other WDL files. ## Out of date with new cluster & parameter_meta: -* bamstats.wdl: `Generate` -* biopet.wdl: `BaseCounter`, `FastqSplitter`, `FastqSync`, - `ValidateAnnotation`, `ValidateFastq`, `ValidateVcf`, `VcfStats` -* sampleconfig.wdl: `SampleConfig`, `SampleConfigCromwellArrays`, `CaseControl` -* seqstat.wdl: `Generate` * common.wdl: `AppendToStringArray`, `CheckFileMD5`, `ConcatenateTextFiles`, `Copy`, `CreateLink`, `MapMd5`, `StringArrayMd5` * fastqsplitter.wdl: `Fastqsplitter` @@ -25,13 +20,8 @@ Some tasks are importing other WDL files. * wisestork.wdl: `Count`, `GcCorrect`, `Newref`, `Zscore` * picard.wdl: `ScatterIntervalList` -## Imports other tasks: -* bamstats.wdl -* biopet.wdl -* sampleconfig.wdl -* seqstat.wdl -* clever.wdl -* strelka.wdl - ## Requires input from others: +These tasks below are still missing descriptions `outputs` in +the `parameter_meta`. * somaticseq.wdl +* picard.wdl From 78951778ad81d402d21db421cc6f7284a24c1941 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 12 Nov 2020 14:44:32 +0100 Subject: [PATCH 406/902] Fix syntax. 
--- gatk.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 82244caa..5cf7c673 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1221,10 +1221,10 @@ task ModelSegments { copyRatioCBS: {description: "The posterior medians of the log2 copy ratio."} alleleFractionCBS: {description: "Minor-allele fraction."} unsmoothedModeledSegments: {description: "The initial modeled-segments result before segmentation smoothing."} - unsmoothedCopyRatioParameters: {description: "The initial copy-ratio-model global-parameter result before segmentation smoothing"} + unsmoothedCopyRatioParameters: {description: "The initial copy-ratio-model global-parameter result before segmentation smoothing."} unsmoothedAlleleFractionParameters: {description: "The initial allele-fraction-model global-parameter result before segmentation smoothing."} modeledSegments: {description: "The final modeled-segments result after segmentation smoothing."} - copyRatioParameters: {description: "The final copy-ratio-model global-parameter result after segmentation smoothing"} + copyRatioParameters: {description: "The final copy-ratio-model global-parameter result after segmentation smoothing."} alleleFractionParameters: {description: "The final allele-fraction-model global-parameter result after segmentation smoothing."} normalHetrozygousAllelicCounts: {description: "Allelic-counts file containing the counts at sites genotyped as heterozygous in the matched-normal sample."} } @@ -1766,7 +1766,7 @@ task VariantFiltration { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - filteredVcf: {description: "A filtered VCF in which passing variants are annotated as PASS and failing variants are annotated with the name(s) of the filter(s) they failed. 
"} + filteredVcf: {description: "A filtered VCF in which passing variants are annotated as PASS and failing variants are annotated with the name(s) of the filter(s) they failed."} filteredVcfIndex: {description: "Index of filtered VCF."} } } From d5863eecf95da8f78d4d06af2bd6b91bc036a4f0 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 17 Nov 2020 14:45:02 +0100 Subject: [PATCH 407/902] Add parameter meta --- picard.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/picard.wdl b/picard.wdl index 20fd1f95..029f0899 100644 --- a/picard.wdl +++ b/picard.wdl @@ -122,8 +122,10 @@ task CollectHsMetrics { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} targets: {description: "Picard interval file of the capture targets.", category: "required"} + targetsFile: {description: "Picard interval file of the capture targets, the same as targets.", category: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} baits: {description: "Picard interval file of the capture bait set.", category: "advanced"} + baitsFile: {description: "Picard interval file of the bait set. Uses targets as a fallback when baits is not set", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 8cc4c073e40ac70f3398eda3bd047aa42d801d26 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 17 Nov 2020 14:46:39 +0100 Subject: [PATCH 408/902] Add period to end of sentence --- picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index 029f0899..17930e3c 100644 --- a/picard.wdl +++ b/picard.wdl @@ -125,7 +125,7 @@ task CollectHsMetrics { targetsFile: {description: "Picard interval file of the capture targets, the same as targets.", category: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} baits: {description: "Picard interval file of the capture bait set.", category: "advanced"} - baitsFile: {description: "Picard interval file of the bait set. Uses targets as a fallback when baits is not set", category: "advanced"} + baitsFile: {description: "Picard interval file of the bait set. Uses targets as a fallback when baits is not set.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 3b0874c0ed573307c2de1926d6df41c808be149b Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 17 Nov 2020 15:59:10 +0100 Subject: [PATCH 409/902] Update picard.wdl parameter meta Co-authored-by: Davy Cats --- picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index 17930e3c..d5601ad0 100644 --- a/picard.wdl +++ b/picard.wdl @@ -122,7 +122,7 @@ task CollectHsMetrics { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} targets: {description: "Picard interval file of the capture targets.", category: "required"} - targetsFile: {description: "Picard interval file of the capture targets, the same as targets.", category: "required"} + targetsFile: {description: "Picard interval file of the capture targets, the same as targets.", category: "advanced"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} baits: {description: "Picard interval file of the capture bait set.", category: "advanced"} baitsFile: {description: "Picard interval file of the bait set. Uses targets as a fallback when baits is not set.", category: "advanced"} From 67116dfe6c9021a011b97889ee08f99f25d5e7b8 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 17 Nov 2020 16:56:08 +0100 Subject: [PATCH 410/902] Update version of tools. 
--- CHANGELOG.md | 2 ++ TO-DO.md | 27 --------------------------- cutadapt.wdl | 2 +- stringtie.wdl | 4 ++-- 4 files changed, 5 insertions(+), 30 deletions(-) delete mode 100644 TO-DO.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b668ab1..27d4aa71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Update CutAdapt to version 3.0.0. ++ Update StringTie to version 2.1.4. + Complete `parameter_meta` for tasks missing the outputs. + DeepVariant: Add an optional input for the gvcf index. + Samtools: `Sort` task now has `threads` in runtime instead of `1`. diff --git a/TO-DO.md b/TO-DO.md deleted file mode 100644 index be125abe..00000000 --- a/TO-DO.md +++ /dev/null @@ -1,27 +0,0 @@ -#TO-DO -This file describes WDL files and tasks within those files which need -more specific attention than just adding outputs to the parameter_meta. - -Some tasks have not been updated to match the new SLURM requirements and are -missing a parameter_meta section. - -Some tasks are importing other WDL files. - -## Out of date with new cluster & parameter_meta: -* common.wdl: `AppendToStringArray`, `CheckFileMD5`, `ConcatenateTextFiles`, - `Copy`, `CreateLink`, `MapMd5`, `StringArrayMd5` -* fastqsplitter.wdl: `Fastqsplitter` -* flash.wdl: `Flash` -* macs2.wdl: `PeakCalling` -* ncbi.wdl: `GenomeDownload`, `DownloadNtFasta`, `DownloadAccessionToTaxId` -* seqtk.wdl: `Sample` -* spades.wdl: `Spades` -* unicycler.wdl: `Unicycler` -* wisestork.wdl: `Count`, `GcCorrect`, `Newref`, `Zscore` -* picard.wdl: `ScatterIntervalList` - -## Requires input from others: -These tasks below are still missing descriptions `outputs` in -the `parameter_meta`. 
-* somaticseq.wdl -* picard.wdl diff --git a/cutadapt.wdl b/cutadapt.wdl index b2dbdec0..b9f5a649 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -85,7 +85,7 @@ task Cutadapt { Int cores = 4 String memory = "~{300 + 100 * cores}M" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) - String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" + String dockerImage = "quay.io/biocontainers/cutadapt:3.0--py37hf01694f_0" } String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) diff --git a/stringtie.wdl b/stringtie.wdl index 05df05c6..81d96132 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -35,7 +35,7 @@ task Stringtie { Int threads = 1 String memory = "2G" Int timeMinutes = 1 + ceil(size(bam, "G") * 60 / threads) - String dockerImage = "quay.io/biocontainers/stringtie:1.3.4--py35_0" + String dockerImage = "quay.io/biocontainers/stringtie:2.1.4--h7e0af3c_0" } command { @@ -101,7 +101,7 @@ task Merge { String memory = "10G" Int timeMinutes = 1 + ceil(size(gtfFiles, "G") * 20) - String dockerImage = "quay.io/biocontainers/stringtie:1.3.4--py35_0" + String dockerImage = "quay.io/biocontainers/stringtie:2.1.4--h7e0af3c_0" } command { From 826cbaf4c0b3eae2b5fb3db8439211c1d9f8fdab Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 17 Nov 2020 17:05:22 +0100 Subject: [PATCH 411/902] Update versions. --- CHANGELOG.md | 2 ++ minimap2.wdl | 4 ++-- multiqc.wdl | 2 +- nanopack.wdl | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 27d4aa71..3da95305 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Update NanoPlot to version 1.32.1. ++ Update MultiQC to version 1.9. + Update CutAdapt to version 3.0.0. + Update StringTie to version 2.1.4. + Complete `parameter_meta` for tasks missing the outputs. 
diff --git a/minimap2.wdl b/minimap2.wdl index 1b719da6..d2e69905 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -33,7 +33,7 @@ task Indexing { Int cores = 1 String memory = "4G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/minimap2:2.17--h84994c4_0" + String dockerImage = "quay.io/biocontainers/minimap2:2.17--hed695b0_3" } command { @@ -100,7 +100,7 @@ task Mapping { Int cores = 4 String memory = "30G" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) - String dockerImage = "quay.io/biocontainers/minimap2:2.17--h84994c4_0" + String dockerImage = "quay.io/biocontainers/minimap2:2.17--hed695b0_3" } command { diff --git a/multiqc.wdl b/multiqc.wdl index 405c0a0b..2571463a 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -58,7 +58,7 @@ task MultiQC { String? memory Int timeMinutes = 2 + ceil(size(reports, "G") * 8) - String dockerImage = "quay.io/biocontainers/multiqc:1.7--py_1" + String dockerImage = "quay.io/biocontainers/multiqc:1.9--py_1" } Int memoryGb = 2 + ceil(size(reports, "G")) diff --git a/nanopack.wdl b/nanopack.wdl index f238ce7b..f86641b0 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -42,7 +42,7 @@ task NanoPlot { Int threads = 2 String memory = "2G" Int timeMinutes = 15 - String dockerImage = "quay.io/biocontainers/nanoplot:1.32.0--py_0" + String dockerImage = "quay.io/biocontainers/nanoplot:1.32.1--py_0" } Map[String, String] fileTypeOptions = {"fastq": "--fastq ", "fasta": "--fasta ", "fastq_rich": "--fastq_rich ", "fastq_minimal": "--fastq_minimal ", "summary": "--summary ", "bam": "--bam ", "ubam": "--ubam ", "cram": "--cram ", "pickle": "--pickle ", "feather": "--feather "} From 70b3484461c1b887f558bb2a5a327ce98ac4f388 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 19 Nov 2020 12:53:26 +0100 Subject: [PATCH 412/902] Update versions. 
--- CHANGELOG.md | 4 ++++ isoseq3.wdl | 2 +- lima.wdl | 2 +- picard.wdl | 32 ++++++++++++++++---------------- samtools.wdl | 18 +++++++++--------- scripts | 2 +- 6 files changed, 32 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3da95305..dae3f185 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,10 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Update Lima to version 2.0.0. ++ Update IsoSeq3 to version 3.4.0. ++ Update samtools to version 1.11. ++ Update Picard to version 2.23.8. + Update NanoPlot to version 1.32.1. + Update MultiQC to version 1.9. + Update CutAdapt to version 3.0.0. diff --git a/isoseq3.wdl b/isoseq3.wdl index c1c4397c..aacbfc60 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -34,7 +34,7 @@ task Refine { Int threads = 2 String memory = "2G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" + String dockerImage = "quay.io/biocontainers/isoseq3:3.4.0--0" } command { diff --git a/lima.wdl b/lima.wdl index 33b2328b..119db3f4 100644 --- a/lima.wdl +++ b/lima.wdl @@ -51,7 +51,7 @@ task Lima { Int threads = 2 String memory = "2G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/lima:1.11.0--0" + String dockerImage = "quay.io/biocontainers/lima:2.0.0--0" } Map[String, String] libraryDesignOptions = {"same": "--same", "different": "--different", "neighbors": "--neighbors"} diff --git a/picard.wdl b/picard.wdl index d5601ad0..f75fdc32 100644 --- a/picard.wdl +++ b/picard.wdl @@ -29,7 +29,7 @@ task BedToIntervalList { String javaXmx = "3G" String memory = "4G" Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -89,7 +89,7 @@ task CollectHsMetrics { # Additional * 2 because picard multiple metrics reads the # reference fasta twice. 
Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -158,7 +158,7 @@ task CollectMultipleMetrics { Int memoryMb = javaXmxMb + 512 # Additional * 2 because picard multiple metrics reads the reference fasta twice. Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -284,7 +284,7 @@ task CollectRnaSeqMetrics { String memory = "9G" # With 6 minutes per G there were several timeouts. Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -342,7 +342,7 @@ task CollectTargetedPcrMetrics { String javaXmx = "3G" String memory = "4G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -404,7 +404,7 @@ task CollectVariantCallingMetrics { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -453,7 +453,7 @@ task CreateSequenceDictionary { String javaXmx = "2G" String memory = "3G" - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -503,7 +503,7 @@ task GatherBamFiles { Int memoryMb = javaXmxMb + 512 # One minute per input gigabyte. 
Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -558,7 +558,7 @@ task GatherVcfs { String javaXmx = "4G" String memory = "5G" Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -622,7 +622,7 @@ task MarkDuplicates { String memoryMb = javaXmxMb + 512 Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } # Task is assuming query-sorted input so that the Secondary and Supplementary reads get @@ -701,7 +701,7 @@ task MergeVCFs { String javaXmx = "4G" String memory = "5G" Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } # Using MergeVcfs instead of GatherVcfs so we can create indices. @@ -757,7 +757,7 @@ task SamToFastq { String javaXmx = "16G" # High memory default to avoid crashes. String memory = "17G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" File? 
noneFile } @@ -818,7 +818,7 @@ task ScatterIntervalList { String javaXmx = "3G" String memory = "4G" - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -859,7 +859,7 @@ task SortSam { # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -917,7 +917,7 @@ task SortVcf { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } @@ -967,7 +967,7 @@ task RenameSample { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.19.0--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { diff --git a/samtools.wdl b/samtools.wdl index 0aecf4ee..9042a0df 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -69,7 +69,7 @@ task Faidx { String outputDir String memory = "2G" - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } command { @@ -118,7 +118,7 @@ task Fastq { Int threads = 1 String memory = "1G" Int timeMinutes = 1 + ceil(size(inputBam) * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } command { @@ -180,7 +180,7 @@ task FilterShortReadsBam { String memory = "1G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) - String dockerImage = 
"quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") @@ -226,7 +226,7 @@ task Flagstat { String memory = "256M" # Only 40.5 MiB used for 150G bam file. Int timeMinutes = 1 + ceil(size(inputBam, "G")) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } command { @@ -266,7 +266,7 @@ task Index { String memory = "2G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } # Select_first is needed, otherwise womtool validate fails. @@ -317,7 +317,7 @@ task Markdup { String outputBamPath Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } command { @@ -356,7 +356,7 @@ task Merge { Int threads = 1 String memory = "4G" Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } String indexPath = sub(outputBamPath, "\.bam$",".bai") @@ -411,7 +411,7 @@ task Sort { Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } # Select first needed as outputPath is optional input (bug in cromwell). 
@@ -523,7 +523,7 @@ task View { Int threads = 1 String memory = "1G" Int timeMinutes = 1 + ceil(size(inFile, "G") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } String outputIndexPath = basename(outputFileName) + ".bai" diff --git a/scripts b/scripts index 0cca0f40..85e2ec54 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 0cca0f40a8e9121e8dcc9e76838f85835a0d8e94 +Subproject commit 85e2ec542b65be5f2a25c22db05c28700fbe6db5 From b5558be1a1706b2ad96f947e61db78985c747cd7 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 19 Nov 2020 13:29:00 +0100 Subject: [PATCH 413/902] Revert update CutAdapt. --- .travis.yml | 3 ++- CHANGELOG.md | 1 - VERSION | 2 +- cutadapt.wdl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 396b998f..3cf0681f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,4 +19,5 @@ before_install: install: - conda install --file requirements-test.txt -script: bash scripts/biowdl_lint.sh +script: + - bash scripts/biowdl_lint.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index dae3f185..a6cc9bff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,6 @@ version 5.0.0-dev + Update Picard to version 2.23.8. + Update NanoPlot to version 1.32.1. + Update MultiQC to version 1.9. -+ Update CutAdapt to version 3.0.0. + Update StringTie to version 2.1.4. + Complete `parameter_meta` for tasks missing the outputs. + DeepVariant: Add an optional input for the gvcf index. 
diff --git a/VERSION b/VERSION index ee74734a..0062ac97 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.1.0 +5.0.0 diff --git a/cutadapt.wdl b/cutadapt.wdl index b9f5a649..b2dbdec0 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -85,7 +85,7 @@ task Cutadapt { Int cores = 4 String memory = "~{300 + 100 * cores}M" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) - String dockerImage = "quay.io/biocontainers/cutadapt:3.0--py37hf01694f_0" + String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" } String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) From 9b8d8a9844ea41ad4f1f630ed6b816be5596f8c9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 14:04:28 +0100 Subject: [PATCH 414/902] add hmftools.wdl --- hmftools.wdl | 433 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 433 insertions(+) create mode 100644 hmftools.wdl diff --git a/hmftools.wdl b/hmftools.wdl new file mode 100644 index 00000000..73c3e318 --- /dev/null +++ b/hmftools.wdl @@ -0,0 +1,433 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Amber { + input { + String normalName + File normalBam + File normalBamIndex + String tumorName + File tumorBam + File tumorBamIndex + String outputDir = "./amber" + File loci + File referenceFasta + File referenceFastaFai + File referenceFastaDict + + Int threads = 2 + String memory = = "33G" + String javaXmx = "32G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" + } + + command { + AMBER -Xmx~{javaXmx} \ + -reference ~{normalName} \ + -reference_bam ~{normalBam} \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + -output_dir ~{outputDir} \ + -threads ~{threads} \ + -ref_genome ~{referenceFasta} \ + -loci ~{loci} + } + + output { + File version = "amber.version" + File tumorBafPcf = "~{outputDir}/~{tumorName}.amber.baf.pcf" + File tumorBafTsv = "~{outputDir}/~{tumorName}.amber.baf.tsv" + File tumorBafVcf = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz" + File tumorBafVcfIndex = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz.tbi" + File tumorContaminationVcf = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz" + File tumorContaminationVcfIndex = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz.tbi" + File tumorContaminationTsv = "~{outputDir}/~{tumorName}.amber.contamination.tsv" + File tumorQc = "~{outputDir}/~{tumorName}.amber.qc" + File normalSnpVcf = "~{outputDir}/~{normalName}.amber.snp.vcf.gz" + File normalSnpVcfIndex = "~{outputDir}/~{normalName}.amber.snp.vcf.gz.tbi" + Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex, + tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, + normalSnpVcf, normalSnpVcfIndex] + } + + runtime { + memory: memory + time_minutes: 
timeMinutes # !UnknownRuntimeKey + docker: dockerImage + cpu: threads + } + + parameter_meta { + normalName: {description: "the name of the normal sample.", category: "required"} + normalBam: {description: "The normal BAM file.", category: "required"} + normalBamIndex: {description: "The index for the normal BAM file.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The tumor BAM file.", category: "required"} + tumorBamIndex: {description: "The index for the tumor BAM file.", category: "required"} + outputDir: {description: "The path to the output directory.", category: "common"} + loci: {description: "A VCF file containing likely heterozygous sites.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + threads: {description: "The number of threads the program will use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task Cobalt { + input { + String normalName + File normalBam + File normalBamIndex + String tumorName + File tumorBam + File tumorBamIndex + String outputDir = "./cobalt" + File gcProfile + + Int threads = 1 + String memory = = "9G" + String javaXmx = "8G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.10--0" + } + + command { + COBALT -Xmx~{javaXmx} \ + -reference ~{normalName} \ + -reference_bam ~{normalBam} \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + -output_dir ~{outputDir}\ + -threads ~{threads} \ + -gc_profile ~{gcProfile} + } + + output { + File version = "cobalt.version" + File normalGcMedianTsv = "~{outputDir}/~{normalName}.cobalt.gc.median.tsv" + File normalRationMedianTsv = "~{outputDir}/~{normalName}.cobalt.ratio.median.tsv" + File normalRationPcf = "~{outputDir}/~{normalName}.cobalt.ratio.pcf" + File tumorGcMedianTsv = "~{outputDir}/~{tumorName}.cobalt.gc.median.tsv" + File tumorRatioPcf = "~{outputDir}/~{tumorName}.cobalt.ratio.pcf" + File tumorRatioTsv = "~{outputDir}/~{tumorName}.cobalt.ratio.tsv" + File tumorChrLen = "~{outputDir}/~{tumorName}.chr.len" + Array[File] outputs = [version, normalGcMedianTsv, normalRationMedianTsv, + normalRationPcf, tumorGcMedianTsv, tumorRatioPcf, tumorRatioTsv, tumorChrLen] + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + cpu: threads + } + + parameter_meta { + normalName: {description: "the name of the normal sample.", category: "required"} + normalBam: {description: "The normal BAM file.", category: "required"} + normalBamIndex: {description: "The index for the normal BAM file.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The tumor BAM file.", category: "required"} + tumorBamIndex: {description: "The 
index for the tumor BAM file.", category: "required"} + outputDir: {description: "The path to the output directory.", category: "common"} + gcProfile: {description: "A file describing the GC profile of the reference genome.", category: "required"} + threads: {description: "The number of threads the program will use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task GripssApplicationKt { + input { + File inputVcf + String outputPath = "gripss.vcf.gz" + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File breakpointHotspot + File breakendPon + File breakpointPon + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + com.hartwig.hmftools.gripss.GripssApplicationKt \ + -ref_genome ~{referenceFasta} \ + -breakpoint_hotspot ~{breakpointHotspot} \ + -breakend_pon ~{breakendPon} \ + -breakpoint_pon ~{breakpointPon} \ + -input_vcf ~{inputVcf} \ + -output_vcf ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF.", category: "required"} + outputPath: {description: "The path where th eoutput VCF will be 
written.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} + breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} + breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task GripssHardFilterApplicationKt { + input { + File inputVcf + String outputPath = "gripss_hard_filter.vcf.gz" + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ + -input_vcf ~{inputVcf} \ + -output_vcf ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF.", category: "required"} + outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task Purple { + input { + String normalName + String tumorName + String outputDir = "./purple" + Array[File]+ amberOutput + Array[File]+ cobaltOutput + File gcProfile + File somaticVcf + File filteredSvVcf + File fullSvVcf + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File hotspots + + Int threads = 1 + Int time_minutes = 60 + String memory = "13G" + String javaXmx = "12G" + String docker = "quay.io/biocontainers/hmftools-purple:2.51--1" + } + + command { + PURPLE -Xmx~{javaXmx} \ + -reference ~{normalName} \ + -tumor ~{tumorName} \ + -output_dir ~{outputDir} \ + -amber ~{sub(amberOutput, basename(amberOutput[0]), "")} \ + -cobalt ~{sub(cobaltOutput, basename(cobaltOutput[0]), "")} \ + -gc_profile ~{gcProfile} \ + -somatic_vcf ~{somaticVcf} \ + -structural_vcf ~{filteredSvVcf} \ + -sv_recovery_vcf ~{fullSvVcf} \ + -circos /usr/local/bin/circos \ + -ref_genome ~{referenceFasta} \ + -driver_catalog \ + -hotspots ~{hotspots} \ + -threads ~{threads} + + # TODO if shallow also the following: + #-highly_diploid_percentage 0.88 \ + #-somatic_min_total 100 \ + #-somatic_min_purity_spread 0.1 + } + + output { + #TODO + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + normalName: {description: "the name of the normal sample.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + outputDir: {description: "The path to the output directory.", category: "common"} + amberOutput: {description: "The output files of hmftools amber.", category: "required"} + cobaltOutput: {description: "The output files of hmftools cobalt", category: "required"} + gcProfile: {description: "A file describing the GC profile of the reference genome.", category: "required"} + somaticVcf: {description: "The 
somatic variant calling results.", category: "required"} + filteredSvVcf: {description: "The filtered structural variant calling results.", category: "required"} + fullSvVcf: {description: "The unfiltered structural variant calling results.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + + threads: {description: "The number of threads the program will use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task Sage { + input { + String tumorName + File tumorBam + File tumorBamIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai + File hotspots + File panelBed + File highConfidenceBed + Boolean hg38 = false + String outputPath = "./sage.vcf.gz" + + String? normalName + File? normalBam + File? 
normalBamIndex + + Int threads = 2 + String javaXmx = "32G" + String memory = "33G" + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--2" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-sage-2.2-2/sage.jar \ + com.hartwig.hmftools.sage.SageApplication \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + ~{"-reference " + normalName} \ + ~{"-reference_bam " + normalBam} \ + -ref_genome ~{referenceFasta} \ + -hotspots ~{hotspots} \ + -panel_bed ~{panelBed} \ + -high_confidence_bed ~{highConfidenceBed} \ + -assembly ~{true="hg38" false="hg19" hg38} \ + -threads ~{threads} \ + -out ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + # There is some plots as well, but in the current container the labels in the plots are just series of `□`s. + # This seems to be a systemic issue with R generated plots in biocontainers... 
+ } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} + tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} + normalName: {description: "The name of the normal/reference sample.", category: "common"} + normalBam: {description: "The BAM file for the normal sample.", category: "common"} + normalBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + panelBed: {description: "A bed file describing coding regions to search for in frame indels.", category: "required"} + highConfidenceBed: {description: "A bed files describing high confidence mapping regions.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From 90fd344b8f41fb6b1d632a8412ec2b416c5c7715 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 14:11:19 +0100 Subject: [PATCH 415/902] fix some typos --- hmftools.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 73c3e318..3757cade 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,7 +35,7 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = = "33G" + String memory = "33G" String javaXmx = "32G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" @@ -112,7 +112,7 @@ task Cobalt { File gcProfile Int threads = 1 - String memory = = "9G" + String memory = "9G" String javaXmx = "8G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.10--0" @@ -286,10 +286,10 @@ task Purple { File hotspots Int threads = 1 - Int time_minutes = 60 + Int timeMinutes = 60 String memory = "13G" String javaXmx = "12G" - String docker = "quay.io/biocontainers/hmftools-purple:2.51--1" + String dockerImage = "quay.io/biocontainers/hmftools-purple:2.51--1" } command { @@ -297,8 +297,8 @@ task Purple { -reference ~{normalName} \ -tumor ~{tumorName} \ -output_dir ~{outputDir} \ - -amber ~{sub(amberOutput, basename(amberOutput[0]), "")} \ - -cobalt ~{sub(cobaltOutput, basename(cobaltOutput[0]), "")} \ + -amber ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ + -cobalt ~{sub(cobaltOutput[0], basename(cobaltOutput[0]), "")} \ -gc_profile ~{gcProfile} \ -somatic_vcf ~{somaticVcf} \ -structural_vcf ~{filteredSvVcf} \ From 764f188c73d8c1b57f0d50b148a30d0e84309c42 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 14:39:52 +0100 Subject: [PATCH 416/902] fix outputs amber/cobalt --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 
3757cade..09af79c9 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -54,7 +54,7 @@ task Amber { } output { - File version = "amber.version" + File version = "~{outputDir}/amber.version" File tumorBafPcf = "~{outputDir}/~{tumorName}.amber.baf.pcf" File tumorBafTsv = "~{outputDir}/~{tumorName}.amber.baf.tsv" File tumorBafVcf = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz" @@ -130,7 +130,7 @@ task Cobalt { } output { - File version = "cobalt.version" + File version = "~{outputDir}/cobalt.version" File normalGcMedianTsv = "~{outputDir}/~{normalName}.cobalt.gc.median.tsv" File normalRationMedianTsv = "~{outputDir}/~{normalName}.cobalt.ratio.median.tsv" File normalRationPcf = "~{outputDir}/~{normalName}.cobalt.ratio.pcf" From 857da21ef4b61276d3beb5ddbe56d0895cd96c32 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 14:54:06 +0100 Subject: [PATCH 417/902] typo --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 09af79c9..ed2914bf 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -124,7 +124,7 @@ task Cobalt { -reference_bam ~{normalBam} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ - -output_dir ~{outputDir}\ + -output_dir ~{outputDir} \ -threads ~{threads} \ -gc_profile ~{gcProfile} } From 54ac9d0c41f74c578f2418bc76483d1081695369 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 15:18:41 +0100 Subject: [PATCH 418/902] add missed argument in purple --- hmftools.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index ed2914bf..fc56ecd9 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -283,6 +283,7 @@ task Purple { File referenceFasta File referenceFastaFai File referenceFastaDict + File driverGenePanel File hotspots Int threads = 1 @@ -306,6 +307,7 @@ task Purple { -circos /usr/local/bin/circos \ -ref_genome ~{referenceFasta} \ -driver_catalog \ + -driver_gene_panel ~{driverGenePanel} \ -hotspots ~{hotspots} \ -threads ~{threads} @@ -340,6 +342,7 @@ 
task Purple { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + driverGenePanel: {description: "A bed file describing the driver gene panel.", category: "required"} hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} From 243c1dbfc834d2e52876e826bf2f852fe51cb2fb Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 26 Nov 2020 09:05:09 +0100 Subject: [PATCH 419/902] enable genotyping --- smoove.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index e5c5348f..d1011f6c 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -41,11 +41,13 @@ task Call { --outdir ~{outputDir} \ --name ~{sample} \ --fasta ~{referenceFasta} \ + --removepr \ + --genotype \ ~{bamFile} } output { - File smooveVcf = outputDir + "/" + sample + "-smoove.vcf.gz" + File smooveVcf = outputDir + "/" + sample + "-smoove.genotyped.vcf.gz" } runtime { From 86f26caf9fa94c5aa2b2e917bc608e1ef8173966 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 26 Nov 2020 09:31:40 +0100 Subject: [PATCH 420/902] Update PacBio tasks. --- lima.wdl | 7 +++++-- pacbio.wdl | 10 +++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/lima.wdl b/lima.wdl index 119db3f4..c06a9a73 100644 --- a/lima.wdl +++ b/lima.wdl @@ -88,9 +88,12 @@ task Lima { ~{barcodeFile} \ ~{outputPrefix + ".bam"} - # copy the files with the default filename to the folder specified in + # Copy the files with the default filename to the folder specified in # outputPrefix. - if [ "~{basename(outputPrefix)}.json" != "~{outputPrefix}.json" ]; then + if [[ -f "~{outputPrefix}.json" ]] + then + echo "Log files already at output location." 
+ else cp "~{basename(outputPrefix)}.json" "~{outputPrefix}.json" cp "~{basename(outputPrefix)}.lima.counts" "~{outputPrefix}.lima.counts" cp "~{basename(outputPrefix)}.lima.report" "~{outputPrefix}.lima.report" diff --git a/pacbio.wdl b/pacbio.wdl index df0343d9..7c0113fd 100644 --- a/pacbio.wdl +++ b/pacbio.wdl @@ -23,7 +23,7 @@ version 1.0 task mergePacBio { input { Array[File]+ reports - String mergedReport + String outputPathMergedReport String memory = "4G" String dockerImage = "lumc/pacbio-merge:0.2" @@ -31,10 +31,10 @@ task mergePacBio { command { set -e - mkdir -p $(dirname ~{mergedReport}) + mkdir -p $(dirname ~{outputPathMergedReport}) pacbio_merge \ --reports ~{sep=" " reports} \ - --json-output ~{mergedReport} + --json-output ~{outputPathMergedReport} } runtime { @@ -43,13 +43,13 @@ task mergePacBio { } output { - File outputMergedReport = mergedReport + File outputMergedReport = outputPathMergedReport } parameter_meta { # inputs reports: {description: "The PacBio report files to merge.", category: "required"} - mergedReport: {description: "The location the merged PacBio report file should be written to.", category: "common"} + outputPathMergedReport: {description: "The location the merged PacBio report file should be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 02f71e1708a92c7128165ab2919b3c9f4fb117dc Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 27 Nov 2020 14:44:01 +0100 Subject: [PATCH 421/902] Upload another fix. 
--- pbbam.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/pbbam.wdl b/pbbam.wdl index d893e64d..ae64b87c 100644 --- a/pbbam.wdl +++ b/pbbam.wdl @@ -36,7 +36,6 @@ task Index { String bamIndexPath = outputPath + ".pbi" command { - bash -c ' set -e # Make sure outputBamPath does not exist. if [ ! -f ~{outputPath} ] @@ -45,7 +44,6 @@ task Index { ln ~{bamFile} ~{outputPath} fi pbindex ~{outputPath} ~{bamIndexPath} - ' } output { From a34711e264482507e73669190b456d4de499f164 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 30 Nov 2020 10:37:23 +0100 Subject: [PATCH 422/902] downgrade stringtie and fix size call in gffread --- CHANGELOG.md | 5 ++++- gffread.wdl | 2 +- stringtie.wdl | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a6cc9bff..216fdd67 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,13 +10,16 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Fixed the `size` call in the default for gffread's timeMinutes, to retrieve + GBs instead of bytes. ++ Update stringtie to version 1.3.6. + Update Lima to version 2.0.0. + Update IsoSeq3 to version 3.4.0. + Update samtools to version 1.11. + Update Picard to version 2.23.8. + Update NanoPlot to version 1.32.1. + Update MultiQC to version 1.9. -+ Update StringTie to version 2.1.4. ++ ~Update StringTie to version 2.1.4.~ + Complete `parameter_meta` for tasks missing the outputs. + DeepVariant: Add an optional input for the gvcf index. + Samtools: `Sort` task now has `threads` in runtime instead of `1`. diff --git a/gffread.wdl b/gffread.wdl index 66230989..967dd5c9 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -32,7 +32,7 @@ task GffRead { String? proteinFastaPath String? 
filteredGffPath - Int timeMinutes = 1 + ceil(size(inputGff) * 10) + Int timeMinutes = 1 + ceil(size(inputGff, "G") * 10) String dockerImage = "quay.io/biocontainers/gffread:0.9.12--0" } diff --git a/stringtie.wdl b/stringtie.wdl index 81d96132..d3a6f73d 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -35,7 +35,7 @@ task Stringtie { Int threads = 1 String memory = "2G" Int timeMinutes = 1 + ceil(size(bam, "G") * 60 / threads) - String dockerImage = "quay.io/biocontainers/stringtie:2.1.4--h7e0af3c_0" + String dockerImage = "quay.io/biocontainers/stringtie:1.3.6--h92e31bf_0" } command { From ff47f07c0657f717fbf2311b56cdd3ad3b23a7c2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 30 Nov 2020 17:22:38 +0100 Subject: [PATCH 423/902] Update lima. --- CHANGELOG.md | 2 ++ lima.wdl | 39 ++++++++++++++++----------------------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a6cc9bff..01303723 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. ++ Lima: Fix copy commands. + Update Lima to version 2.0.0. + Update IsoSeq3 to version 3.4.0. + Update samtools to version 1.11. 
diff --git a/lima.wdl b/lima.wdl index c06a9a73..90cd6986 100644 --- a/lima.wdl +++ b/lima.wdl @@ -56,7 +56,7 @@ task Lima { Map[String, String] libraryDesignOptions = {"same": "--same", "different": "--different", "neighbors": "--neighbors"} - command { + command <<< set -e mkdir -p "$(dirname ~{outputPrefix})" lima \ @@ -83,33 +83,26 @@ task Lima { ~{true="--peek-guess" false="" peekGuess} \ --log-level ~{logLevel} \ --num-threads ~{threads} \ - ~{"--log-file " + outputPrefix + ".stderr.log"} \ + ~{"--log-file " + outputPrefix + ".fl.stderr.log"} \ ~{inputBamFile} \ ~{barcodeFile} \ - ~{outputPrefix + ".bam"} + ~{outputPrefix + ".fl.bam"} - # Copy the files with the default filename to the folder specified in - # outputPrefix. - if [[ -f "~{outputPrefix}.json" ]] - then - echo "Log files already at output location." - else - cp "~{basename(outputPrefix)}.json" "~{outputPrefix}.json" - cp "~{basename(outputPrefix)}.lima.counts" "~{outputPrefix}.lima.counts" - cp "~{basename(outputPrefix)}.lima.report" "~{outputPrefix}.lima.report" - cp "~{basename(outputPrefix)}.lima.summary" "~{outputPrefix}.lima.summary" - fi - } + dirName="$(dirname ~{outputPrefix})" + find "$(cd ${dirName}; pwd)" -name "*.fl.*.bam" > bamFiles.txt + find "$(cd ${dirName}; pwd)" -name "*.fl.*.bam.pbi" > bamIndexes.txt + find "$(cd ${dirName}; pwd)" -name "*.fl.*.subreadset.xml" > subreadsets.txt + >>> output { - Array[File] limaBam = glob("*.bam") - Array[File] limaBamIndex = glob("*.bam.pbi") - Array[File] limaXml = glob("*.subreadset.xml") - File limaStderr = outputPrefix + ".stderr.log" - File limaJson = outputPrefix + ".json" - File limaCounts = outputPrefix + ".lima.counts" - File limaReport = outputPrefix + ".lima.report" - File limaSummary = outputPrefix + ".lima.summary" + Array[File] limaBam = read_lines("bamFiles.txt") + Array[File] limaBamIndex = read_lines("bamIndexes.txt") + Array[File] limaXml = read_lines("subreadsets.txt") + File limaStderr = outputPrefix + ".fl.stderr.log" + 
File limaJson = outputPrefix + ".fl.json" + File limaCounts = outputPrefix + ".fl.lima.counts" + File limaReport = outputPrefix + ".fl.lima.report" + File limaSummary = outputPrefix + ".fl.lima.summary" } runtime { From 3de3fcc809734b3a43080a75e9ad683bb0ee055f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 30 Nov 2020 17:24:07 +0100 Subject: [PATCH 424/902] Update CHANGELOG. --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01303723..0d6c0bc8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. -+ Lima: Fix copy commands. ++ Lima: Fix copy commands & return to `fl` naming. + Update Lima to version 2.0.0. + Update IsoSeq3 to version 3.4.0. + Update samtools to version 1.11. From 0df52e802caa2e7f3793ec37f6378d8929bb6411 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 1 Dec 2020 12:21:37 +0100 Subject: [PATCH 425/902] Remove naming. 
--- lima.wdl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lima.wdl b/lima.wdl index 90cd6986..2455aaac 100644 --- a/lima.wdl +++ b/lima.wdl @@ -83,26 +83,26 @@ task Lima { ~{true="--peek-guess" false="" peekGuess} \ --log-level ~{logLevel} \ --num-threads ~{threads} \ - ~{"--log-file " + outputPrefix + ".fl.stderr.log"} \ + ~{"--log-file " + outputPrefix + ".lima.stderr.log"} \ ~{inputBamFile} \ ~{barcodeFile} \ - ~{outputPrefix + ".fl.bam"} + ~{outputPrefix + ".bam"} dirName="$(dirname ~{outputPrefix})" - find "$(cd ${dirName}; pwd)" -name "*.fl.*.bam" > bamFiles.txt - find "$(cd ${dirName}; pwd)" -name "*.fl.*.bam.pbi" > bamIndexes.txt - find "$(cd ${dirName}; pwd)" -name "*.fl.*.subreadset.xml" > subreadsets.txt + find "$(cd ${dirName}; pwd)" -name "*.bam" > bamFiles.txt + find "$(cd ${dirName}; pwd)" -name "*.bam.pbi" > bamIndexes.txt + find "$(cd ${dirName}; pwd)" -name "*.subreadset.xml" > subreadsets.txt >>> output { Array[File] limaBam = read_lines("bamFiles.txt") Array[File] limaBamIndex = read_lines("bamIndexes.txt") Array[File] limaXml = read_lines("subreadsets.txt") - File limaStderr = outputPrefix + ".fl.stderr.log" - File limaJson = outputPrefix + ".fl.json" - File limaCounts = outputPrefix + ".fl.lima.counts" - File limaReport = outputPrefix + ".fl.lima.report" - File limaSummary = outputPrefix + ".fl.lima.summary" + File limaStderr = outputPrefix + ".lima.stderr.log" + File limaJson = outputPrefix + ".json" + File limaCounts = outputPrefix + ".lima.counts" + File limaReport = outputPrefix + ".lima.report" + File limaSummary = outputPrefix + ".lima.summary" } runtime { From fec33b447644769d5c1602d7a0fee0c6ee19b3b9 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 1 Dec 2020 12:22:25 +0100 Subject: [PATCH 426/902] Update changelog. 
--- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 77cf803b..22f41826 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. -+ Lima: Fix copy commands & return to `fl` naming. ++ Lima: Fix copy commands. + Fixed the `size` call in the default for gffread's timeMinutes, to retrieve GBs instead of bytes. + Update stringtie to version 1.3.6. From e87052a739ba2d2ac29cf0dad1cb5ace642f6e8c Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 9 Dec 2020 13:26:24 +0100 Subject: [PATCH 427/902] add duphold paramater in smoove --- smoove.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/smoove.wdl b/smoove.wdl index d1011f6c..7a7e4305 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -43,6 +43,7 @@ task Call { --fasta ~{referenceFasta} \ --removepr \ --genotype \ + --duphold \ ~{bamFile} } From 19b79d9c2617212deb1d2dca1e6ca93c2115d847 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Dec 2020 15:59:19 +0100 Subject: [PATCH 428/902] Use github actions CI --- .github/workflows/ci.yml | 32 ++++++++++++++++++++++++++++++++ .travis.yml | 23 ----------------------- requirements-test.txt | 11 ----------- 3 files changed, 32 insertions(+), 34 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 .travis.yml delete mode 100644 requirements-test.txt diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..97d329ad --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,32 @@ +name: Continuous integration + +on: + pull_request: + paths: + - "**.wdl" # Workflow files and task + - "**.yml" # Ci configuration, tests and docker images + - "!docs/**" + +defaults: + run: + # This is needed for miniconda, see: + # https://github.com/marketplace/actions/setup-miniconda#important 
+ shell: bash -l {0} + +jobs: + lint: + runs-on: ubuntu-latest + name: Womtool validate and submodule up to date. + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + - name: install miniconda + uses: conda-incubator/setup-miniconda@v2.0.1 + with: + channels: conda-forge,bioconda,defaults + # Conda-incubator uses 'test' environment by default. + - name: install requirements + run: conda install -n test cromwell miniwdl wdl-aid + - name: run linting + run: bash scripts/biowdl_lint.sh \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 3cf0681f..00000000 --- a/.travis.yml +++ /dev/null @@ -1,23 +0,0 @@ -# We use conda to install cromwell. - -language: python - -python: - - 3.6 - -before_install: - # Install conda - - export MINICONDA=${HOME}/miniconda - - export PATH=${MINICONDA}/bin:${PATH} - - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh - - bash miniconda.sh -b -f -p ${MINICONDA} - - conda config --set always_yes yes - - conda config --add channels defaults - - conda config --add channels bioconda - - conda config --add channels conda-forge - -install: - - conda install --file requirements-test.txt - -script: - - bash scripts/biowdl_lint.sh diff --git a/requirements-test.txt b/requirements-test.txt deleted file mode 100644 index 0b01d193..00000000 --- a/requirements-test.txt +++ /dev/null @@ -1,11 +0,0 @@ -# These are the programs used for testing these biowdl tasks. -# These requirements can be installed with conda with the bioconda channel -# activated. -# For more information on how to set up conda with bioconda channel see: -# http://bioconda.github.io/#install-conda -# This file can be installed with "conda install --file requirements-test.txt". 
- -cromwell -womtool -miniwdl -wdl-aid From 52b7c02f4ed1e7bee376af192747efa75cf55004 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 10:04:43 +0100 Subject: [PATCH 429/902] bcftools: rm memory parameter meta --- bcftools.wdl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 28380dea..0cbfdefd 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -43,7 +43,7 @@ task Annotate { File? regionsFile File? renameChrs File? samplesFile - + Int threads = 0 String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) @@ -53,7 +53,7 @@ task Annotate { Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") command { - set -e + set -e mkdir -p "$(dirname ~{outputPath})" bcftools annotate \ -o ~{outputPath} \ @@ -154,7 +154,7 @@ task Sort { File outputVcf = outputPath File? outputVcfIndex = outputPath + ".tbi" } - + runtime { memory: memory time_minutes: timeMinutes @@ -291,6 +291,8 @@ task View { File inputFile String outputPath = "output.vcf" + String? exclude + String? 
include String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" @@ -302,6 +304,8 @@ task View { set -e mkdir -p "$(dirname ~{outputPath})" bcftools view \ + ~{"--include " + include} \ + ~{"--exclude " + exclude} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ ~{inputFile} @@ -324,7 +328,8 @@ task View { # inputs inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} + include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} + exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 4cf91963c64c48478c8009e65aa20678ad423eb9 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 10:44:10 +0100 Subject: [PATCH 430/902] add duphold --- duphold.sh | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 duphold.sh diff --git a/duphold.sh b/duphold.sh new file mode 100644 index 00000000..6e65ee5c --- /dev/null +++ b/duphold.sh @@ -0,0 +1,76 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task Duphold { + input { + File inputVcf + File bamFile + File bamIndex + File referenceFasta + File referenceFastaFai + String sample + String outputPath = "./duphold.vcf" + + String memory = "15G" + Int timeMinutes = 1440 + String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" + } + + String outputVCF = outputDir + basename(inputVcf, ".vcf") + "-duphold.vcf" + + command { + set -e + mkdir -p ~{outputDir} + export DUPHOLD_SAMPLE_NAME=~{sample} + duphold \ + -v ~{inputVcf} \ + -b ~{bamFile} \ + -f ~{referenceFasta} \ + -o ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + bamFile: {description: "The bam file to process.", category: "required"} + bamIndex: {description: "The index of the bam file.", category: "required"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} + referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } + sample: {description: "The name of the sample.", category: "required"} + outputDir: {description: "The location the output VCF file should be written.", category: "common"} + memory: {description: "The memory required to run the programs.", category: "advanced"} + timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + smooveVcf: {description: "Calls of structural variants in VCF file."} + } +} From fb65bfe1ab5e627cb23812264ab651748e844b89 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 11:13:40 +0100 Subject: [PATCH 431/902] add duphold.wdl --- duphold.sh => duphold.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) rename duphold.sh => duphold.wdl (92%) diff --git a/duphold.sh b/duphold.wdl similarity index 92% rename from duphold.sh rename to duphold.wdl index 6e65ee5c..9c7255ff 100644 --- a/duphold.sh +++ b/duphold.wdl @@ -32,7 +32,7 @@ task Duphold { String memory = "15G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" + String dockerImage = "quay.io/biocontainers/duphold:0.2.1--h516909a_1" } String outputVCF = outputDir + basename(inputVcf, ".vcf") + "-duphold.vcf" @@ -60,6 +60,7 @@ task Duphold { parameter_meta { # inputs + inputVcf: {description: "The VCF file to process.", category: "required"} bamFile: {description: "The bam file to process.", category: "required"} bamIndex: {description: "The index of the bam file.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} @@ -71,6 +72,6 @@ task Duphold { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - smooveVcf: {description: "Calls of structural variants in VCF file."} + outputVcf: {description: "Duphold annotated VCF file."} } } From fca78c3d28d57b5ebfe802deccc52b86ae00c651 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 12:51:23 +0100 Subject: [PATCH 432/902] fix outputpath --- duphold.wdl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/duphold.wdl b/duphold.wdl index 9c7255ff..80fe31d2 100644 --- a/duphold.wdl +++ b/duphold.wdl @@ -35,11 +35,9 @@ task Duphold { String dockerImage = "quay.io/biocontainers/duphold:0.2.1--h516909a_1" } - String outputVCF = outputDir + basename(inputVcf, ".vcf") + "-duphold.vcf" - command { set -e - mkdir -p ~{outputDir} + mkdir -p "$(dirname ~{outputPath})" export DUPHOLD_SAMPLE_NAME=~{sample} duphold \ -v ~{inputVcf} \ @@ -66,7 +64,7 @@ task Duphold { referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } sample: {description: "The name of the sample.", category: "required"} - outputDir: {description: "The location the output VCF file should be written.", category: "common"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 80566da7e582afa0d445547fb3555a8f9cccae07 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 12:51:39 +0100 Subject: [PATCH 433/902] remove duphold parameter --- smoove.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index 7a7e4305..d1011f6c 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -43,7 +43,6 @@ task Call { --fasta ~{referenceFasta} \ --removepr \ --genotype \ - --duphold \ ~{bamFile} } From 0232cf8e79dc6975eecc9a7d2336f45f2d191f05 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Dec 2020 16:19:20 +0100 Subject: [PATCH 434/902] add some taks --- hmftools.wdl | 47 ++++++++++++++++++++++++++++++++++++++ picard.wdl | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index fc56ecd9..f9a606e7 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -269,6 +269,53 @@ task GripssHardFilterApplicationKt { } } +task HealthChecker { + input { + String normalName + String tumorName + + String javaXmx = "10G" + } + + command { + java -Xmx10G \ + -jar /opt/tools/health-checker/3.1/health-checker.jar \ + -reference ~{normalName} \ + -tumor ~{tumorName} \ + -metrics_dir ~{metricsPath} \ + -amber_dir ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ + -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ + -output_dir ~{outputDir} + } + + # super("health-checker", + # Versions.HEALTH_CHECKER, + # "health-checker.jar", + # "10G", + # Lists.newArrayList("-reference", + # referenceSampleName, + # "-tumor", + # tumorSampleName, + # "-ref_wgs_metrics_file", + # referenceMetricsPath, + # "-tum_wgs_metrics_file", + # tumorMetricsPath, + # "-ref_flagstat_file", + # referenceFlagstatPath, + # "-tum_flagstat_file", + # tumorFlagstatPath, + # "-purple_dir", + # purplePath, + # "-output_dir", + # outputPath)); + + output { + + } + + +} + task 
Purple { input { String normalName diff --git a/picard.wdl b/picard.wdl index 1afa5ea7..88ddd313 100644 --- a/picard.wdl +++ b/picard.wdl @@ -315,6 +315,70 @@ task CollectTargetedPcrMetrics { } } +task CollectWgsMetrics { + input { + File inputBam + File inputBamIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai + String outputPath = "./wgs_metrics.txt" + + Int? minimumMappingQuality + Int? minimumBaseQuality + Int? coverageCap + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + CollectWgsMetrics \ + REFERENCE_SEQUENCE=~{referenceFasta} \ + INPUT=~{inputBam} \ + OUTPUT=~{outputPath} \ + ~{"MINIMUM_MAPPING_QUALITY=" + minimumMappingQuality} \ + ~{"MINIMUM_BASE_QUALITY=" + minimumBaseQuality} \ + ~{"OVERAGE_CAP=" + coverageCap} + } + + output { + File metrics = outputPath + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} + inputBamIndex: {description: "The index of the input BAM file.", category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + outputPath: {description: "The path picard CollectWgsMetrics' output should be written to.", category: "common"} + minimumMappingQuality: {description: "Equivalent to picard CollectWgsMetrics' MINIMUM_MAPPING_QUALITY option.", category: "advanced"} + minimumBaseQuality: {description: 
"Equivalent to picard CollectWgsMetrics' MINIMUM_BASE_QUALITY option.", category: "advanced"} + coverageCap: {description: "Equivalent to picard CollectWgsMetrics' OVERAGE_CAP option.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task CreateSequenceDictionary { input { File inputFile From 9896f4fcaba3d5ee9b070a03a21bc23484037fb1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 16 Dec 2020 14:08:56 +0100 Subject: [PATCH 435/902] add purple outputs --- bcftools.wdl | 2 +- bwa.wdl | 2 +- gridss.wdl | 2 +- hmftools.wdl | 39 ++++++++++++++++++++++++++++++++++----- sambamba.wdl | 2 +- 5 files changed, 38 insertions(+), 9 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 1dba7611..c91460bb 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -50,7 +50,7 @@ task Annotate { Int threads = 0 String memory = "256M" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) + Int timeMinutes = 10 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } diff --git a/bwa.wdl b/bwa.wdl index fdeb870f..44cfc9fe 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -34,7 +34,7 @@ task Mem { Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 Int? 
memoryGb - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 260 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10 String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } diff --git a/gridss.wdl b/gridss.wdl index c444c854..88655442 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -35,7 +35,7 @@ task GRIDSS { Int jvmHeapSizeGb = 30 Int threads = 2 - Int timeMinutes = ceil(1440 / threads) + 10 + Int timeMinutes = ceil(2880 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } diff --git a/hmftools.wdl b/hmftools.wdl index f9a606e7..86d90332 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -37,7 +37,7 @@ task Amber { Int threads = 2 String memory = "33G" String javaXmx = "32G" - Int timeMinutes = 60 + Int timeMinutes = 1200 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } @@ -114,7 +114,7 @@ task Cobalt { Int threads = 1 String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 60 + Int timeMinutes = 1200 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.10--0" } @@ -312,8 +312,6 @@ task HealthChecker { output { } - - } task Purple { @@ -327,6 +325,7 @@ task Purple { File somaticVcf File filteredSvVcf File fullSvVcf + File fullSvVcfIndex File referenceFasta File referenceFastaFai File referenceFastaDict @@ -365,7 +364,37 @@ task Purple { } output { - #TODO + File driverCatalogTsv = "~{outputDir}/~{tumorName}.driver.catalog.tsv" + File purpleCnvGeneTsv = "~{outputDir}/~{tumorName}.purple.cnv.gene.tsv" + File purpleCnvGermlineTsv = "~{outputDir}/~{tumorName}.purple.cnv.germline.tsv" + File purpleCnvSomaticTsv = "~{outputDir}/~{tumorName}.purple.cnv.somatic.tsv" + File purplePurityRangeTsv = "~{outputDir}/~{tumorName}.purple.purity.range.tsv" + File purplePurityTsv = "~{outputDir}/~{tumorName}.purple.purity.tsv" + File purpleQc = 
"~{outputDir}/~{tumorName}.purple.qc" + File purpleSegmentTsv = "~{outputDir}/~{tumorName}.purple.segment.tsv" + File purpleSomaticClonalityTsv = "~{outputDir}/~{tumorName}.purple.somatic.clonality.tsv" + File purpleSomaticHistTsv = "~{outputDir}/~{tumorName}.purple.somatic.hist.tsv" + File purpleSomaticVcf = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz" + File purpleSomaticVcfIndex = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz.tbi" + File purpleSvVcf = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz" + File purpleSvVcfIndex = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz.tbi" + File circosPlot = "~{outputDir}/plot/~{tumorName}.circos.png" + File copynumberPlot = "~{outputDir}/plot/~{tumorName}.copynumber.png" + File inputPlot = "~{outputDir}/plot/~{tumorName}.input.png" + File mapPlot = "~{outputDir}/plot/~{tumorName}.map.png" + File purityRangePlot = "~{outputDir}/plot/~{tumorName}.purity.range.png" + File segmentPlot = "~{outputDir}/plot/~{tumorName}.segment.png" + File somaticClonalityPlot = "~{outputDir}/plot/~{tumorName}.somatic.clonality.png" + File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" + File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" + File purpleVersion = "~{outputDir}/purple.version" + Array[File] outputs = [driverCatalogTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, + purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, + purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, + purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, + purpleVersion] + Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, + segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] } runtime { diff --git a/sambamba.wdl b/sambamba.wdl index cd8da21e..3fc57c65 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -41,7 +41,7 @@ task Markdup { Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize String dockerImage = 
"quay.io/biocontainers/sambamba:0.7.1--h148d290_2" # Time minute calculation does not work well for higher number of threads. - Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 16) / threads } String bamIndexPath = sub(outputPath, "\.bam$", ".bai") From df51100b8ffd6cb2dee27859b46ef94d901f4715 Mon Sep 17 00:00:00 2001 From: tomkuipers1402 Date: Tue, 22 Dec 2020 13:41:30 +0100 Subject: [PATCH 436/902] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22f41826..424dc764 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Add new wdl file prepareShiny.wdl for creating input files for shiny app. + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. + Lima: Fix copy commands. + Fixed the `size` call in the default for gffread's timeMinutes, to retrieve From f60a018191e1b96a5abdfae8b68d4ae4d3ee3b06 Mon Sep 17 00:00:00 2001 From: tomkuipers1402 Date: Tue, 22 Dec 2020 13:42:05 +0100 Subject: [PATCH 437/902] add tasks to create input files for DGE analysis --- prepareShiny.wdl | 108 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 prepareShiny.wdl diff --git a/prepareShiny.wdl b/prepareShiny.wdl new file mode 100644 index 00000000..d304798d --- /dev/null +++ b/prepareShiny.wdl @@ -0,0 +1,108 @@ +version 1.0 + +# Copyright (c) 2017 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons 
to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task CreateDesignMatrix { + input { + File countTable + String shinyDir = "." + + Int threads = 1 + String memory = "5G" + Int timeMinutes = 30 + String dockerImage = "quay.io/biocontainers/predex:0.9.1--pyh3252c3a_0" + } + + command { + set -e + mkdir -p ${shinyDir} + predex design \ + -i ${countTable} \ + -o ${shinyDir} + } + + output { + File dgeDesign = shinyDir + "/design_matrix.tsv" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + countTable: {description: "The created count table from HTseq.", category: "required"} + shinyDir: {description: "The directory to write the output to.", category: "required"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task CreateAnnotation { + input { + File referenceFasta + File referenceGtfFile + String shinyDir = "." 
+ + Int threads = 1 + String memory = "10G" + Int timeMinutes = 90 + String dockerImage = "quay.io/biocontainers/predex:0.9.1--pyh3252c3a_0" + } + + command { + set -e + mkdir -p ${shinyDir} + predex annotation \ + -f ${referenceFasta} \ + -g ${referenceGtfFile} \ + -o ${shinyDir} + } + + output { + File dgeAnnotation = shinyDir + "/annotation.tsv" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + referenceFasta: {description: "The reference Fasta file.", category: "required"} + referenceGtfFile: {description: "The reference GTF file.", category: "required"} + shinyDir: {description: "The directory to write the output to.", category: "required"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From fcd32243e1aaa62a842435e5cc2671843d8afc54 Mon Sep 17 00:00:00 2001 From: tomkuipers1402 Date: Thu, 24 Dec 2020 13:12:23 +0100 Subject: [PATCH 438/902] style update --- prepareShiny.wdl | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/prepareShiny.wdl b/prepareShiny.wdl index d304798d..81354a16 100644 --- a/prepareShiny.wdl +++ b/prepareShiny.wdl @@ -33,10 +33,10 @@ task CreateDesignMatrix { command { set -e - mkdir -p ${shinyDir} + mkdir -p ~{shinyDir} predex design \ - -i ${countTable} \ - -o ${shinyDir} + -i ~{countTable} \ + -o ~{shinyDir} } output { @@ -51,14 +51,16 @@ task CreateDesignMatrix { } parameter_meta { + # inputs countTable: {description: "The created count table from HTseq.", category: "required"} - shinyDir: {description: "The directory to write the output to.", category: "required"} - + shinyDir: {description: "The directory to write the output to.", category: "required"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + dgeDesign: {description: "Design matrix template to add sample information for DGE analysis."} } } @@ -76,11 +78,11 @@ task CreateAnnotation { command { set -e - mkdir -p ${shinyDir} + mkdir -p ~{shinyDir} predex annotation \ - -f ${referenceFasta} \ - -g ${referenceGtfFile} \ - -o ${shinyDir} + -f ~{referenceFasta} \ + -g ~{referenceGtfFile} \ + -o ~{shinyDir} } output { @@ -95,14 +97,16 @@ task CreateAnnotation { } parameter_meta { + # inputs referenceFasta: {description: "The reference Fasta file.", category: "required"} referenceGtfFile: {description: "The reference GTF file.", category: "required"} - shinyDir: {description: "The directory to write the output to.", category: "required"} - + shinyDir: {description: "The directory to write the output to.", category: "required"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + dgeAnnotation: {description: "Annotation file for DGE analysis."} } } From ca452303add0b2afeabb6595e09c7a036df58fc3 Mon Sep 17 00:00:00 2001 From: tomkuipers1402 Date: Mon, 28 Dec 2020 10:31:34 +0100 Subject: [PATCH 439/902] annotation update --- prepareShiny.wdl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/prepareShiny.wdl b/prepareShiny.wdl index 81354a16..13cd0b1c 100644 --- a/prepareShiny.wdl +++ b/prepareShiny.wdl @@ -28,15 +28,15 @@ task CreateDesignMatrix { Int threads = 1 String memory = "5G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/predex:0.9.1--pyh3252c3a_0" + String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" } command { set -e mkdir -p ~{shinyDir} predex design \ - -i ~{countTable} \ - -o ~{shinyDir} + --input ~{countTable} \ + --output ~{shinyDir} } output { @@ -70,19 +70,19 @@ task CreateAnnotation { File referenceGtfFile String shinyDir = "." 
- Int threads = 1 - String memory = "10G" - Int timeMinutes = 90 - String dockerImage = "quay.io/biocontainers/predex:0.9.1--pyh3252c3a_0" + Int threads = 2 + String memory = "5G" + Int timeMinutes = 30 + String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" } command { set -e mkdir -p ~{shinyDir} predex annotation \ - -f ~{referenceFasta} \ - -g ~{referenceGtfFile} \ - -o ~{shinyDir} + --fasta ~{referenceFasta} \ + --gtf ~{referenceGtfFile} \ + --output ~{shinyDir} } output { From 48d468d7c97e4b9e3ee892ff49b3fdda4fee9de9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 6 Jan 2021 11:11:41 +0100 Subject: [PATCH 440/902] add note to HealthChecker --- hmftools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/hmftools.wdl b/hmftools.wdl index 86d90332..760fb63f 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -270,6 +270,7 @@ task GripssHardFilterApplicationKt { } task HealthChecker { + # WIP input { String normalName String tumorName From c482e833fa60a8a138b8045dc3f044be0655599c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 6 Jan 2021 11:31:52 +0100 Subject: [PATCH 441/902] comment out healthchecker task and remove duplicate input in bcftools annotate --- bcftools.wdl | 4 +-- hmftools.wdl | 90 ++++++++++++++++++++++++++-------------------------- 2 files changed, 46 insertions(+), 48 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 8721540a..14889dff 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -44,9 +44,7 @@ task Annotate { String? regions File? regionsFile File? renameChrs - File? samplesFile - Boolean singleOverlaps = false - + File? 
samplesFile Int threads = 0 String memory = "256M" diff --git a/hmftools.wdl b/hmftools.wdl index 760fb63f..16313fca 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -269,51 +269,51 @@ task GripssHardFilterApplicationKt { } } -task HealthChecker { - # WIP - input { - String normalName - String tumorName - - String javaXmx = "10G" - } - - command { - java -Xmx10G \ - -jar /opt/tools/health-checker/3.1/health-checker.jar \ - -reference ~{normalName} \ - -tumor ~{tumorName} \ - -metrics_dir ~{metricsPath} \ - -amber_dir ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ - -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ - -output_dir ~{outputDir} - } - - # super("health-checker", - # Versions.HEALTH_CHECKER, - # "health-checker.jar", - # "10G", - # Lists.newArrayList("-reference", - # referenceSampleName, - # "-tumor", - # tumorSampleName, - # "-ref_wgs_metrics_file", - # referenceMetricsPath, - # "-tum_wgs_metrics_file", - # tumorMetricsPath, - # "-ref_flagstat_file", - # referenceFlagstatPath, - # "-tum_flagstat_file", - # tumorFlagstatPath, - # "-purple_dir", - # purplePath, - # "-output_dir", - # outputPath)); - - output { - - } -} +# task HealthChecker { +# # WIP +# input { +# String normalName +# String tumorName +# +# String javaXmx = "10G" +# } +# +# command { +# java -Xmx10G \ +# -jar /opt/tools/health-checker/3.1/health-checker.jar \ +# -reference ~{normalName} \ +# -tumor ~{tumorName} \ +# -metrics_dir ~{metricsPath} \ +# -amber_dir ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ +# -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ +# -output_dir ~{outputDir} +# } +# +# # super("health-checker", +# # Versions.HEALTH_CHECKER, +# # "health-checker.jar", +# # "10G", +# # Lists.newArrayList("-reference", +# # referenceSampleName, +# # "-tumor", +# # tumorSampleName, +# # "-ref_wgs_metrics_file", +# # referenceMetricsPath, +# # "-tum_wgs_metrics_file", +# # tumorMetricsPath, +# # "-ref_flagstat_file", +# # 
referenceFlagstatPath, +# # "-tum_flagstat_file", +# # tumorFlagstatPath, +# # "-purple_dir", +# # purplePath, +# # "-output_dir", +# # outputPath)); +# +# output { +# +# } +# } task Purple { input { From 7988dbb2259f9a396fd19c514c48731e96d49e42 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 8 Jan 2021 11:06:09 +0100 Subject: [PATCH 442/902] make reference annotation optional for gffcompare --- gffcompare.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gffcompare.wdl b/gffcompare.wdl index 50cab8a6..8b135479 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -23,7 +23,6 @@ version 1.0 task GffCompare { input { Array[File] inputGtfFiles - File referenceAnnotation # gffcmp is the default used by the program as well. This needs to be # defined in order for the output values to be consistent and correct. String outPrefix = "gffcmp" @@ -40,6 +39,7 @@ task GffCompare { Boolean debugMode = false File? inputGtfList + File? referenceAnnotation String? outputDir File? genomeSequences Int? 
maxDistanceFreeEndsTerminalExons @@ -64,7 +64,7 @@ task GffCompare { set -e ~{"mkdir -p " + outputDir} gffcompare \ - -r ~{referenceAnnotation} \ + ~{"-r " + referenceAnnotation} \ ~{"-o '" + totalPrefix + "'"} \ ~{"-s " + genomeSequences} \ ~{"-e " + maxDistanceFreeEndsTerminalExons} \ From c22629ff7ec5c57f113ed79e2fc2784ee915b89f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 15 Jan 2021 15:03:13 +0100 Subject: [PATCH 443/902] add linx task, add more inputs to sage --- hmftools.wdl | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 130 insertions(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 16313fca..15f54937 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -315,6 +315,110 @@ task GripssHardFilterApplicationKt { # } # } +task Linx { + input { + String sampleName + File svVcf + File svVcfIndex + Array[File]+ purpleOutput + File referenceFasta + File referenceFastaFai + File referenceFastaDict + String refGenomeVersion + String outputDir = "./linx" + File fragileSiteCsv + File lineElementCsv + File replicationOriginsBed + File viralHostsCsv + File knownFusionCsv + File driverGenePanel + #The following should be in the same directory. 
+ File geneDataCsv + File proteinFeaturesCsv + File transExonDataCsv + File transSpliceDataCsv + + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 30 + String dockerImage = "docker://quay.io/biocontainers/hmftools-linx:1.12--0" + } + + command { + linx -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -sample ~{sampleName} \ + -sv_vcf ~{svVcf} \ + -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ + -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{refGenomeVersion} \ + -output_dir ~{outputDir} \ + -fragile_site_file ~{fragileSiteCsv} \ + -line_element_file ~{lineElementCsv} \ + -replication_origins_file ~{replicationOriginsBed} \ + -viral_hosts_file ~{viralHostsCsv} \ + -gene_transcripts_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -check_fusions \ + -known_fusion_file ~{knownFusionCsv} \ + -check_drivers \ + -driver_gene_panel ~{driverGenePanel} \ + -chaining_sv_limit 0 \ + -write_vis_data + } + + output { + File driverCatalog = "~{outputDir}/~{sampleName}.driver.catalog.tsv" + File linxBreakend = "~{outputDir}/~{sampleName}.linx.breakend.tsv" + File linxClusters = "~{outputDir}/~{sampleName}.linx.clusters.tsv" + File linxDrivers = "~{outputDir}/~{sampleName}.linx.drivers.tsv" + File linxFusion = "~{outputDir}/~{sampleName}.linx.fusion.tsv" + File linxLinks = "~{outputDir}/~{sampleName}.linx.links.tsv" + File linxSvs = "~{outputDir}/~{sampleName}.linx.svs.tsv" + File linxViralInserts = "~{outputDir}/~{sampleName}.linx.viral_inserts.tsv" + File linxVisCopyNumber = "~{outputDir}/~{sampleName}.linx.vis_copy_number.tsv" + File linxVisFusion = "~{outputDir}/~{sampleName}.linx.vis_fusion.tsv" + File linxVisGeneExon = "~{outputDir}/~{sampleName}.linx.vis_gene_exon.tsv" + File linxVisProteinDomain = "~{outputDir}/~{sampleName}.linx.vis_protein_domain.tsv" + File linxVisSegments = "~{outputDir}/~{sampleName}.linx.vis_segments.tsv" + File linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv" + File linxVersion = 
"~{outputDir}/linx.version" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + sampleName: {description: "The name of the sample.", category: "required"} + svVcf: {description: "A VCF file containing structural variants, produced using GRIDSS, annotated for viral insertions and postprocessed with GRIPSS.", category: "required"} + svVcfIndex: {description: "Index for the structural variants VCf file.", category: "required"} + purpleOutput: {description: "The files produced by PURPLE.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"HG19\" or \"HG38\".", category: "required"} + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + fragileSiteCsv: {description: "A list of known fragile sites.", category: "required"} + lineElementCsv: {description: "A list of known LINE source regions.", category: "required"} + replicationOriginsBed: {description: "Replication timing input in BED format with replication timing as the 4th column.", category: "required"} + viralHostsCsv: {description: "A list of the viruses which were used for annotation of the GRIDSS results.", category: "required"} + knownFusionCsv: {description: "A CSV file describing known fusions.", category: "required"} + driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} + geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + 
proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} + transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Purple { input { String normalName @@ -419,7 +523,7 @@ task Purple { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - driverGenePanel: {description: "A bed file describing the driver gene panel.", category: "required"} + driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} @@ -444,11 +548,20 @@ task Sage { File panelBed File highConfidenceBed Boolean hg38 = false + Boolean panelOnly = false String outputPath = "./sage.vcf.gz" String? normalName File? normalBam File? normalBamIndex + Int? hotspotMinTumorQual + Int? panelMinTumorQual + Int? hotspotMaxGermlineVaf + Int? hotspotMaxGermlineRelRawBaseQual + Int? panelMaxGermlineVaf + Int? panelMaxGermlineRelRawBaseQual + String? mnvFilterEnabled + File? 
coverageBed Int threads = 2 String javaXmx = "32G" @@ -470,6 +583,15 @@ task Sage { -panel_bed ~{panelBed} \ -high_confidence_bed ~{highConfidenceBed} \ -assembly ~{true="hg38" false="hg19" hg38} \ + ~{"-hotspot_min_tumor_qual " + hotspotMinTumorQual} \ + ~{"-panel_min_tumor_qual " + panelMinTumorQual} \ + ~{"-hotspot_max_germline_vaf " + hotspotMaxGermlineVaf} \ + ~{"-hotspot_max_germline_rel_raw_base_qual " + hotspotMaxGermlineRelRawBaseQual} \ + ~{"-panel_max_germline_vaf " + panelMaxGermlineVaf} \ + ~{"-panel_max_germline_rel_raw_base_qual " + panelMaxGermlineRelRawBaseQual} \ + ~{"-mnv_filter_enabled " + mnvFilterEnabled} \ + ~{"-coverage_bed " + coverage_bed} \ + ~{true="-panel_only" false="" panelOnly} \ -threads ~{threads} \ -out ~{outputPath} } @@ -502,6 +624,13 @@ task Sage { hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} panelBed: {description: "A bed file describing coding regions to search for in frame indels.", category: "required"} highConfidenceBed: {description: "A bed files describing high confidence mapping regions.", category: "required"} + hotspotMinTumorQual: {description: "Equivalent to sage's `hotspot_min_tumor_qual` option.", category: "advanced"} + panelMinTumorQual: {description: "Equivalent to sage's `panel_min_tumor_qual` option.", category: "advanced"} + hotspotMaxGermlineVaf: {description: "Equivalent to sage's `hotspot_max_germline_vaf` option.", category: "advanced"} + hotspotMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `hotspot_max_germline_rel_raw_base_qual` option.", category: "advanced"} + panelMaxGermlineVaf: {description: "Equivalent to sage's `panel_max_germline_vaf` option.", category: "advanced"} + panelMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `panel_max_germline_rel_raw_base_qual` option.", category: "advanced"} + mnvFilterEnabled: {description: "Equivalent to sage's `mnv_filter_enabled` option.", category: "advanced"} memory: {description: "The amount of
memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", From 111a42bf79d1fb8fa6a34d7b567dc4fc04f67e7c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 21 Jan 2021 14:23:53 +0100 Subject: [PATCH 444/902] fix typos --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 15f54937..6de3f777 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -590,7 +590,7 @@ task Sage { ~{"-panel_max_germline_vaf " + panelMaxGermlineVaf} \ ~{"-panel_max_germline_rel_raw_base_qual " + panelMaxGermlineRelRawBaseQual} \ ~{"-mnv_filter_enabled " + mnvFilterEnabled} \ - ~{"-coverage_bed " + coverage_bed} \ + ~{"-coverage_bed " + coverageBed} \ ~{true="-panel_only" false="" panelOnly} \ -threads ~{threads} \ -out ~{outputPath} From 96fa1bc6ba59825f051c0577d414027fd58f10c4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 28 Jan 2021 16:38:49 +0100 Subject: [PATCH 445/902] fix some issues, add flagstat --- bcftools.wdl | 1 - hmftools.wdl | 6 +++++- picard.wdl | 4 ++-- sambamba.wdl | 43 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 4 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 14889dff..b239320d 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -106,7 +106,6 @@ task Annotate { inputFile: {description: "A vcf or bcf file.", category: "required"} inputFileIndex: {description: "The index for the input vcf or bcf.", category: "common"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "common"} annsFileIndex: {description: "The index for annsFile.", category: "common"} collapse: {description: 
"Treat as identical records with , see man page for details.", category: "advanced"} diff --git a/hmftools.wdl b/hmftools.wdl index 6de3f777..67c49be3 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -341,7 +341,7 @@ task Linx { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 30 - String dockerImage = "docker://quay.io/biocontainers/hmftools-linx:1.12--0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.12--0" } command { @@ -381,6 +381,10 @@ task Linx { File linxVisSegments = "~{outputDir}/~{sampleName}.linx.vis_segments.tsv" File linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv" File linxVersion = "~{outputDir}/linx.version" + Array[File] outputs = [driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion, + linxLinks, linxSvs, linxViralInserts, linxVisCopyNumber, + linxVisFusion, linxVisGeneExon, linxVisProteinDomain, + linxVisSegments, linxVisSvData, linxVersion] } runtime { diff --git a/picard.wdl b/picard.wdl index d52b9cc7..8dc4e0bf 100644 --- a/picard.wdl +++ b/picard.wdl @@ -473,10 +473,10 @@ task CollectWgsMetrics { CollectWgsMetrics \ REFERENCE_SEQUENCE=~{referenceFasta} \ INPUT=~{inputBam} \ - OUTPUT=~{outputPath} \ + OUTPUT=~{outputPath} \ ~{"MINIMUM_MAPPING_QUALITY=" + minimumMappingQuality} \ ~{"MINIMUM_BASE_QUALITY=" + minimumBaseQuality} \ - ~{"OVERAGE_CAP=" + coverageCap} + ~{"COVERAGE_CAP=" + coverageCap} } output { diff --git a/sambamba.wdl b/sambamba.wdl index 0e9a901c..bb63f665 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -20,6 +20,49 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+task Flagstat { + input { + File inputBam + File inputBamIndex + String outputPath = "./flagstat.txt" + + Int threads = 2 + String memory = "8G" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" + } + + command { + sambamba flagstat \ + -t ~{threads} \ + ~{inputBam} \ + > ~{outputPath} + } + + output { + File stats = outputPath + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + inputBam: {description: "The input BAM file.", category: "required"} + inputBamIndex: {description: "The index for the BAM file.", category: "required"} + outputPath: {description: "The path to write the output to.", category: "required"} + + threads: {description: "The number of threads that will be used for this task.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + + task Markdup { input { Array[File] inputBams From 8b51723e40a28d8894015f8b4dad21fcb0cb4bd1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 28 Jan 2021 16:39:56 +0100 Subject: [PATCH 446/902] add extractSigPredictHRD --- extractSigPredictHRD.wdl | 69 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 extractSigPredictHRD.wdl diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl new file mode 100644 index 00000000..6aa5ff1d --- /dev/null +++ b/extractSigPredictHRD.wdl @@ -0,0 +1,69 @@ +version 1.0 + +# Copyright (c) 2021 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task ExtractSigPredictHRD { + input { + String outputDir = "." 
+ String sampleName + File snvIndelVcf + File snvIndelVcfIndex + File svVcf + File svVcfIndex + + String memory = "8G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biowdl/chord-mutsigextractor:2.00_1.14" + } + + command { + extractSigPredictHRD.R \ + ~{outputDir} \ + ~{sampleName} \ + ~{snvIndelVcf} \ + ~{svVcf} \ + } + + output { + File chordPrediction = "~{outputDir}/~{sampleName}_chord_prediction.txt" + File chordSignatures = "~{outputDir}/~{sampleName}_chord_signatures.txt" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + outputDir: {description: "The directory the output will be written to.", category: "required"} + sampleName: {description: "The name of the sample.", category: "required"} + snvIndelVcf: {description: "A VCF file with SNVs and indels.", category: "required"} + snvIndelVcfIndex: {description: "The index for the SNV/indel VCF file.", category: "required"} + svVcf: {description: "A VCF file with SVs.", category: "required"} + svVcfIndex: {description: "The index for the SV VCF file.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} \ No newline at end of file From a4ebccba572cb4b0114c80b91083eafc203fa92b Mon Sep 17 00:00:00 2001 From: tomkuipers1402 Date: Thu, 4 Feb 2021 09:22:33 +0100 Subject: [PATCH 447/902] change threads --- prepareShiny.wdl | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/prepareShiny.wdl b/prepareShiny.wdl index 13cd0b1c..d669e2d1 100644 --- a/prepareShiny.wdl +++ b/prepareShiny.wdl @@ -25,7 +25,6 @@ task CreateDesignMatrix { File countTable String shinyDir = "." - Int threads = 1 String memory = "5G" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" @@ -44,7 +43,6 @@ task CreateDesignMatrix { } runtime { - cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -53,8 +51,7 @@ task CreateDesignMatrix { parameter_meta { # inputs countTable: {description: "The created count table from HTseq.", category: "required"} - shinyDir: {description: "The directory to write the output to.", category: "required"} - threads: {description: "The number of threads to use.", category: "advanced"} + shinyDir: {description: "The directory to write the output to.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -70,7 +67,6 @@ task CreateAnnotation { File referenceGtfFile String shinyDir = "." 
- Int threads = 2 String memory = "5G" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" @@ -90,7 +86,6 @@ task CreateAnnotation { } runtime { - cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -100,8 +95,7 @@ task CreateAnnotation { # inputs referenceFasta: {description: "The reference Fasta file.", category: "required"} referenceGtfFile: {description: "The reference GTF file.", category: "required"} - shinyDir: {description: "The directory to write the output to.", category: "required"} - threads: {description: "The number of threads to use.", category: "advanced"} + shinyDir: {description: "The directory to write the output to.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 558c8088dee1d252fb668303874684fd62741409 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 4 Feb 2021 15:38:11 +0100 Subject: [PATCH 448/902] add health-checker --- hmftools.wdl | 106 +++++++++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 45 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 67c49be3..5bad1dbe 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -269,51 +269,67 @@ task GripssHardFilterApplicationKt { } } -# task HealthChecker { -# # WIP -# input { -# String normalName -# String tumorName -# -# String javaXmx = "10G" -# } -# -# command { -# java -Xmx10G \ -# -jar /opt/tools/health-checker/3.1/health-checker.jar \ -# -reference ~{normalName} \ -# -tumor ~{tumorName} \ -# -metrics_dir ~{metricsPath} \ -# -amber_dir ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ -# -purple_dir ~{sub(purpleOutput[0], 
basename(purpleOutput[0]), "")} \ -# -output_dir ~{outputDir} -# } -# -# # super("health-checker", -# # Versions.HEALTH_CHECKER, -# # "health-checker.jar", -# # "10G", -# # Lists.newArrayList("-reference", -# # referenceSampleName, -# # "-tumor", -# # tumorSampleName, -# # "-ref_wgs_metrics_file", -# # referenceMetricsPath, -# # "-tum_wgs_metrics_file", -# # tumorMetricsPath, -# # "-ref_flagstat_file", -# # referenceFlagstatPath, -# # "-tum_flagstat_file", -# # tumorFlagstatPath, -# # "-purple_dir", -# # purplePath, -# # "-output_dir", -# # outputPath)); -# -# output { -# -# } -# } +task HealthChecker { + # WIP + input { + String outputDir = "." + String normalName + File normalFlagstats + File normalMetrics + String tumorName + File tumorFlagstats + File tumorMetrics + Array[File]+ purpleOutput + + String javaXmx = "10G" + String memory = "11G" + Int timeMinutes = 10 + String dockerImage = "quay.io/biowdl/health-checker:3.2" + } + + command { + set -e + mkdir -p ~{outputDir} + health-checker -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -reference ~{normalName} \ + -ref_flagstat_file ~{normalFlagstats} \ + -ref_wgs_metrics_file ~{normalMetrics} \ + -tumor ~{tumorName} \ + -tum_flagstat_file ~{tumorFlagstats} \ + -tum_wgs_metrics_file ~{tumorMetrics} \ + -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ + -output_dir ~{outputDir} + } + + + output { + File? healthCheckSucceeded = "~{outputDir}/~{tumorName}.HealthCheckSucceeded" + File? 
healthCheckFailed = "~{outputDir}/~{tumorName}.HealthCheckFailed" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + outputDir: {description: "The path the output will be written to.", category:"required"} + normalName: {description: "The name of the normal sample.", category: "required"} + normalFlagstats: {description: "The flagstats for the normal sample.", category: "required"} + normalMetrics: {description: "The picard WGS metrics for the normal sample.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorFlagstats: {description: "The flagstats for the tumor sample.", category: "required"} + tumorMetrics: {description: "The picard WGS metrics for the tumor sample.", category: "required"} + purpleOutput: {description: "The files from purple's output directory.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} task Linx { input { From eac2b302158e412df419705eba39ebaeedc1c11f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 9 Feb 2021 16:10:52 +0100 Subject: [PATCH 449/902] small adjustments --- bwa.wdl | 4 +++- gridss.wdl | 6 +++--- hmftools.wdl | 12 ++++++------ sambamba.wdl | 2 +- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 203f0dde..e2393481 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -28,6 +28,7 @@ task Mem { String outputPrefix Boolean sixtyFour = false Boolean usePostalt = false + Boolean useSoftclippingForSupplementary = false Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 @@ -36,7 +37,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 260 / threads) + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 500 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } @@ -56,6 +57,7 @@ task Mem { mkdir -p "$(dirname ~{outputPrefix})" bwa mem \ -t ~{threads} \ + ~{if useSoftclippingForSupplementary then "-Y" else ""} \ ~{"-R '" + readgroup}~{true="'" false="" defined(readgroup)} \ ~{bwaIndex.fastaFile} \ ~{read1} \ diff --git a/gridss.wdl b/gridss.wdl index 9bafa6d6..0148fcf6 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -35,9 +35,9 @@ task GRIDSS { String? 
normalLabel Int jvmHeapSizeGb = 30 - Int threads = 2 - Int timeMinutes = ceil(2880 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" + Int threads = 4 + Int timeMinutes = ceil(5760 / threads) + 10 + String dockerImage = "quay.io/biocontainers/gridss:2.9.3--0" } command { diff --git a/hmftools.wdl b/hmftools.wdl index 5bad1dbe..90564060 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -181,13 +181,13 @@ task GripssApplicationKt { String memory = "25G" String javaXmx = "24G" - Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.7--0" } command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.7-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ -ref_genome ~{referenceFasta} \ -breakpoint_hotspot ~{breakpointHotspot} \ @@ -234,13 +234,13 @@ task GripssHardFilterApplicationKt { String memory = "25G" String javaXmx = "24G" - Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.7--0" } command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.7-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ -input_vcf ~{inputVcf} \ -output_vcf ~{outputPath} diff --git a/sambamba.wdl b/sambamba.wdl index bb63f665..5284363e 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -84,7 +84,7 @@ task Markdup { # Added 1024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
- Int timeMinutes = 1 + ceil(size(inputBams, "G") * 16) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 32) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } From 2792266fa2950ec9cbe15530374465a99c65a43a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 16 Feb 2021 09:52:04 +0100 Subject: [PATCH 450/902] update versions, memory, etc --- bwa.wdl | 2 +- extractSigPredictHRD.wdl | 2 ++ gridss.wdl | 9 ++++++++- hmftools.wdl | 29 ++++++++++++++--------------- sambamba.wdl | 6 +++--- 5 files changed, 28 insertions(+), 20 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index e2393481..faa4121a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.5) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl index 6aa5ff1d..69c41ef8 100644 --- a/extractSigPredictHRD.wdl +++ b/extractSigPredictHRD.wdl @@ -28,6 +28,7 @@ task ExtractSigPredictHRD { File snvIndelVcfIndex File svVcf File svVcfIndex + Boolean hg38 = false String memory = "8G" Int timeMinutes = 15 @@ -40,6 +41,7 @@ task ExtractSigPredictHRD { ~{sampleName} \ ~{snvIndelVcf} \ ~{svVcf} \ + ~{if hg38 then "RG_38" else "RG_37"} } output { diff --git a/gridss.wdl b/gridss.wdl index 0148fcf6..98d730cf 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -33,23 +33,28 @@ task GRIDSS { File? normalBam File? 
normalBai String? normalLabel + File? blacklistBed + File? repeatmaskerBed Int jvmHeapSizeGb = 30 Int threads = 4 Int timeMinutes = ceil(5760 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.9.3--0" + String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } command { set -e mkdir -p "$(dirname ~{outputPrefix})" gridss \ + -w . \ --reference ~{reference.fastaFile} \ --output ~{outputPrefix}.vcf.gz \ --assembly ~{outputPrefix}_assembly.bam \ ~{"-t " + threads} \ ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ + ~{"--blacklist " + blacklistBed} \ + ~{"--repeatmaskerbed " + repeatmaskerBed} ~{normalBam} \ ~{tumorBam} tabix -p vcf ~{outputPrefix}.vcf.gz @@ -80,6 +85,8 @@ task GRIDSS { normalBam: {description: "The BAM file for the normal/control sample.", category: "advanced"} normalBai: {description: "The index for normalBam.", category: "advanced"} normalLabel: {description: "The name of the normal sample.", category: "advanced"} + blacklistBed: {description: "A bed file with blaclisted regins.", category: "advanced"} + repeatmaskerBed: {description: "A bed file containing the repeatmasker database.", category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} diff --git a/hmftools.wdl b/hmftools.wdl index 90564060..e98ac7ba 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -115,7 +115,7 @@ task Cobalt { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1200 - String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.10--0" + String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0" } command { @@ -172,6 +172,8 @@ task GripssApplicationKt { input { File inputVcf String outputPath = "gripss.vcf.gz" + String tumorName + String normalName File referenceFasta File referenceFastaFai File 
referenceFastaDict @@ -182,13 +184,15 @@ task GripssApplicationKt { String memory = "25G" String javaXmx = "24G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.7--0" + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" } command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.7-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ + -tumor ~{tumorName} \ + ~reference ~{normalName} \ -ref_genome ~{referenceFasta} \ -breakpoint_hotspot ~{breakpointHotspot} \ -breakend_pon ~{breakendPon} \ @@ -235,12 +239,12 @@ task GripssHardFilterApplicationKt { String memory = "25G" String javaXmx = "24G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.7--0" + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" } command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.7-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ -input_vcf ~{inputVcf} \ -output_vcf ~{outputPath} @@ -357,7 +361,7 @@ task Linx { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.12--0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.13--0" } command { @@ -455,13 +459,13 @@ task Purple { File referenceFastaFai File referenceFastaDict File driverGenePanel - File hotspots + File somaticHotspots Int threads = 1 Int timeMinutes = 60 String memory = "13G" String javaXmx = "12G" - String dockerImage = "quay.io/biocontainers/hmftools-purple:2.51--1" + String dockerImage = "quay.io/biocontainers/hmftools-purple:2.52--0" } command { @@ -479,13 +483,8 @@ task Purple { -ref_genome ~{referenceFasta} \ -driver_catalog \ -driver_gene_panel ~{driverGenePanel} \ - -hotspots ~{hotspots} \ + 
-somatic_hotspots ~{somaticHotspots} \ -threads ~{threads} - - # TODO if shallow also the following: - #-highly_diploid_percentage 0.88 \ - #-somatic_min_total 100 \ - #-somatic_min_purity_spread 0.1 } output { @@ -587,7 +586,7 @@ task Sage { String javaXmx = "32G" String memory = "33G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--2" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.6--0" } command { diff --git a/sambamba.wdl b/sambamba.wdl index 5284363e..b6ef5e9b 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -34,8 +34,8 @@ task Flagstat { command { sambamba flagstat \ - -t ~{threads} \ - ~{inputBam} \ + -t ~{threads} \ + ~{inputBam} \ > ~{outputPath} } @@ -84,7 +84,7 @@ task Markdup { # Added 1024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
- Int timeMinutes = 1 + ceil(size(inputBams, "G") * 32) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 40) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } From 943f9541ebc002ea576898067b7f220112cb79fc Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 16 Feb 2021 13:56:15 +0100 Subject: [PATCH 451/902] fix parameter_meta purple --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index e98ac7ba..3fe845a6 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -543,7 +543,7 @@ task Purple { category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} - hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + somaticHotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 8283c5099ba6fad50b34043033380e2898d3db66 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 18 Feb 2021 11:03:27 +0100 Subject: [PATCH 452/902] fix missing backslash --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 98d730cf..b4b36b01 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -54,7 +54,7 @@ task GRIDSS { ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ ~{"--blacklist " + blacklistBed} \ - ~{"--repeatmaskerbed " + repeatmaskerBed} + ~{"--repeatmaskerbed " + repeatmaskerBed} \ ~{normalBam} \ ~{tumorBam} tabix -p vcf ~{outputPrefix}.vcf.gz From adc3523872df29405e1741eaa2dfa2a67e61a51d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 19 Feb 2021 11:00:46 +0100 Subject: 
[PATCH 453/902] fix sage --- hmftools.wdl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 3fe845a6..49e4eeb4 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -590,9 +590,7 @@ task Sage { } command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-sage-2.2-2/sage.jar \ - com.hartwig.hmftools.sage.SageApplication \ + SAGE -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ ~{"-reference " + normalName} \ From a8314de9c3a2746eb44bf041fe1849c49241e547 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 19 Feb 2021 16:30:52 +0100 Subject: [PATCH 454/902] add -c to stringtie --- stringtie.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/stringtie.wdl b/stringtie.wdl index d3a6f73d..9c2f3cfc 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -31,6 +31,7 @@ task Stringtie { Boolean? firstStranded Boolean? secondStranded String? geneAbundanceFile + Float? minimumCoverage Int threads = 1 String memory = "2G" @@ -47,6 +48,7 @@ task Stringtie { ~{true="-e" false="" skipNovelTranscripts} \ ~{true="--rf" false="" firstStranded} \ ~{true="--fr" false="" secondStranded} \ + ~{"-c " + minimumCoverage} \ -o ~{assembledTranscriptsFile} \ ~{"-A " + geneAbundanceFile} \ ~{bam} @@ -74,6 +76,7 @@ task Stringtie { firstStranded: {description: "Equivalent to the --rf flag of stringtie.", category: "required"} secondStranded: {description: "Equivalent to the --fr flag of stringtie.", category: "required"} geneAbundanceFile: {description: "Where the abundance file should be written.", category: "common"} + minimumCoverage: {description: "The minimum coverage for a transcript to be shown in the output.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run 
in minutes.", category: "advanced"} From f468bd568b5d9fcbd66872934837a4f88a4f2f0b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 19 Feb 2021 16:43:57 +0100 Subject: [PATCH 455/902] add index to htseq --- htseq.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/htseq.wdl b/htseq.wdl index dfa3fcf2..ef4ae0a3 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -23,6 +23,7 @@ version 1.0 task HTSeqCount { input { Array[File]+ inputBams + Array[File]+ inputBamIndexes File gtfFile String outputTable = "output.tsv" String order = "pos" @@ -34,7 +35,7 @@ task HTSeqCount { Int nprocesses = 1 String memory = "8G" - Int timeMinutes = 10 + ceil(size(inputBams, "G") * 60) + Int timeMinutes = 1440 #10 + ceil(size(inputBams, "G") * 60) FIXME String dockerImage = "quay.io/biocontainers/htseq:0.12.4--py37hb3f55d8_0" } From 070a5d81abd11bc0318f4957b7ef418df2f61c40 Mon Sep 17 00:00:00 2001 From: cagaser Date: Sun, 21 Feb 2021 17:39:45 +0100 Subject: [PATCH 456/902] task: add duphold.wdl --- duphold.wdl | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 duphold.wdl diff --git a/duphold.wdl b/duphold.wdl new file mode 100644 index 00000000..80fe31d2 --- /dev/null +++ b/duphold.wdl @@ -0,0 +1,75 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Duphold { + input { + File inputVcf + File bamFile + File bamIndex + File referenceFasta + File referenceFastaFai + String sample + String outputPath = "./duphold.vcf" + + String memory = "15G" + Int timeMinutes = 1440 + String dockerImage = "quay.io/biocontainers/duphold:0.2.1--h516909a_1" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + export DUPHOLD_SAMPLE_NAME=~{sample} + duphold \ + -v ~{inputVcf} \ + -b ~{bamFile} \ + -f ~{referenceFasta} \ + -o ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputVcf: {description: "The VCF file to process.", category: "required"} + bamFile: {description: "The bam file to process.", category: "required"} + bamIndex: {description: "The index of the bam file.", category: "required"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} + referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } + sample: {description: "The name of the sample.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + memory: {description: "The memory required to run the programs.", category: "advanced"} + timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} + dockerImage: {description: 
"The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Duphold annotated VCF file."} + } +} From aef20c2a69816a367700441ba9d4a121faf9a72f Mon Sep 17 00:00:00 2001 From: cagaser Date: Sun, 21 Feb 2021 17:42:24 +0100 Subject: [PATCH 457/902] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 424dc764..8d6d1b76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Duphold: add duphold.wdl + Add new wdl file prepareShiny.wdl for creating input files for shiny app. + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. + Lima: Fix copy commands. From 5fc58ce1f5585a5bb4078b095674b67aba8d8f7d Mon Sep 17 00:00:00 2001 From: cagaser Date: Sun, 21 Feb 2021 21:43:20 +0100 Subject: [PATCH 458/902] add bcftools view filtering options --- bcftools.wdl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 28380dea..5f6c2a16 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -290,7 +290,10 @@ task View { input { File inputFile String outputPath = "output.vcf" - + + String? exclude + String? 
include + Boolean excludeUncalled = false String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" @@ -302,6 +305,7 @@ task View { set -e mkdir -p "$(dirname ~{outputPath})" bcftools view \ + ~{true="--exclude-uncalled" false="" firstAlleleOnly} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ ~{inputFile} @@ -324,6 +328,8 @@ task View { # inputs inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} + include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} + exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 70d7a2b361a8faa2cab4b02accd2abd8da3068d0 Mon Sep 17 00:00:00 2001 From: cagaser Date: Sun, 21 Feb 2021 23:51:17 +0100 Subject: [PATCH 459/902] add option for bcftools view filtering --- bcftools.wdl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 5f6c2a16..50b08ee6 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -290,10 +290,9 @@ task View { input { File inputFile String outputPath = "output.vcf" - String? exclude String? 
include - Boolean excludeUncalled = false + Boolean excludeUncalled = false String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" @@ -305,7 +304,9 @@ task View { set -e mkdir -p "$(dirname ~{outputPath})" bcftools view \ - ~{true="--exclude-uncalled" false="" firstAlleleOnly} \ + ~{"--exclude " + exclude} \ + ~{"--include " + include} \ + ~{true="--exclude-uncalled" false="" excludeUncalled} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ ~{inputFile} @@ -330,6 +331,7 @@ task View { outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} + excludeUncalled: {description: "exclude sites without a called genotype (see man page for details).", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 42f6cd2a9c38ba2da8f07db2f7df17b70d99a5d9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 22 Feb 2021 10:27:48 +0100 Subject: [PATCH 460/902] fix purple output for newer version --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 49e4eeb4..31330a7d 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -488,7 +488,7 @@ task Purple { } output { - File driverCatalogTsv = "~{outputDir}/~{tumorName}.driver.catalog.tsv" + File driverCatalogSomaticTsv = "~{outputDir}/~{tumorName}.driver.catalog.somatic.tsv" File purpleCnvGeneTsv = "~{outputDir}/~{tumorName}.purple.cnv.gene.tsv" File purpleCnvGermlineTsv = "~{outputDir}/~{tumorName}.purple.cnv.germline.tsv" File purpleCnvSomaticTsv = "~{outputDir}/~{tumorName}.purple.cnv.somatic.tsv" @@ -512,7 +512,7 @@ task Purple { File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File purpleVersion = "~{outputDir}/purple.version" - Array[File] outputs = [driverCatalogTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, + Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, From 2d41a2e22783b6208c1cdf8e7906e388bbfb7a89 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 22 Feb 2021 13:26:08 +0100 Subject: [PATCH 461/902] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d6d1b76..cbd083c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. 
version 5.0.0-dev --------------------------- ++ Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled) + Duphold: add duphold.wdl + Add new wdl file prepareShiny.wdl for creating input files for shiny app. + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. From 8238579043ccd2df72ef7b270e9d44248b257715 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 24 Feb 2021 09:26:27 +0100 Subject: [PATCH 462/902] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cbd083c3..4ee68a91 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled) ++ Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). + Duphold: add duphold.wdl + Add new wdl file prepareShiny.wdl for creating input files for shiny app. + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. From ff4edf7a505234bef2e3102d06152148ae84eaa0 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 24 Feb 2021 09:26:33 +0100 Subject: [PATCH 463/902] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ee68a91..5e175c6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- + Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). -+ Duphold: add duphold.wdl ++ Duphold: add duphold.wdl. + Add new wdl file prepareShiny.wdl for creating input files for shiny app. 
+ mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. + Lima: Fix copy commands. From 505c4fc02f8fa22cd512e1c890a984febcd89531 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 24 Feb 2021 09:26:40 +0100 Subject: [PATCH 464/902] Update bcftools.wdl Co-authored-by: Jasper --- bcftools.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 50b08ee6..d01a0c03 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -290,9 +290,11 @@ task View { input { File inputFile String outputPath = "output.vcf" + Boolean excludeUncalled = false + String? exclude String? include - Boolean excludeUncalled = false + String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" From 119e2aca92129ccd520ea4f0d9ab8ca768330e7e Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 24 Feb 2021 09:26:46 +0100 Subject: [PATCH 465/902] Update bcftools.wdl Co-authored-by: Jasper --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index d01a0c03..4dc4edb5 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -333,7 +333,7 @@ task View { outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} - excludeUncalled: {description: "exclude sites without a called genotype (see man page for details).", category: "advanced"} + excludeUncalled: {description: "Exclude sites without a called genotype (see man page for details).", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} 
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From ee6e66bea74597352161d3da231ce4df45acf39e Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 4 Mar 2021 16:42:38 +0100 Subject: [PATCH 466/902] add tmpdir --- umi-tools.wdl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 6524d656..b05fcace 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -78,10 +78,13 @@ task Dedup { File inputBam File inputBamIndex String outputBamPath + String tmpDir + Boolean paired = true String? umiSeparator String? statsPrefix + String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) @@ -93,13 +96,14 @@ task Dedup { command { set -e - mkdir -p "$(dirname ~{outputBamPath})" + mkdir -p "$(dirname ~{outputBamPath})" "~{tmpDir}" umi_tools dedup \ - --stdin ~{inputBam} \ - --stdout ~{outputBamPath} \ + --stdin=~{inputBam} \ + --stdout=~{outputBamPath} \ ~{"--output-stats " + statsPrefix} \ ~{"--umi-separator=" + umiSeparator} \ - ~{true="--paired" false="" paired} + ~{true="--paired" false="" paired} \ + --temp-dir=~{tmpDir} \ samtools index ~{outputBamPath} ~{outputBamIndex} } @@ -122,6 +126,7 @@ task Dedup { inputBam: {description: "The input BAM file.", categrory: "required"} inputBamIndex: {description: "The index for the ipnut BAM file.", cateogry: "required"} outputBamPath: {description: "The location to write the output BAM file to.", category: "required"} + outputBamPath: {description: "Temporary directory.", category: "advanced"} paired: {description: "Whether or not the data is paired.", category: "common"} umiSeparator: {description: "Seperator used for UMIs in the read names.", category: "advanced"} statsPrefix: {description: "The prefix for the stats files.", category: 
"advanced"} From 4edc1284f86c713dd5e23e8dba79c8a0f3a20219 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 4 Mar 2021 17:03:12 +0100 Subject: [PATCH 467/902] update umi-tools.wdl --- CHANGELOG.md | 1 + umi-tools.wdl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e175c6d..437294cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ UMI-tools (dedup): Add tempdir + Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). + Duphold: add duphold.wdl. + Add new wdl file prepareShiny.wdl for creating input files for shiny app. diff --git a/umi-tools.wdl b/umi-tools.wdl index b05fcace..db888603 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -126,7 +126,7 @@ task Dedup { inputBam: {description: "The input BAM file.", categrory: "required"} inputBamIndex: {description: "The index for the ipnut BAM file.", cateogry: "required"} outputBamPath: {description: "The location to write the output BAM file to.", category: "required"} - outputBamPath: {description: "Temporary directory.", category: "advanced"} + tmpDir: {description: "Temporary directory.", category: "advanced"} paired: {description: "Whether or not the data is paired.", category: "common"} umiSeparator: {description: "Seperator used for UMIs in the read names.", category: "advanced"} statsPrefix: {description: "The prefix for the stats files.", category: "advanced"} From 2a151b5014c34ea28498da909806cfa70da65d47 Mon Sep 17 00:00:00 2001 From: cedrick Date: Mon, 8 Mar 2021 10:27:12 +0100 Subject: [PATCH 468/902] add default tmpdir --- umi-tools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index db888603..5e08e14d 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -78,7 +78,7 @@ task Dedup { File inputBam File inputBamIndex String outputBamPath - String tmpDir 
+ String tmpDir = "./umiToolsDedupTmpDir" Boolean paired = true From f8f2b9e4058d29bdd21bb92694bb425c3724f31b Mon Sep 17 00:00:00 2001 From: cedrick Date: Mon, 8 Mar 2021 10:27:55 +0100 Subject: [PATCH 469/902] update default dockerimage --- umi-tools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 5e08e14d..1a7db327 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -89,7 +89,7 @@ task Dedup { String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9). - String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" + String dockerImage = "quay.io/biocontainers/umi_tools:1.1.1--py38h0213d0e_1" } String outputBamIndex = sub(outputBamPath, "\.bam$", ".bai") From 3b5f1476fb34d215d6332b127995ff3ab1b82f20 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 8 Mar 2021 11:02:53 +0100 Subject: [PATCH 470/902] remove umitools deduped BAM index output --- umi-tools.wdl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 1a7db327..e909e481 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -103,13 +103,11 @@ task Dedup { ~{"--output-stats " + statsPrefix} \ ~{"--umi-separator=" + umiSeparator} \ ~{true="--paired" false="" paired} \ - --temp-dir=~{tmpDir} \ - samtools index ~{outputBamPath} ~{outputBamIndex} + --temp-dir=~{tmpDir} } output { File deduppedBam = outputBamPath - File deduppedBamIndex = outputBamIndex File? editDistance = "~{statsPrefix}_edit_distance.tsv" File? umiStats = "~{statsPrefix}_per_umi.tsv" File? 
positionStats = "~{statsPrefix}_per_umi_per_position.tsv" @@ -136,7 +134,6 @@ task Dedup { # outputs deduppedBam: {description: "Deduplicated BAM file."} - deduppedBamIndex: {description: "Index of the deduplicated BAM file."} editDistance: {description: "Report of the (binned) average edit distance between the UMIs at each position."} umiStats: {description: "UMI-level summary statistics."} positionStats: {description: "The counts for unique combinations of UMI and position."} From 0f448cf27ea9812f938a37cb783bd7ce115d32a6 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Mon, 8 Mar 2021 13:08:38 +0100 Subject: [PATCH 471/902] Update umi-tools.wdl Co-authored-by: Davy Cats --- umi-tools.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 1a7db327..20f1a37e 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -84,7 +84,6 @@ task Dedup { String? umiSeparator String? statsPrefix - String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) From 7c8209efa3f8c9ed6d9c716c3357008d8be7e809 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 8 Mar 2021 13:09:50 +0100 Subject: [PATCH 472/902] remove comment --- umi-tools.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index e909e481..e3c833f8 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -88,7 +88,6 @@ task Dedup { String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) - # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9). 
String dockerImage = "quay.io/biocontainers/umi_tools:1.1.1--py38h0213d0e_1" } From 8623c57dbca49543e4a5ee8108316ef46242bcde Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 11 Mar 2021 14:05:13 +0100 Subject: [PATCH 473/902] add circos configs to purple output --- hmftools.wdl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 31330a7d..2fad41fe 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -512,6 +512,17 @@ task Purple { File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File purpleVersion = "~{outputDir}/purple.version" + File circosNormalRatio = "~{outputDir}/circos/~{normalName}.ratio.circos" + File circosCond = "~{outputDir}/circos/~{tumorName}.circos.conf" + File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" + File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" + File circosTumorRatio = "~{outputDir}/circos/~{tumorName}.ratio.circos" + File circosGaps = "~{outputDir}/circos/gaps.txt" + File circosBaf = "~{outputDir}/circos/~{tumorName}.baf.circos" + File circosCnv = "~{outputDir}/circos/~{tumorName}.cnv.circos" + File circosInputConf = "~{outputDir}/circos/~{tumorName}.input.conf" + File circosMap = "~{outputDir}/circos/~{tumorName}.map.circos" + File circosSnp = "~{outputDir}/circos/~{tumorName}.snp.circos" Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, @@ -519,6 +530,7 @@ task Purple { purpleVersion] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] + Array[File] circos = [] } runtime { From 13967b1793fc585d9f3753d87b618fd2c6819736 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 11 Mar 2021 14:13:06 +0100 
Subject: [PATCH 474/902] add array for circos confs --- hmftools.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 2fad41fe..dc31f41b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -513,7 +513,7 @@ task Purple { File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File purpleVersion = "~{outputDir}/purple.version" File circosNormalRatio = "~{outputDir}/circos/~{normalName}.ratio.circos" - File circosCond = "~{outputDir}/circos/~{tumorName}.circos.conf" + File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" File circosTumorRatio = "~{outputDir}/circos/~{tumorName}.ratio.circos" @@ -530,7 +530,9 @@ task Purple { purpleVersion] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] - Array[File] circos = [] + Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink, + circosTumorRatio, circosGaps, circosBaf, circosCnv, circosInputConf, circosMap, + circosSnp] } runtime { From beb5444092b8dea12fe0674a40bd4326d1daf426 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 17 Mar 2021 11:01:31 +0100 Subject: [PATCH 475/902] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 437294cd..c0a79fc4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ UMI-tools: re-introduce samtools indexing ++ UMI-tools: update default dockerImage + UMI-tools (dedup): Add tempdir + Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). + Duphold: add duphold.wdl. 
From 2410d0d5c2415f234739f63bbef913f5f531eab7 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 17 Mar 2021 11:02:01 +0100 Subject: [PATCH 476/902] update dockerImage and re-introduce samtools indexing --- umi-tools.wdl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 6b3aa697..a09ca642 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -31,7 +31,7 @@ task Extract { Boolean threePrime = false Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2) - String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" + String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:f9d5e41daab14b273ff04f257621890af6f82b93-0" } command { @@ -87,7 +87,7 @@ task Dedup { String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) - String dockerImage = "quay.io/biocontainers/umi_tools:1.1.1--py38h0213d0e_1" + String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:f9d5e41daab14b273ff04f257621890af6f82b93-0" } String outputBamIndex = sub(outputBamPath, "\.bam$", ".bai") @@ -102,10 +102,12 @@ task Dedup { ~{"--umi-separator=" + umiSeparator} \ ~{true="--paired" false="" paired} \ --temp-dir=~{tmpDir} + samtools index ~{outputBamPath} ~{outputBamIndex} } output { File deduppedBam = outputBamPath + File deduppedBamIndex = outputBamIndex File? editDistance = "~{statsPrefix}_edit_distance.tsv" File? umiStats = "~{statsPrefix}_per_umi.tsv" File? 
positionStats = "~{statsPrefix}_per_umi_per_position.tsv" @@ -132,6 +134,7 @@ task Dedup { # outputs deduppedBam: {description: "Deduplicated BAM file."} + deduppedBamIndex: {description: "Index of the deduplicated BAM file."} editDistance: {description: "Report of the (binned) average edit distance between the UMIs at each position."} umiStats: {description: "UMI-level summary statistics."} positionStats: {description: "The counts for unique combinations of UMI and position."} From 2a601648e8728305452e244bb95e296ad5d2441b Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 18 Mar 2021 10:59:24 +0100 Subject: [PATCH 477/902] update CHANGELOG --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c0a79fc4..64f40df6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,9 +10,9 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ UMI-tools: re-introduce samtools indexing -+ UMI-tools: update default dockerImage -+ UMI-tools (dedup): Add tempdir ++ UMI-tools (v1.1.1): re-introduce samtools indexing ++ UMI-tools (v1.1.1): update default dockerImage ++ UMI-tools dedup (v1.1.1): Add tempdir + Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). + Duphold: add duphold.wdl. + Add new wdl file prepareShiny.wdl for creating input files for shiny app. From c791c96a60e6eee1c104cda7b884039a67be53d4 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Thu, 18 Mar 2021 11:22:09 +0100 Subject: [PATCH 478/902] Update CHANGELOG.md Co-authored-by: Davy Cats --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64f40df6..c204ba24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,9 +10,9 @@ that users understand how the changes affect the new version. 
version 5.0.0-dev --------------------------- -+ UMI-tools (v1.1.1): re-introduce samtools indexing -+ UMI-tools (v1.1.1): update default dockerImage -+ UMI-tools dedup (v1.1.1): Add tempdir ++ UMI-tools: re-introduce samtools indexing ++ UMI-tools: update default dockerImage to use umitools v1.1.1 ++ UMI-tools dedup: Add tempdir + Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). + Duphold: add duphold.wdl. + Add new wdl file prepareShiny.wdl for creating input files for shiny app. From 359456efd96ccd2326657e5dec543c5a73efd92c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 19 Mar 2021 17:07:41 +0100 Subject: [PATCH 479/902] increase time and memory for picard collectWgsMetrics --- picard.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/picard.wdl b/picard.wdl index 8dc4e0bf..aefb4c21 100644 --- a/picard.wdl +++ b/picard.wdl @@ -459,9 +459,9 @@ task CollectWgsMetrics { Int? minimumBaseQuality Int? coverageCap - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + String memory = "33G" + String javaXmx = "32G" + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } From 55818f8742b709e9bb4007d4e529878ba0aa47e8 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 22 Mar 2021 12:47:21 +0100 Subject: [PATCH 480/902] update dockerImage --- umi-tools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index a09ca642..86bf1314 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -31,7 +31,7 @@ task Extract { Boolean threePrime = false Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2) - String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:f9d5e41daab14b273ff04f257621890af6f82b93-0" + String dockerImage = 
"quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:3067b520386698317fd507c413baf7f901666fd4-0" } command { @@ -87,7 +87,7 @@ task Dedup { String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) - String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:f9d5e41daab14b273ff04f257621890af6f82b93-0" + String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:3067b520386698317fd507c413baf7f901666fd4-0" } String outputBamIndex = sub(outputBamPath, "\.bam$", ".bai") From 9c36780b3c24d40cb0ed7bb37c1c3b0c41d2269e Mon Sep 17 00:00:00 2001 From: cagaser Date: Tue, 23 Mar 2021 09:57:29 +0100 Subject: [PATCH 481/902] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c204ba24..f3b04d4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ UMI-tools: update default dockerImage to use umitools v1.1.1 with correct samtools version (1.10) + UMI-tools: re-introduce samtools indexing + UMI-tools: update default dockerImage to use umitools v1.1.1 + UMI-tools dedup: Add tempdir From 5db3dd912fbf3b8cdaefefe198a59e998ebdd89a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 25 Mar 2021 11:15:41 +0100 Subject: [PATCH 482/902] update memory and timeMinutes for cutadapt and bwa --- bwa.wdl | 4 ++-- cutadapt.wdl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index faa4121a..cc8ea0c6 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -37,7 +37,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 500 / threads) + Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 500 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. 
String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.5) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 2) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. diff --git a/cutadapt.wdl b/cutadapt.wdl index b2dbdec0..bca29db3 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -84,7 +84,7 @@ task Cutadapt { Int cores = 4 String memory = "~{300 + 100 * cores}M" - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) + Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 12.0 / cores) String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" } From 2aba7899cdf1a76d2afa089e230335bf0843b72c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 25 Mar 2021 12:59:54 +0100 Subject: [PATCH 483/902] increase memory bwa --- bwa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index cc8ea0c6..670f00d2 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
- Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 2) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "G") * 3) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. From f83b315ebb5318147ce3f08d8ba0d313146753d4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 30 Mar 2021 09:55:44 +0200 Subject: [PATCH 484/902] add more memory to sambamba markdup --- sambamba.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sambamba.wdl b/sambamba.wdl index b6ef5e9b..b4eca66b 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -81,8 +81,8 @@ task Markdup { # 2 threads reduces wall clock time by more than 40%. Int threads = 2 # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size. - # Added 1024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. - Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize + # Added 2024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. + Int memoryMb = 2048 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. Int timeMinutes = 1 + ceil(size(inputBams, "G") * 40) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" From 0862eab38451da3de6615ad419ea56402690e7a0 Mon Sep 17 00:00:00 2001 From: dcats Date: Mon, 12 Apr 2021 16:50:33 +0200 Subject: [PATCH 485/902] memory and runtime adjustments --- bcftools.wdl | 4 ++-- gridss.wdl | 4 ++-- hmftools.wdl | 10 +++++----- sambamba.wdl | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 4827a631..28b62696 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,8 +47,8 @@ task Annotate { File? 
samplesFile Int threads = 0 - String memory = "256M" - Int timeMinutes = 10 + ceil(size(inputFile, "G")) + String memory = "5G" + Int timeMinutes = 60 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } diff --git a/gridss.wdl b/gridss.wdl index b4b36b01..11014a88 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -36,7 +36,7 @@ task GRIDSS { File? blacklistBed File? repeatmaskerBed - Int jvmHeapSizeGb = 30 + Int jvmHeapSizeGb = 64 Int threads = 4 Int timeMinutes = ceil(5760 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" @@ -70,7 +70,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb + 1}G" + memory: "~{jvmHeapSizeGb + 25}G" time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } diff --git a/hmftools.wdl b/hmftools.wdl index dc31f41b..553879f9 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,8 +35,8 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = "33G" - String javaXmx = "32G" + String memory = "52G" + String javaXmx = "50G" Int timeMinutes = 1200 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } @@ -341,7 +341,7 @@ task Linx { File svVcf File svVcfIndex Array[File]+ purpleOutput - File referenceFasta + File referenceFasta #FIXME Not used in pipeline5? File referenceFastaFai File referenceFastaDict String refGenomeVersion @@ -597,8 +597,8 @@ task Sage { File? 
coverageBed Int threads = 2 - String javaXmx = "32G" - String memory = "33G" + String javaXmx = "50G" + String memory = "75G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough String dockerImage = "quay.io/biocontainers/hmftools-sage:2.6--0" } diff --git a/sambamba.wdl b/sambamba.wdl index b4eca66b..c8d9e11c 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -69,7 +69,7 @@ task Markdup { String outputPath Int compressionLevel = 1 # sortBufferSize and ioBufferSize taken from markdup defaults as of sambamba 0.7.1. - Int sortBufferSize = 2048 + Int sortBufferSize = 4096 Int ioBufferSize = 128 Boolean removeDuplicates = false @@ -81,8 +81,8 @@ task Markdup { # 2 threads reduces wall clock time by more than 40%. Int threads = 2 # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size. - # Added 2024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. - Int memoryMb = 2048 + sortBufferSize + 2 * ioBufferSize + # Added 4096 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. + Int memoryMb = 4096 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
Int timeMinutes = 1 + ceil(size(inputBams, "G") * 40) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" From 22933762f7683b98535da38de2954db41c44be37 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 13 Apr 2021 16:31:58 +0200 Subject: [PATCH 486/902] add germline options to purple --- hmftools.wdl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 553879f9..e8b60bc0 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -452,6 +452,7 @@ task Purple { Array[File]+ cobaltOutput File gcProfile File somaticVcf + File germlineVcf File filteredSvVcf File fullSvVcf File fullSvVcfIndex @@ -460,6 +461,7 @@ task Purple { File referenceFastaDict File driverGenePanel File somaticHotspots + File germlineHotspots Int threads = 1 Int timeMinutes = 60 @@ -477,6 +479,7 @@ task Purple { -cobalt ~{sub(cobaltOutput[0], basename(cobaltOutput[0]), "")} \ -gc_profile ~{gcProfile} \ -somatic_vcf ~{somaticVcf} \ + -germline_vcf ~{germlineVcf} \ -structural_vcf ~{filteredSvVcf} \ -sv_recovery_vcf ~{fullSvVcf} \ -circos /usr/local/bin/circos \ @@ -484,6 +487,7 @@ task Purple { -driver_catalog \ -driver_gene_panel ~{driverGenePanel} \ -somatic_hotspots ~{somaticHotspots} \ + -germline_hotspots ~{germlineHotspots} \ -threads ~{threads} } @@ -550,6 +554,7 @@ task Purple { cobaltOutput: {description: "The output files of hmftools cobalt", category: "required"} gcProfile: {description: "A file describing the GC profile of the reference genome.", category: "required"} somaticVcf: {description: "The somatic variant calling results.", category: "required"} + germlineVcf: {description: "The germline variant calling results.", category: "required"} filteredSvVcf: {description: "The filtered structural variant calling results.", category: "required"} fullSvVcf: {description: "The unfiltered structural variant calling results.", category: "required"} referenceFasta: {description: "The reference fasta file.", category: 
"required"} @@ -557,7 +562,8 @@ task Purple { category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} - somaticHotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + somaticHotspots: {description: "A vcf file with hotspot somatic variant sites.", category: "required"} + germlineHotspots: {description: "A vcf file with hotspot germline variant sites.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 0013a03155aed7748864308f9fda5b4f07d79706 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 13 Apr 2021 16:37:54 +0200 Subject: [PATCH 487/902] remove ref_genome from Linx --- hmftools.wdl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index e8b60bc0..1a99caf6 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -341,9 +341,6 @@ task Linx { File svVcf File svVcfIndex Array[File]+ purpleOutput - File referenceFasta #FIXME Not used in pipeline5? 
- File referenceFastaFai - File referenceFastaDict String refGenomeVersion String outputDir = "./linx" File fragileSiteCsv @@ -369,7 +366,6 @@ task Linx { -sample ~{sampleName} \ -sv_vcf ~{svVcf} \ -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ - -ref_genome ~{referenceFasta} \ -ref_genome_version ~{refGenomeVersion} \ -output_dir ~{outputDir} \ -fragile_site_file ~{fragileSiteCsv} \ From bf43886539cb8d40d5b9637e3920ffba8d5f80a0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 13 Apr 2021 16:41:07 +0200 Subject: [PATCH 488/902] remove unused parameter_meta --- hmftools.wdl | 3 --- 1 file changed, 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 1a99caf6..48c6099c 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -414,9 +414,6 @@ task Linx { svVcf: {description: "A VCF file containing structural variants, produced using GRIDSS, annotated for viral insertions and postprocessed with GRIPSS.", category: "required"} svVcfIndex: {description: "Index for the structural variants VCf file.", category: "required"} purpleOutput: {description: "The files produced by PURPLE.", category: "required"} - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} refGenomeVersion: {description: "The version of the genome assembly used for alignment. 
Either \"HG19\" or \"HG38\".", category: "required"} outputDir: {description: "The directory the outputs will be written to.", category: "required"} fragileSiteCsv: {description: "A list of known fragile sites.", category: "required"} From a4d5102d42edf0d7d5795f5860817b38e680e597 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 14 Apr 2021 13:27:26 +0200 Subject: [PATCH 489/902] add gridss properties --- gridss.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 11014a88..ef5ae9e5 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -35,6 +35,7 @@ task GRIDSS { String? normalLabel File? blacklistBed File? repeatmaskerBed + File? gridssProperties Int jvmHeapSizeGb = 64 Int threads = 4 @@ -50,9 +51,10 @@ task GRIDSS { --reference ~{reference.fastaFile} \ --output ~{outputPrefix}.vcf.gz \ --assembly ~{outputPrefix}_assembly.bam \ + ~{"-c " + gridssProperties} \ ~{"-t " + threads} \ ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ - --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ + --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ ~{"--blacklist " + blacklistBed} \ ~{"--repeatmaskerbed " + repeatmaskerBed} \ ~{normalBam} \ @@ -87,6 +89,7 @@ task GRIDSS { normalLabel: {description: "The name of the normal sample.", category: "advanced"} blacklistBed: {description: "A bed file with blaclisted regins.", category: "advanced"} repeatmaskerBed: {description: "A bed file containing the repeatmasker database.", category: "advanced"} + gridssProperties: {description: "A properties file for gridss.", category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} From e81de32b4db6b48ff458f368b253010bcbff7187 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 15 Apr 2021 11:50:41 +0200 Subject: [PATCH 490/902] upgrade sage version --- 
hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 48c6099c..0a566d8e 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -599,7 +599,7 @@ task Sage { String javaXmx = "50G" String memory = "75G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.6--0" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.7--0" } command { From 51e524a7fa1ffe7664882941e7fc0ffc7aa14ad3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 20 Apr 2021 12:25:16 +0200 Subject: [PATCH 491/902] add missing purple outputs, fix typo --- hmftools.wdl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 0a566d8e..3dd52daf 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -192,13 +192,13 @@ task GripssApplicationKt { -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ -tumor ~{tumorName} \ - ~reference ~{normalName} \ + -reference ~{normalName} \ -ref_genome ~{referenceFasta} \ -breakpoint_hotspot ~{breakpointHotspot} \ -breakend_pon ~{breakendPon} \ -breakpoint_pon ~{breakpointPon} \ -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} + -output_vcf ~{outputPath} } output { @@ -486,6 +486,7 @@ task Purple { output { File driverCatalogSomaticTsv = "~{outputDir}/~{tumorName}.driver.catalog.somatic.tsv" + File driverCatalogGermlineTsv = "~{outputDir}/~{tumorName}.driver.catalog.germline.tsv" File purpleCnvGeneTsv = "~{outputDir}/~{tumorName}.purple.cnv.gene.tsv" File purpleCnvGermlineTsv = "~{outputDir}/~{tumorName}.purple.cnv.germline.tsv" File purpleCnvSomaticTsv = "~{outputDir}/~{tumorName}.purple.cnv.somatic.tsv" @@ -497,6 +498,8 @@ task Purple { File purpleSomaticHistTsv = "~{outputDir}/~{tumorName}.purple.somatic.hist.tsv" File purpleSomaticVcf = 
"~{outputDir}/~{tumorName}.purple.somatic.vcf.gz" File purpleSomaticVcfIndex = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz.tbi" + File purpleGermlineVcf = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz" + File purpleGermlineVcfIndex = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz.tbi" File purpleSvVcf = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz" File purpleSvVcfIndex = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz.tbi" File circosPlot = "~{outputDir}/plot/~{tumorName}.circos.png" @@ -524,7 +527,7 @@ task Purple { purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, - purpleVersion] + purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink, From 32c08100bcbf0590d7c1d69e08cdae2e3c640e99 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 19 May 2021 14:16:16 +0200 Subject: [PATCH 492/902] adjust runtime settings --- bcftools.wdl | 4 ++-- bwa.wdl | 4 ++-- extractSigPredictHRD.wdl | 4 ++-- gridss.wdl | 6 +++--- hmftools.wdl | 43 ++++++++++++++++++++-------------------- picard.wdl | 6 +++--- sambamba.wdl | 2 +- samtools.wdl | 2 ++ snpeff.wdl | 6 +++--- 9 files changed, 39 insertions(+), 38 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 28b62696..8fab933a 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,8 +47,8 @@ task Annotate { File? 
samplesFile Int threads = 0 - String memory = "5G" - Int timeMinutes = 60 + ceil(size(inputFile, "G")) + String memory = "1G" + Int timeMinutes = 30 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } diff --git a/bwa.wdl b/bwa.wdl index 670f00d2..1cb170b7 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -37,7 +37,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 500 / threads) + Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 300 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "G") * 3) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "G") * 2) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. 
diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl index 69c41ef8..2b5d9781 100644 --- a/extractSigPredictHRD.wdl +++ b/extractSigPredictHRD.wdl @@ -30,8 +30,8 @@ task ExtractSigPredictHRD { File svVcfIndex Boolean hg38 = false - String memory = "8G" - Int timeMinutes = 15 + String memory = "3G" + Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/chord-mutsigextractor:2.00_1.14" } diff --git a/gridss.wdl b/gridss.wdl index ef5ae9e5..acafc911 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -39,7 +39,7 @@ task GRIDSS { Int jvmHeapSizeGb = 64 Int threads = 4 - Int timeMinutes = ceil(5760 / threads) + 10 + Int timeMinutes = ceil(4320 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } @@ -72,7 +72,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb + 25}G" + memory: "~{jvmHeapSizeGb + 15}G" time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } @@ -117,7 +117,7 @@ task AnnotateInsertedSequence { String javaXmx = "8G" String memory = "9G" String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2 / threads) + Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3 / threads) } command { diff --git a/hmftools.wdl b/hmftools.wdl index 3dd52daf..9b22c10d 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -37,7 +37,7 @@ task Amber { Int threads = 2 String memory = "52G" String javaXmx = "50G" - Int timeMinutes = 1200 + Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } @@ -112,9 +112,9 @@ task Cobalt { File gcProfile Int threads = 1 - String memory = "9G" - String javaXmx = "8G" - Int timeMinutes = 1200 + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0" } @@ -181,9 +181,9 @@ task GripssApplicationKt { File breakendPon File breakpointPon - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 120 + String memory = 
"33G" + String javaXmx = "32G" + Int timeMinutes = 45 String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" } @@ -236,9 +236,9 @@ task GripssHardFilterApplicationKt { File inputVcf String outputPath = "gripss_hard_filter.vcf.gz" - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 120 + String memory = "3G" + String javaXmx = "2G" + Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" } @@ -274,7 +274,6 @@ task GripssHardFilterApplicationKt { } task HealthChecker { - # WIP input { String outputDir = "." String normalName @@ -285,9 +284,9 @@ task HealthChecker { File tumorMetrics Array[File]+ purpleOutput - String javaXmx = "10G" - String memory = "11G" - Int timeMinutes = 10 + String javaXmx = "2G" + String memory = "1G" + Int timeMinutes = 1 String dockerImage = "quay.io/biowdl/health-checker:3.2" } @@ -355,9 +354,9 @@ task Linx { File transExonDataCsv File transSpliceDataCsv - String memory = "9G" - String javaXmx = "8G" - Int timeMinutes = 30 + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.13--0" } @@ -457,9 +456,9 @@ task Purple { File germlineHotspots Int threads = 1 - Int timeMinutes = 60 - String memory = "13G" - String javaXmx = "12G" + Int timeMinutes = 30 + String memory = "9G" + String javaXmx = "8G" String dockerImage = "quay.io/biocontainers/hmftools-purple:2.52--0" } @@ -600,8 +599,8 @@ task Sage { Int threads = 2 String javaXmx = "50G" - String memory = "75G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough + String memory = "60G" + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 8 / threads) String dockerImage = "quay.io/biocontainers/hmftools-sage:2.7--0" } diff --git a/picard.wdl b/picard.wdl index aefb4c21..9a935045 100644 --- a/picard.wdl +++ b/picard.wdl @@ -459,9 +459,9 @@ task CollectWgsMetrics { 
Int? minimumBaseQuality Int? coverageCap - String memory = "33G" - String javaXmx = "32G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } diff --git a/sambamba.wdl b/sambamba.wdl index c8d9e11c..e78f50b6 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -84,7 +84,7 @@ task Markdup { # Added 4096 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 4096 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. - Int timeMinutes = 1 + ceil(size(inputBams, "G") * 40) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 25) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } diff --git a/samtools.wdl b/samtools.wdl index 9042a0df..954b5d4e 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -27,6 +27,7 @@ task BgzipAndIndex { String type = "vcf" Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String memory = "1G" String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -55,6 +56,7 @@ task BgzipAndIndex { outputDir: {description: "The directory in which the output will be placed.", category: "required"} type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/snpeff.wdl b/snpeff.wdl index 85709079..4a3640c7 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -36,9 +36,9 @@ task SnpEff { Boolean noShiftHgvs = false Int? 
upDownStreamLen - String memory = "50G" - String javaXmx = "49G" - Int timeMinutes = 60 #FIXME + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/snpeff:5.0--0" } From ffda341fae7bc7cc519451b018e43a76cae34d8e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 21 May 2021 14:37:56 +0200 Subject: [PATCH 493/902] adjust runtime settings --- bcftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 8fab933a..059cc39d 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,8 +47,8 @@ task Annotate { File? samplesFile Int threads = 0 - String memory = "1G" - Int timeMinutes = 30 + ceil(size(inputFile, "G")) + String memory = "2G" + Int timeMinutes = 60 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From c3df943f2964d2d5551baaf64c9bb2e2d9c198bf Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 25 May 2021 13:01:25 +0200 Subject: [PATCH 494/902] update memory bcftools --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 059cc39d..5170a01f 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,7 +47,7 @@ task Annotate { File? samplesFile Int threads = 0 - String memory = "2G" + String memory = "4G" Int timeMinutes = 60 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From ae1d2c02628d2239e79d24ecb78b4d4a3bcbc2d9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 28 May 2021 13:45:25 +0200 Subject: [PATCH 495/902] update changelog --- CHANGELOG.md | 2 ++ htseq.wdl | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22f41826..58d9f57f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. 
version 5.0.0-dev --------------------------- ++ GffCompare: Make the `referenceAnnotation` input optional. ++ Stringtie: Add the `minimumCoverage` input. + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. + Lima: Fix copy commands. + Fixed the `size` call in the default for gffread's timeMinutes, to retrieve diff --git a/htseq.wdl b/htseq.wdl index ef4ae0a3..76d3bb83 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -23,7 +23,6 @@ version 1.0 task HTSeqCount { input { Array[File]+ inputBams - Array[File]+ inputBamIndexes File gtfFile String outputTable = "output.tsv" String order = "pos" From 24a6f1104c3a05053931b37db3fb8f3dd1e178b1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 31 May 2021 14:19:34 +0200 Subject: [PATCH 496/902] fix gffcompare --- gffcompare.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gffcompare.wdl b/gffcompare.wdl index 8b135479..5c83ba9d 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -91,7 +91,7 @@ task GffCompare { else 0 Int noInputFiles = length(inputGtfFiles) Boolean oneFile = (noFilesGtfList + noInputFiles) == 1 - String annotatedName = if oneFile + String annotatedName = if oneFile && defined(referenceAnnotation) then "annotated" else "combined" From 743acb0b89cc4893544965e3d93590978b414420 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 1 Jun 2021 12:13:09 +0200 Subject: [PATCH 497/902] Fix memory values. --- CHANGELOG.md | 1 + CPAT.wdl | 5 ++++- gffcompare.wdl | 3 +++ gffread.wdl | 3 +++ 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 28e998e6..e47033c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Add memory values to GffCompare, GffRead and CPAT. + GffCompare: Make the `referenceAnnotation` input optional. + Stringtie: Add the `minimumCoverage` input. 
+ UMI-tools: update default dockerImage to use umitools v1.1.1 with correct samtools version (1.10) diff --git a/CPAT.wdl b/CPAT.wdl index afb67853..4a6d4478 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -34,6 +34,7 @@ task CPAT { Array[String]? startCodons Array[String]? stopCodons + String memory = "4G" Int timeMinutes = 10 + ceil(size(gene, "G") * 30) String dockerImage = "biocontainers/cpat:v1.2.4_cv1" } @@ -60,8 +61,9 @@ task CPAT { } runtime { - docker: dockerImage + memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -74,6 +76,7 @@ task CPAT { referenceGenomeIndex: {description: "The index of the reference. Should be added as input if CPAT should not index the reference genome.", category: "advanced"} startCodons: {description: "Equivalent to CPAT's `--start` option.", category: "advanced"} stopCodons: {description: "Equivalent to CPAT's `--stop` option.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/gffcompare.wdl b/gffcompare.wdl index 5c83ba9d..aa7c7209 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -46,6 +46,7 @@ task GffCompare { Int? maxDistanceGroupingTranscriptStartSites String? 
namePrefix + String memory = "4G" Int timeMinutes = 1 + ceil(size(inputGtfFiles, "G") * 30) String dockerImage = "quay.io/biocontainers/gffcompare:0.10.6--h2d50403_0" @@ -114,6 +115,7 @@ task GffCompare { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -140,6 +142,7 @@ task GffCompare { maxDistanceFreeEndsTerminalExons: {description: "Equivalent to gffcompare's `-e` option.", category: "advanced"} maxDistanceGroupingTranscriptStartSites: {description: "Equivalent to gffcompare's `-d` option.", category: "advanced"} namePrefix: {description: "Equivalent to gffcompare's `-p` option.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/gffread.wdl b/gffread.wdl index 967dd5c9..a04540f5 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -32,6 +32,7 @@ task GffRead { String? proteinFastaPath String? 
filteredGffPath + String memory = "4G" Int timeMinutes = 1 + ceil(size(inputGff, "G") * 10) String dockerImage = "quay.io/biocontainers/gffread:0.9.12--0" } @@ -64,6 +65,7 @@ task GffRead { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -78,6 +80,7 @@ task GffRead { CDSFastaPath: {description: "The location the CDS fasta should be written to.", category: "advanced"} proteinFastaPath: {description: "The location the protein fasta should be written to.", category: "advanced"} filteredGffPath: {description: "The location the filtered GFF should be written to.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 379d0be3671d7c6aee65b8e18a73798f1ef80733 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 2 Jun 2021 10:25:03 +0200 Subject: [PATCH 498/902] fix some runtime settings --- cutadapt.wdl | 2 +- multiqc.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index b2dbdec0..b49a95d4 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -83,7 +83,7 @@ task Cutadapt { Boolean? noZeroCap Int cores = 4 - String memory = "~{300 + 100 * cores}M" + String memory = "5G" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" } diff --git a/multiqc.wdl b/multiqc.wdl index 2571463a..a1662937 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -57,7 +57,7 @@ task MultiQC { String? clConfig String? 
memory - Int timeMinutes = 2 + ceil(size(reports, "G") * 8) + Int timeMinutes = 10 + ceil(size(reports, "G") * 8) String dockerImage = "quay.io/biocontainers/multiqc:1.9--py_1" } From e7400ced4a7e413f794e05a62c8e2c1261a0e7fc Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Mon, 7 Jun 2021 14:17:14 +0200 Subject: [PATCH 499/902] Move pacbio-merge image to quay.io Docker hub has started to remove unused images from free accounts, which means that it might remove images used by this pipeline without notice. Therefore the pipeline now exclusively uses images from quay.io or official repositories from docker hub, which do not have this limitation. --- pacbio.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pacbio.wdl b/pacbio.wdl index 7c0113fd..b21c69bc 100644 --- a/pacbio.wdl +++ b/pacbio.wdl @@ -26,7 +26,7 @@ task mergePacBio { String outputPathMergedReport String memory = "4G" - String dockerImage = "lumc/pacbio-merge:0.2" + String dockerImage = "quay.io/redmar_van_den_berg/pacbio-merge:0.2" } command { From a095517d6f9e729769e26e1bd7dd6385ac403fc1 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 10 Jun 2021 16:20:34 +0200 Subject: [PATCH 500/902] Update tool versions. --- CHANGELOG.md | 18 ++++++++++++------ bam2fastx.wdl | 4 ++-- biowdl.wdl | 2 +- ccs.wdl | 2 +- common.wdl | 2 +- lima.wdl | 2 +- minimap2.wdl | 4 ++-- nanopack.wdl | 2 +- 8 files changed, 21 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e47033c6..9112c77d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,15 +10,21 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Update biowdl-input-converter to version 0.3. ++ Update minimap2 to version 2.20. ++ Update lima to version 2.2.0. ++ Update ccs to version 6.0.0. ++ Update bam2fastx to version 1.3.1. + Add memory values to GffCompare, GffRead and CPAT. + GffCompare: Make the `referenceAnnotation` input optional. 
+ Stringtie: Add the `minimumCoverage` input. -+ UMI-tools: update default dockerImage to use umitools v1.1.1 with correct samtools version (1.10) -+ UMI-tools: re-introduce samtools indexing -+ UMI-tools: update default dockerImage to use umitools v1.1.1 -+ UMI-tools dedup: Add tempdir -+ Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). -+ Duphold: add duphold.wdl. ++ UMI-tools: Update default dockerImage to use umitools v1.1.1 with correct + samtools version (1.10). ++ UMI-tools: Re-introduce samtools indexing. ++ UMI-tools: Update default dockerImage to use umitools v1.1.1. ++ UMI-tools dedup: Add tempdir. ++ Bcftools view: Add options for filtering (include, exclude, excludeUncalled). ++ Duphold: Add `duphold.wdl`. + Add new wdl file prepareShiny.wdl for creating input files for shiny app. + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. + Lima: Fix copy commands. diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 2ae22a57..0bdccca8 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -32,7 +32,7 @@ task Bam2Fasta { String memory = "2G" Int timeMinutes = 15 - String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.0--he1c1bb9_8" + String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } command { @@ -100,7 +100,7 @@ task Bam2Fastq { String memory = "2G" Int timeMinutes = 15 - String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.0--he1c1bb9_8" + String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } command { diff --git a/biowdl.wdl b/biowdl.wdl index 06b1d756..dead8303 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -34,7 +34,7 @@ task InputConverter { String memory = "128M" Int timeMinutes = 1 - String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" + String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.3.0--pyhdfd78af_0" } command <<< diff --git a/ccs.wdl b/ccs.wdl index 4446937b..69095f4d 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ 
-37,7 +37,7 @@ task CCS { Int threads = 2 String memory = "4G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/pbccs:5.0.0--0" + String dockerImage = "quay.io/biocontainers/pbccs:6.0.0--h9ee0642_2" } command { diff --git a/common.wdl b/common.wdl index 66bdb99c..54b11567 100644 --- a/common.wdl +++ b/common.wdl @@ -221,7 +221,7 @@ task YamlToJson { String memory = "128M" Int timeMinutes = 1 # biowdl-input-converter has python and pyyaml. - String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" + String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.3.0--pyhdfd78af_0" } command { diff --git a/lima.wdl b/lima.wdl index 2455aaac..f6faf079 100644 --- a/lima.wdl +++ b/lima.wdl @@ -51,7 +51,7 @@ task Lima { Int threads = 2 String memory = "2G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/lima:2.0.0--0" + String dockerImage = "quay.io/biocontainers/lima:2.2.0--h9ee0642_0" } Map[String, String] libraryDesignOptions = {"same": "--same", "different": "--different", "neighbors": "--neighbors"} diff --git a/minimap2.wdl b/minimap2.wdl index d2e69905..50ff4db3 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -33,7 +33,7 @@ task Indexing { Int cores = 1 String memory = "4G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/minimap2:2.17--hed695b0_3" + String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } command { @@ -100,7 +100,7 @@ task Mapping { Int cores = 4 String memory = "30G" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) - String dockerImage = "quay.io/biocontainers/minimap2:2.17--hed695b0_3" + String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } command { diff --git a/nanopack.wdl b/nanopack.wdl index f86641b0..e4c94a43 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -42,7 +42,7 @@ task NanoPlot { Int threads = 2 String memory = "2G" Int timeMinutes = 15 - String dockerImage = 
"quay.io/biocontainers/nanoplot:1.32.1--py_0" + String dockerImage = "quay.io/biocontainers/nanoplot:1.38.0--pyhdfd78af_0" } Map[String, String] fileTypeOptions = {"fastq": "--fastq ", "fasta": "--fasta ", "fastq_rich": "--fastq_rich ", "fastq_minimal": "--fastq_minimal ", "summary": "--summary ", "bam": "--bam ", "ubam": "--ubam ", "cram": "--cram ", "pickle": "--pickle ", "feather": "--feather "} From 7251bf276a5ea5a3d140d0438fe9647db74ddbc0 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 11 Jun 2021 09:59:25 +0200 Subject: [PATCH 501/902] Fix lima output naming. --- lima.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lima.wdl b/lima.wdl index f6faf079..6b87ad4f 100644 --- a/lima.wdl +++ b/lima.wdl @@ -91,13 +91,13 @@ task Lima { dirName="$(dirname ~{outputPrefix})" find "$(cd ${dirName}; pwd)" -name "*.bam" > bamFiles.txt find "$(cd ${dirName}; pwd)" -name "*.bam.pbi" > bamIndexes.txt - find "$(cd ${dirName}; pwd)" -name "*.subreadset.xml" > subreadsets.txt + find "$(cd ${dirName}; pwd)" -name "*.consensusreadset.xml" > consensusreadset.txt >>> output { Array[File] limaBam = read_lines("bamFiles.txt") Array[File] limaBamIndex = read_lines("bamIndexes.txt") - Array[File] limaXml = read_lines("subreadsets.txt") + Array[File] limaXml = read_lines("consensusreadset.txt") File limaStderr = outputPrefix + ".lima.stderr.log" File limaJson = outputPrefix + ".json" File limaCounts = outputPrefix + ".lima.counts" From 460d3d04e2aa83bac9b5ddfa708463a7a1713394 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 11 Jun 2021 12:19:52 +0200 Subject: [PATCH 502/902] Update scripts. 
--- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 85e2ec54..c31670d3 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 85e2ec542b65be5f2a25c22db05c28700fbe6db5 +Subproject commit c31670d3a9222a2feafc649cbc118c95afbc7189 From 6356c481cdb8d42820476fe7249f77d1e48bd9d2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 11 Jun 2021 18:01:39 +0200 Subject: [PATCH 503/902] Fix outputs in ccs. --- CHANGELOG.md | 1 + ccs.wdl | 37 ++++++++++++++++++++++++++++++++----- scripts | 2 +- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e47033c6..c4eb9ac5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Fix output files in ccs.wdl. + Add memory values to GffCompare, GffRead and CPAT. + GffCompare: Make the `referenceAnnotation` input optional. + Stringtie: Add the `minimumCoverage` input. diff --git a/ccs.wdl b/ccs.wdl index 4446937b..29f1a7f9 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -24,12 +24,19 @@ task CCS { input { File subreadsFile String outputPrefix + String logLevel = "WARN" Int minPasses = 3 + Int topPasses = 60 Int minLength = 10 Int maxLength = 50000 Boolean byStrand = false + Boolean skipPolish = false + Boolean all = false + Boolean subreadFallback = false + Boolean allKinetics = false + Boolean hifiKinetics = false + Float minSnr = 2.5 Float minReadQuality = 0.99 - String logLevel = "WARN" File? subreadsIndexFile String? 
chunkString @@ -37,7 +44,7 @@ task CCS { Int threads = 2 String memory = "4G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/pbccs:5.0.0--0" + String dockerImage = "quay.io/biocontainers/pbccs:6.0.0--h9ee0642_2" } command { @@ -45,15 +52,24 @@ task CCS { mkdir -p "$(dirname ~{outputPrefix})" ccs \ --min-passes ~{minPasses} \ + --min-snr ~{minSnr} \ + --top-passes ~{topPasses} \ --min-length ~{minLength} \ --max-length ~{maxLength} \ ~{true="--by-strand" false="" byStrand} \ + ~{true="--skip-polish" false="" skipPolish} \ + ~{true="--all" false="" all} \ + ~{true="--subread-fallback" false="" subreadFallback} \ + ~{true="--all-kinetics" false="" allKinetics} \ + ~{true="--hifi-kinetics" false="" hifiKinetics} \ --min-rq ~{minReadQuality} \ --log-level ~{logLevel} \ --num-threads ~{threads} \ ~{"--chunk " + chunkString} \ + ~{"--report-file " + outputPrefix + ".ccs_report.txt"} \ ~{"--report-json " + outputPrefix + ".ccs.report.json"} \ ~{"--log-file " + outputPrefix + ".ccs.stderr.log"} \ + ~{"--metrics-json " + outputPrefix + ".zmw_metrics.json.gz"} \ ~{subreadsFile} \ ~{outputPrefix + ".ccs.bam"} } @@ -61,8 +77,10 @@ task CCS { output { File ccsBam = outputPrefix + ".ccs.bam" File ccsBamIndex = outputPrefix + ".ccs.bam.pbi" - File ccsReport = outputPrefix + ".ccs.report.json" + File ccsReport = outputPrefix + ".ccs_report.txt" + File ccsJsonReport = outputPrefix + ".ccs.report.json" File ccsStderr = outputPrefix + ".ccs.stderr.log" + File zmwMetrics = outputPrefix + ".zmw_metrics.json.gz" } runtime { @@ -76,12 +94,19 @@ task CCS { # inputs subreadsFile: {description: "Subreads input file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + logLevel: {description: "Set log level. 
Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} minPasses: {description: "Minimum number of full-length subreads required to generate ccs for a ZMW.", category: "advanced"} + topPasses: {description: "Pick at maximum the top N passes for each ZMW.", category: "advanced"} minLength: {description: "Minimum draft length before polishing.", category: "advanced"} maxLength: {description: "Maximum draft length before polishing.", category: "advanced"} byStrand: {description: "Generate a consensus for each strand.", category: "advanced"} + skipPolish: {description: "Only output the initial draft template (faster, less accurate).", category: "advanced"} + all: {description: "Emit all ZMWs.", category: "advanced"} + subreadFallback: {description: "Emit a representative subread, instead of the draft consensus, if polishing failed.", category: "advanced"} + allKinetics: {description: "Calculate mean pulse widths (PW) and interpulse durations (IPD) for every ZMW.", category: "advanced"} + hifiKinetics: {description: "Calculate mean pulse widths (PW) and interpulse durations (IPD) for every HiFi read.", category: "advanced"} + minSnr: {description: "Minimum SNR of subreads to use for generating CCS.", category: "advanced"} minReadQuality: {description: "Minimum predicted accuracy in [0, 1].", category: "common"} - logLevel: {description: "Set log level. Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} subreadsIndexFile: {description: "Index for the subreads input file, required when using chunkString.", category: "advanced"} chunkString: {descpription: "Chunk string (e.g. 
1/4, 5/5) for CCS.", category: "advanced"} threads: {description: "The number of threads to be used.", category: "advanced"} @@ -92,7 +117,9 @@ task CCS { # outputs ccsBam: {description: "Consensus reads output file."} ccsBamIndex: {description: "Index of consensus reads output file."} - ccsReport: {description: "Ccs results report file."} + ccsReport: {description: "Ccs report file."} + ccsJsonReport: {description: "Ccs results json report file."} ccsStderr: {description: "Ccs STDERR log file."} + zmwMetrics: {description: "ZMW metrics json file."} } } diff --git a/scripts b/scripts index 85e2ec54..c31670d3 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 85e2ec542b65be5f2a25c22db05c28700fbe6db5 +Subproject commit c31670d3a9222a2feafc649cbc118c95afbc7189 From 833ad0bf47f9c42e33743ed5b0de7851ef66bbf1 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 11 Jun 2021 18:06:21 +0200 Subject: [PATCH 504/902] Remove weird line. --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f4bade66..83da5399 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,6 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -<<<<<<< HEAD + Fix output files in ccs.wdl. + Update biowdl-input-converter to version 0.3. + Update minimap2 to version 2.20. From 4f879f72aec90d36d0201e9c1b54154f9decb757 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Jun 2021 12:40:32 +0200 Subject: [PATCH 505/902] Try to adjust localization. --- CHANGELOG.md | 7 ++++++- bam2fastx.wdl | 12 ++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 83da5399..11a92d83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,12 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ Fix output files in ccs.wdl. 
++ Change the way localization of the input bam files and index are handled + in the bam2fastx tasks. ++ Add new parameters from CCS version 6.0.0 and add two new outputs: + `ccs_report.txt` & `zmw_metrics.json.gz`. ++ Change CutAdapt memory to `5G`. ++ Increase multiqc base time from 5 to 10. + Update biowdl-input-converter to version 0.3. + Update minimap2 to version 2.20. + Update lima to version 2.2.0. diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 0bdccca8..4a2ecf87 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -44,13 +44,13 @@ task Bam2Fasta { bamFiles="" for bamFile in ~{sep=" " bam} do - ln $bamFile . - bamFiles=$bamFiles" $(basename $bamFile)" + cp $bamFile ./ + bamFiles=$bamFiles" ./$(basename $bamFile)" done for index in ~{sep=" " bamIndex} do - ln $index . + cp $index ./ done bam2fasta \ @@ -112,13 +112,13 @@ task Bam2Fastq { bamFiles="" for bamFile in ~{sep=" " bam} do - ln $bamFile . - bamFiles=$bamFiles" $(basename $bamFile)" + cp $bamFile ./ + bamFiles=$bamFiles" ./$(basename $bamFile)" done for index in ~{sep=" " bamIndex} do - ln $index . + cp $index ./ done bam2fastq \ From a422e52920dc8fa2d2614f632962dec37964b939 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Jun 2021 14:23:57 +0200 Subject: [PATCH 506/902] Try a different approach. 
--- bam2fastx.wdl | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 4a2ecf87..4e5ed3ed 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -35,7 +35,7 @@ task Bam2Fasta { String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } - command { + command <<< set -e mkdir -p "$(dirname ~{outputPrefix})" @@ -44,13 +44,13 @@ task Bam2Fasta { bamFiles="" for bamFile in ~{sep=" " bam} do - cp $bamFile ./ - bamFiles=$bamFiles" ./$(basename $bamFile)" + ln -s ${bamFile} ./ + bamFiles=${bamFiles}" ./$(basename ${bamFile})" done - for index in ~{sep=" " bamIndex} + for indexFile in ~{sep=" " bamIndex} do - cp $index ./ + ln -s ${indexFile} ./ done bam2fasta \ @@ -58,8 +58,8 @@ task Bam2Fasta { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - $bamFiles - } + ${bamFiles} + >>> output { File fastaFile = outputPrefix + ".fasta.gz" @@ -103,7 +103,7 @@ task Bam2Fastq { String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } - command { + command <<< set -e mkdir -p "$(dirname ~{outputPrefix})" @@ -112,13 +112,13 @@ task Bam2Fastq { bamFiles="" for bamFile in ~{sep=" " bam} do - cp $bamFile ./ - bamFiles=$bamFiles" ./$(basename $bamFile)" + ln -s ${bamFile} ./ + bamFiles=${bamFiles}" ./$(basename ${bamFile})" done - for index in ~{sep=" " bamIndex} + for indexFile in ~{sep=" " bamIndex} do - cp $index ./ + ln -s ${indexFile} ./ done bam2fastq \ @@ -126,8 +126,8 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - $bamFiles - } + ${bamFiles} + >>> output { File fastqFile = outputPrefix + ".fastq.gz" From adad218bbd6f501b0194107adf81cc9588ba91ba Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Jun 2021 15:02:41 +0200 Subject: [PATCH 507/902] Test tool without localization. 
--- bam2fastx.wdl | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 4e5ed3ed..3cdb29fb 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -103,31 +103,16 @@ task Bam2Fastq { String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } - command <<< + command { set -e mkdir -p "$(dirname ~{outputPrefix})" - - # Localise the bam and pbi files so they are next to each other in the - # current folder. - bamFiles="" - for bamFile in ~{sep=" " bam} - do - ln -s ${bamFile} ./ - bamFiles=${bamFiles}" ./$(basename ${bamFile})" - done - - for indexFile in ~{sep=" " bamIndex} - do - ln -s ${indexFile} ./ - done - bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ${bamFiles} - >>> + ~{bam} + } output { File fastqFile = outputPrefix + ".fastq.gz" From adee85e2cfe420ba3a7be24f764233597d00a74a Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Jun 2021 15:08:35 +0200 Subject: [PATCH 508/902] Fix array. --- bam2fastx.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 3cdb29fb..b09f7a0f 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -111,7 +111,7 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{bam} + ~{sep=" " bam} } output { From 734c4037e642bf318b249f8835f2042c40ff328d Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Jun 2021 15:49:53 +0200 Subject: [PATCH 509/902] Try another approach. 
--- bam2fastx.wdl | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index b09f7a0f..110441ec 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -103,16 +103,26 @@ task Bam2Fastq { String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } - command { + command <<< set -e mkdir -p "$(dirname ~{outputPrefix})" + + # Localise the bam and pbi files so they are next to each other in the + # current folder. + bamFiles="" + for bamFile in ~{sep=" " bam} + do + fullPathBam=$(readlink -f ${bamFile}) + bamFiles=${bamFiles}" ${fullPathBam}" + done + bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{sep=" " bam} - } + ${bamFiles} + >>> output { File fastqFile = outputPrefix + ".fastq.gz" From 235fb43f046b285a3b5d8ca702b2cc8ad64dcf36 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 15 Jun 2021 11:18:37 +0200 Subject: [PATCH 510/902] Revert changes to WDL file. --- bam2fastx.wdl | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 110441ec..0bdccca8 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -35,7 +35,7 @@ task Bam2Fasta { String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } - command <<< + command { set -e mkdir -p "$(dirname ~{outputPrefix})" @@ -44,13 +44,13 @@ task Bam2Fasta { bamFiles="" for bamFile in ~{sep=" " bam} do - ln -s ${bamFile} ./ - bamFiles=${bamFiles}" ./$(basename ${bamFile})" + ln $bamFile . + bamFiles=$bamFiles" $(basename $bamFile)" done - for indexFile in ~{sep=" " bamIndex} + for index in ~{sep=" " bamIndex} do - ln -s ${indexFile} ./ + ln $index . 
done bam2fasta \ @@ -58,8 +58,8 @@ task Bam2Fasta { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ${bamFiles} - >>> + $bamFiles + } output { File fastaFile = outputPrefix + ".fasta.gz" @@ -103,7 +103,7 @@ task Bam2Fastq { String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } - command <<< + command { set -e mkdir -p "$(dirname ~{outputPrefix})" @@ -112,8 +112,13 @@ task Bam2Fastq { bamFiles="" for bamFile in ~{sep=" " bam} do - fullPathBam=$(readlink -f ${bamFile}) - bamFiles=${bamFiles}" ${fullPathBam}" + ln $bamFile . + bamFiles=$bamFiles" $(basename $bamFile)" + done + + for index in ~{sep=" " bamIndex} + do + ln $index . done bam2fastq \ @@ -121,8 +126,8 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ${bamFiles} - >>> + $bamFiles + } output { File fastqFile = outputPrefix + ".fastq.gz" From f2f7411a7b32bda18bba6eb8ee83606fa635f9e0 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 15 Jun 2021 12:43:37 +0200 Subject: [PATCH 511/902] Add directory creation to samtools fastq. --- CHANGELOG.md | 3 +-- samtools.wdl | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 11a92d83..e7242699 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,8 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ Change the way localization of the input bam files and index are handled - in the bam2fastx tasks. ++ Samtools: Add mkdir line to `Fastq` task. + Add new parameters from CCS version 6.0.0 and add two new outputs: `ccs_report.txt` & `zmw_metrics.json.gz`. + Change CutAdapt memory to `5G`. 
diff --git a/samtools.wdl b/samtools.wdl index 9042a0df..46d1eb70 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -122,6 +122,8 @@ task Fastq { } command { + set -e + mkdir -p "$(dirname ~{outputRead1})" samtools fastq \ ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ ~{"-2 " + outputRead2} \ From de03877e2e831285daaccc820db98da0897e1dac Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Jun 2021 13:53:25 +0200 Subject: [PATCH 512/902] add cuppa and cuppa chart --- hmftools.wdl | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 9b22c10d..779820a3 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -168,6 +168,112 @@ task Cobalt { } } +task Cuppa { + input { + Array[File]+ linxOutput + Array[File]+ purpleOutput + String sampleName + Array[String]+ categories = ["DNA"] + Array[File]+ referenceData + File purpleSvVcf + File purpleSvVcfIndex + File purpleSomaticVcf + File purpleSomaticVcfIndex + String outputDir = "./cuppa" + + String javaXmx = "4G" + String memory = "5G" + Int time_minutes = 10 + String dockerImage = "quay.io/biowdl/cuppa:1.4" + } + + command { + set -e + mkdir -p sampleData ~{outputDir} + ln -s -t sampleData ~{sep=" " linxOutput} ~{sep=" " purpleOutput} + cuppa -Xmx~{javaXmx} \ + -output_dir ~{outputDir} \ + -output_id ~{sampleName} \ + -categories '~{sep="," categories}' \ + -ref_data_dir ~{sub(referenceData[0], basename(referenceData[0]), "")} \ + -sample_data_dir sampleData \ + -sample_data ~{sampleName} \ + -sample_sv_file ~{purpleSvVcf} \ + -sample_somatic_vcf ~{purpleSomaticVcf} + } + + output { + File cupData = "~{outputDir}/~{sampleName}.cup.data.csv" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + linxOutput: {description: "The files produced by linx.", category: "required"} + purpleOutput: {description: "The files produced by purple.", category: "required"} 
+ sampleName: {description: "The name of the sample.", category: "required"} + categories: {description: "The classifiers to use.", category: "advanced"} + referenceData : {description: "The reference data.", category: "required"} + purpleSvVcf: {description: "The VCF file produced by purple which contains structural variants.", category: "required"} + purpleSvVcfIndex: {description: "The index of the structural variants VCF file produced by purple.", category: "required"} + purpleSomaticVcf: {description: "The VCF file produced by purple which contains somatic variants.", category: "required"} + purpleSomaticVcfIndex: {description: "The index of the somatic VCF file produced by purple.", category: "required"} + outputDir: {description: "The directory the ouput will be placed in.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task CuppaChart { + input { + String sampleName + File cupData + String outputDir = "./cuppa" + + String memory = "4G" + Int time_minutes = 5 + String dockerImage = "quay.io/biowdl/cuppa:1.4" + } + + command { + set -e + mkdir -p ~{outputDir} + cuppa-chart \ + -sample ~{sampleName} + -sample_data ~{cupData} + -output_dir ~{outputDir} + } + + output { + File cuppaChart = "~{outputDir}/~{sampleName}.cuppa.chart.png" + File cuppaConclusion = "~{outputDir}/~{sampleName}.cuppa.conclusion.txt" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + sampleName: {description: "The name of the sample.", category:"common"} + cupData: {description: "The cuppa output.", category: "required"} + outputDir: {description: "The directory the output will be written to.", category:"common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GripssApplicationKt { input { File inputVcf From c0477edfd5904f1de11d7ea0d60e8b65e36e0bed Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 16 Jun 2021 10:25:47 +0200 Subject: [PATCH 513/902] fix typo --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 779820a3..8beb5c76 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -183,7 +183,7 @@ task Cuppa { String javaXmx = "4G" String memory = "5G" - Int time_minutes = 10 + Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/cuppa:1.4" } @@ -239,7 +239,7 @@ task CuppaChart { String outputDir = "./cuppa" String memory = "4G" - Int time_minutes = 5 + Int timeMinutes = 5 String dockerImage = "quay.io/biowdl/cuppa:1.4" } From 1be4badcf451ccad2d2198dbfec4d97aaf68af45 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 21 Jun 2021 11:54:52 +0200 Subject: [PATCH 514/902] increase memory for amber --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 8beb5c76..868d03fe 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,8 +35,8 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = "52G" - String javaXmx = "50G" + String memory = "70G" + String javaXmx = "64G" Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } From 7a693a69f9a59755d527d733946406eed3a2f124 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 22 Jun 2021 11:38:04 +0200 Subject: [PATCH 515/902] remove rainfall plot output --- hmftools.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 868d03fe..8e60351b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -615,7 +615,6 @@ task Purple { File segmentPlot = "~{outputDir}/plot/~{tumorName}.segment.png" File somaticClonalityPlot = 
"~{outputDir}/plot/~{tumorName}.somatic.clonality.png" File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" - File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File purpleVersion = "~{outputDir}/purple.version" File circosNormalRatio = "~{outputDir}/circos/~{normalName}.ratio.circos" File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" @@ -634,7 +633,7 @@ task Purple { purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, - segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] + segmentPlot, somaticClonalityPlot, somaticPlot] Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink, circosTumorRatio, circosGaps, circosBaf, circosCnv, circosInputConf, circosMap, circosSnp] From 664325fc50d19e074d80780cae322157f07035ed Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 22 Jun 2021 13:34:07 +0200 Subject: [PATCH 516/902] fix missing backslashes --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 8e60351b..1b9d8d22 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -247,8 +247,8 @@ task CuppaChart { set -e mkdir -p ~{outputDir} cuppa-chart \ - -sample ~{sampleName} - -sample_data ~{cupData} + -sample ~{sampleName} \ + -sample_data ~{cupData} \ -output_dir ~{outputDir} } From 5e29a653559f7b7cc0f1e2fc787bbf8a8117f306 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 25 Jun 2021 07:40:37 +0200 Subject: [PATCH 517/902] Set defaults for boolean values --- spades.wdl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/spades.wdl b/spades.wdl index 7cc16d21..1f246d48 100644 --- a/spades.wdl +++ b/spades.wdl @@ -34,16 +34,16 @@ task Spades { File? tslrContigs File? trustedContigs File? 
untrustedContigs - Boolean? singleCell - Boolean? metagenomic - Boolean? rna - Boolean? plasmid - Boolean? ionTorrent - Boolean? onlyErrorCorrection - Boolean? onlyAssembler - Boolean? careful - Boolean? disableGzipOutput - Boolean? disableRepeatResolution + Boolean singleCell = False + Boolean metagenomic = False + Boolean rna = False + Boolean plasmid = False + Boolean ionTorrent = False + Boolean onlyErrorCorrection = False + Boolean onlyAssembler = False + Boolean careful = False + Boolean disableGzipOutput = False + Boolean disableRepeatResolution = False File? dataset File? tmpDir String? k From 2ebde5f0a1997a098f89370989bdbbcf242ac207 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 28 Jun 2021 09:51:24 +0200 Subject: [PATCH 518/902] survivor: line 47-49, change integer to string literal --- survivor.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/survivor.wdl b/survivor.wdl index 8b0360d8..de232405 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -44,9 +44,9 @@ task Merge { fileList \ ~{breakpointDistance} \ ~{suppVecs} \ - ~{true=1 false=0 svType} \ - ~{true=1 false=0 strandType} \ - ~{true=1 false=0 distanceBySvSize} \ + ~{true='1' false='0' svType} \ + ~{true='1' false='0' strandType} \ + ~{true='1' false='0' distanceBySvSize} \ ~{minSize} \ ~{outputPath} } From da28f9399252cb8777abc630fe8c34e406d13da3 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 28 Jun 2021 10:56:04 +0200 Subject: [PATCH 519/902] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e7242699..ad3d30fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Survivor: Change integer to string literal in boolean parameters. + Samtools: Add mkdir line to `Fastq` task. + Add new parameters from CCS version 6.0.0 and add two new outputs: `ccs_report.txt` & `zmw_metrics.json.gz`. 
From 545f63af658df8fc515672589a7bfb7e81ed2be3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 28 Jun 2021 11:36:52 +0200 Subject: [PATCH 520/902] update some version and add repeatmasker annotation for gridss --- gridss.wdl | 123 +++++++++++++++++++++++++++++++-------------------- hmftools.wdl | 9 ++-- 2 files changed, 79 insertions(+), 53 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index acafc911..3844c602 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -22,6 +22,61 @@ version 1.0 import "bwa.wdl" as bwa +task AnnotateInsertedSequence { + input { + File inputVcf + String outputPath = "gridss.annotated.vcf.gz" + File viralReference + File viralReferenceFai + File viralReferenceDict + File viralReferenceImg + + Int threads = 8 + String javaXmx = "8G" + String memory = "9G" + String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3 / threads) + } + + command { + AnnotateInsertedSequence -Xmx~{javaXmx} \ + REFERENCE_SEQUENCE=~{viralReference} \ + INPUT=~{inputVcf} \ + OUTPUT=~{outputPath} \ + ALIGNMENT=APPEND \ + WORKING_DIR='.' 
\ + WORKER_THREADS=~{threads} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF file.", category: "required"} + outputPath: {description: "The path the output will be written to.", category: "common"} + viralReference: {description: "A fasta file with viral sequences.", category: "required"} + viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"} + viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"} + viralReferenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the viral reference.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GRIDSS { input { File tumorBam @@ -34,13 +89,12 @@ task GRIDSS { File? normalBai String? normalLabel File? blacklistBed - File? repeatmaskerBed File? 
gridssProperties Int jvmHeapSizeGb = 64 Int threads = 4 Int timeMinutes = ceil(4320 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" + String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" } command { @@ -56,7 +110,6 @@ task GRIDSS { ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ ~{"--blacklist " + blacklistBed} \ - ~{"--repeatmaskerbed " + repeatmaskerBed} \ ~{normalBam} \ ~{tumorBam} tabix -p vcf ~{outputPrefix}.vcf.gz @@ -88,7 +141,6 @@ task GRIDSS { normalBai: {description: "The index for normalBam.", category: "advanced"} normalLabel: {description: "The name of the normal sample.", category: "advanced"} blacklistBed: {description: "A bed file with blaclisted regins.", category: "advanced"} - repeatmaskerBed: {description: "A bed file containing the repeatmasker database.", category: "advanced"} gridssProperties: {description: "A properties file for gridss.", category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} @@ -104,64 +156,37 @@ task GRIDSS { } } -task AnnotateInsertedSequence { +task GridssAnnotateVcfRepeatmasker { input { - File inputVcf - String outputPath = "gridss.annotated.vcf.gz" - File viralReference - File viralReferenceFai - File viralReferenceDict - File viralReferenceImg + File gridssVcf + File gridssVcfIndex + String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" - Int threads = 8 - String javaXmx = "8G" - String memory = "9G" - String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3 / threads) + String memory = "4G" + String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3) } command { - java -Xmx~{javaXmx} \ - -Dsamjdk.create_index=true \ - -Dsamjdk.use_async_io_read_samtools=true \ - -Dsamjdk.use_async_io_write_samtools=true \ - 
-Dsamjdk.use_async_io_write_tribble=true \ - -Dsamjdk.buffer_size=4194304 \ - -cp /usr/local/share/gridss-2.9.4-0/gridss.jar \ - gridss.AnnotateInsertedSequence \ - REFERENCE_SEQUENCE=~{viralReference} \ - INPUT=~{inputVcf} \ - OUTPUT=~{outputPath} \ - ALIGNMENT=APPEND \ - WORKING_DIR='.' \ - WORKER_THREADS=~{threads} + gridss_annotate_vcf_repeatmasker + --output ~{outputPath} \ + --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ + -w . \ + ~{gridssVcf} } output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" + File annotatedVcf = outputPath + File annotatedVcfIndex = "~{outputPath}.tbi" } runtime { - cpu: threads - memory: memory - time_minutes: timeMinutes # !UnknownRuntimeKey - docker: dockerImage - } - - parameter_meta { - inputVcf: {description: "The input VCF file.", category: "required"} - outputPath: {description: "The path the output will be written to.", category: "common"} - viralReference: {description: "A fasta file with viral sequences.", category: "required"} - viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"} - viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"} - viralReferenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the viral reference.", category: "required"} - + gridssVcf: {description: "The GRIDSS output.", category: "required"} + gridssVcfIndex: {description: "The index for the GRIDSS output.", category: "required"} + outputPath: {description: "The path the output should be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} +} \ No newline at end of file diff --git a/hmftools.wdl b/hmftools.wdl index 1b9d8d22..7d6f1547 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -304,7 +304,8 @@ task GripssApplicationKt { -breakend_pon ~{breakendPon} \ -breakpoint_pon ~{breakpointPon} \ -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} + -output_vcf ~{outputPath} \ + -paired_normal_tumor_ordinals } output { @@ -463,7 +464,7 @@ task Linx { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.13--0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.15--hdfd78af_0" } command { @@ -565,7 +566,7 @@ task Purple { Int timeMinutes = 30 String memory = "9G" String javaXmx = "8G" - String dockerImage = "quay.io/biocontainers/hmftools-purple:2.52--0" + String dockerImage = "quay.io/biocontainers/hmftools-purple:2.54--hdfd78af_0" } command { @@ -706,7 +707,7 @@ task Sage { String javaXmx = "50G" String memory = "60G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 8 / threads) - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.7--0" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_0" } command { From 04c65ab38a2d91051e3c0aa90c67738b755a4921 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 28 Jun 2021 15:29:21 +0200 Subject: [PATCH 521/902] add virusbreakend --- gridss.wdl | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 3844c602..52e039d1 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -164,7 +164,7 
@@ task GridssAnnotateVcfRepeatmasker { String memory = "4G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3) + Int timeMinutes = 1 + ceil(size(gridssVcf, "G") * 3) } command { @@ -181,6 +181,12 @@ task GridssAnnotateVcfRepeatmasker { } runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { gridssVcf: {description: "The GRIDSS output.", category: "required"} gridssVcfIndex: {description: "The index for the GRIDSS output.", category: "required"} outputPath: {description: "The path the output should be written to.", category: "common"} @@ -189,4 +195,57 @@ task GridssAnnotateVcfRepeatmasker { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} + +task Virusbreakend { + input { + File bam + File bamIndex + File referenceFasta + File virusbreakendDB + String outputPath = "./virusbreakend.vcf" + + String memory = "75G" + Int threads = 8 + String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + Int timeMinutes = 180 + } + + command { + mkdir virusbreakenddb + tar -xzvf ~{virusbreakendDB} -C virusbreakenddb --strip-components 1 + virusbreakend \ + --output ~{outputPath} \ + --workingdir . 
\ + --reference ~{referenceFasta} \ + --db virusbreakenddb \ + --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ + -t ~{threads} \ + ~{bam} + } + + output { + File vcf = outputPath + File summary = "~{outputPath}.summary.tsv" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + bam: {description: "A BAM file.", category: "required"} + bamIndex: {description: "The index for the BAM file.", category: "required"} + referenceFasta: {description: "The fasta of the reference genome.", category: "required"} + virusbreakendDB: {description: "A .tar.gz containing the virusbreakend database.", category: "required"} + outputPath: {description: "The path the output should be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + threads: {description: "The number of the threads to use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From c2f223eb6a487d7c5bca957bdaaf830d0522d3cb Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 29 Jun 2021 13:26:01 +0200 Subject: [PATCH 522/902] add virusinterpreter --- hmftools.wdl | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 7d6f1547..f1617bbe 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -778,3 +778,54 @@ task Sage { category: "advanced"} } } + +task VirusInterpreter { + input { + String sampleId + File virusBreakendTsv + File taxonomyDbTsv + File virusInterpretationTsv + File virusBlacklistTsv + String outputDir = "." 
+ + String memory = "3G" + String javaXmx = "2G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biowdl/virus-interpreter:1.0" + } + + command { + virus-interpreter -Xmx~{javaXmx} \ + -sample_id ~{sampleId} \ + -virus_breakend_tsv ~{virusBreakendTsv} \ + -taxonomy_db_tsv ~{taxonomyDbTsv} \ + -virus_interpretation_tsv ~{virusInterpretationTsv} \ + -virus_blacklist_tsv ~{virusBlacklistTsv} \ + -output_dir ~{outputDir} + } + + output { + File virusAnnotatedTsv = "~{outputDir}/~{sampleId}.virus.annotated.tsv" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + sampleId: {description: "The name of the sample.", category: "required"} + virusBreakendTsv: {description: "The TSV output from virusbreakend.", category: "required"} + taxonomyDbTsv: {description: "A taxonomy database tsv.", category: "required"} + virusInterpretationTsv: {description: "A virus interpretation tsv.", category: "required"} + virusBlacklistTsv: {description: "A virus blacklist tsv.", category: "required"} + outputDir: {description: "The directory the output will be written to.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From f169d78589c3e4d2a97892cfc3fb685d6c217d6c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 29 Jun 2021 16:02:28 +0200 Subject: [PATCH 523/902] add protect --- hmftools.wdl | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index f1617bbe..646d01ea 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -542,6 +542,101 @@ task Linx { } } +task Protect { + input { + String refGenomeVersion + String tumorName + String normalName + Array[String]+ sampleDoids + String outputDir = "." + Array[File]+ serveActionability + File doidsJson + File purplePurity + File purpleQc + File purpleDriverCatalogSomatic + File purpleDriverCatalogGermline + File purpleSomaticVariants + File purpleSomaticVariantsIndex + File purpleGermlineVariants + File purpleGermlineVariantsIndex + File purpleGeneCopyNumber + File linxFusion + File linxBreakend + File linxDriversCatalog + File chordPrediction + File annotatedVirus + + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biowdl/protect:v1.4" + } + + command { + protect -Xmx~{javaXmx} \ + -ref_genome_version ~{refGenomeVersion} \ + -tumor_sample_id ~{tumorName} \ + -reference_sample_id ~{normalName} \ + -primary_tumor_doids ~{sep=";" sampleDoids} \ + -output_dir ~{outputDir} \ + -serve_actionability_dir ~{sub(serveActionability[0], basename(serveActionability[0]), "")} \ + -doid_json ~{doidsJson} \ + -purple_purity_tsv ~{purplePurity} \ + -purple_qc_file ~{purpleQc} \ + -purple_somatic_driver_catalog_tsv ~{purpleDriverCatalogSomatic} \ + -purple_germline_driver_catalog_tsv ~{purpleDriverCatalogGermline} \ + -purple_somatic_variant_vcf ~{purpleSomaticVariants} \ + -purple_germline_variant_vcf ~{purpleGermlineVariants} \ + -purple_gene_copy_number_tsv ~{purpleGeneCopyNumber} \ + -linx_fusion_tsv 
~{linxFusion} \ + -linx_breakend_tsv ~{linxBreakend} \ + -linx_driver_catalog_tsv ~{linxDriversCatalog} \ + -chord_prediction_txt ~{chordPrediction} \ + -annotated_virus_tsv ~{annotatedVirus} + } + + output { + File protectTsv = "~{outputDir}/~{tumorName}.protect.tsv" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + normalName: {description: "The name of the normal sample.", category: "required"} + sampleDoids: {description: "The DOIDs (Human Disease Ontology) for the primary tumor.", category: "required"} + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + serveActionability: {description: "The actionability files generated by hmftools' serve.", category: "required"} + doidsJson: {description: "A json with the DOID (Human Disease Ontology) tree.", category: "required"} + purplePurity: {description: "The purity file generated by purple.", category: "required"} + purpleQc: {description: "The QC file generated by purple.", category: "required"} + purpleDriverCatalogSomatic: {description: "The somatic driver catalog generated by purple.", category: "required"} + purpleDriverCatalogGermline: {description: "The germline driver catalog generated by purple.", category: "required"} + purpleSomaticVariants: {description: "The somatic VCF generated by purple.", category: "required"} + purpleSomaticVariantsIndex: {description: "The index for the somatic VCF generated by purple.", category: "required"} + purpleGermlineVariants: {description: "The germline VCF generated by purple.", category: "required"} + purpleGermlineVariantsIndex: {description: "The index of the germline VCF generated by purple.", category: "required"} + 
purpleGeneCopyNumber: {description: "The gene copy number file generated by purple.", category: "required"} + linxFusion: {description: "The fusion file generated by linx.", category: "required"} + linxBreakend: {description: "The breakend file generated by linx.", category: "required"} + linxDriversCatalog: {description: "The driver catalog generated generated by linx.", category: "required"} + chordPrediction: {description: "The chord prediction file.", category: "required"} + annotatedVirus: {description: "The virus-interpreter output.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Purple { input { String normalName From 47c89884e700c1c7ad11ba26e195d7812a6f1fac Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 30 Jun 2021 14:27:10 +0200 Subject: [PATCH 524/902] update CPAT to 3.0.4 --- CHANGELOG.md | 2 ++ CPAT.wdl | 17 +++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 28e998e6..112b8f04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Update CPAT to version 3.0.4. + + Changed the `outFilePath` input to `outputPrefix`. + GffCompare: Make the `referenceAnnotation` input optional. + Stringtie: Add the `minimumCoverage` input. 
+ UMI-tools: update default dockerImage to use umitools v1.1.1 with correct samtools version (1.10) diff --git a/CPAT.wdl b/CPAT.wdl index afb67853..b3414bc5 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -23,7 +23,7 @@ version 1.0 task CPAT { input { File gene - String outFilePath + String outputPrefix File hex File logitModel @@ -35,7 +35,7 @@ task CPAT { Array[String]? stopCodons Int timeMinutes = 10 + ceil(size(gene, "G") * 30) - String dockerImage = "biocontainers/cpat:v1.2.4_cv1" + String dockerImage = "biocontainers/cpat:3.0.4--py39hcbe4a3b_0" } # Some WDL magic in the command section to properly output the start and @@ -47,7 +47,7 @@ task CPAT { mkdir -p "$(dirname ~{outFilePath})" cpat.py \ --gene ~{gene} \ - --outfile ~{outFilePath} \ + --outfile ~{outputPrefix} \ --hex ~{hex} \ --logitModel ~{logitModel} \ ~{"--ref " + referenceGenome} \ @@ -56,7 +56,11 @@ task CPAT { } output { - File outFile = outFilePath + File orfSeqs = "~{outputPrefix}.ORF_seqs.fa" + File orfProb = "~{outputPrefix}.ORF_prob.tsv" + File orfProbBest = "~{outputPrefix}.ORF_prob.best.tsv" + File noOrf = "~{outputPrefix}.no_ORF.txt" + File rScript = "~{outputPrefix}.r" } runtime { @@ -67,7 +71,7 @@ task CPAT { parameter_meta { # inputs gene: {description: "Equivalent to CPAT's `--gene` option.", category: "required"} - outFilePath: {description: "Equivalent to CPAT's `--outfile` option.", category: "required"} + outputPrefix: {description: "Equivalent to CPAT's `--outfile` option.", category: "required"} hex: {description: "Equivalent to CPAT's `--hex` option.", category: "required"} logitModel: {description: "Equivalent to CPAT's `--logitModel` option.", category: "required"} referenceGenome: {description: "Equivalent to CPAT's `--ref` option.", category: "advanced"} @@ -76,9 +80,6 @@ task CPAT { stopCodons: {description: "Equivalent to CPAT's `--stop` option.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} 
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - - # outputs - outFile: {description: "CPAT logistic regression model."} } } From d4d36e02f167fc1676071d67c6749feee44c510d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 30 Jun 2021 15:00:45 +0200 Subject: [PATCH 525/902] fix mkdir in CPAT --- CPAT.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CPAT.wdl b/CPAT.wdl index afce53e2..972613cf 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -45,7 +45,7 @@ task CPAT { # to non-optionals. command { set -e - mkdir -p "$(dirname ~{outFilePath})" + mkdir -p "$(dirname ~{outputPrefix})" cpat.py \ --gene ~{gene} \ --outfile ~{outputPrefix} \ From 8eb013496e3e81107ed18d8c5f067a9ffec15dea Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 30 Jun 2021 16:49:50 +0200 Subject: [PATCH 526/902] fix cpat dockerimage --- CPAT.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CPAT.wdl b/CPAT.wdl index 972613cf..e6cef3ea 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -36,7 +36,7 @@ task CPAT { String memory = "4G" Int timeMinutes = 10 + ceil(size(gene, "G") * 30) - String dockerImage = "biocontainers/cpat:3.0.4--py39hcbe4a3b_0" + String dockerImage = "quay.io/biocontainers/cpat:3.0.4--py39hcbe4a3b_0" } # Some WDL magic in the command section to properly output the start and From 3c92beac7d694209332b66e6869c7c7b6a3ea885 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 1 Jul 2021 12:28:31 +0200 Subject: [PATCH 527/902] remove tabix from gridss --- gridss.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 52e039d1..6c8899e4 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -112,7 +112,6 @@ task GRIDSS { ~{"--blacklist " + blacklistBed} \ ~{normalBam} \ ~{tumorBam} - tabix -p vcf ~{outputPrefix}.vcf.gz samtools index ~{outputPrefix}_assembly.bam ~{outputPrefix}_assembly.bai } From 
44a70a394df432fe678a0fa82ef015acf3e5c6d7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jul 2021 09:42:45 +0200 Subject: [PATCH 528/902] fix missing backslash --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 6c8899e4..f9a92f56 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -167,7 +167,7 @@ task GridssAnnotateVcfRepeatmasker { } command { - gridss_annotate_vcf_repeatmasker + gridss_annotate_vcf_repeatmasker \ --output ~{outputPath} \ --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ -w . \ From 7c5ce8c031f34744f9759e59b2617113120a40be Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jul 2021 10:50:50 +0200 Subject: [PATCH 529/902] set default timeMinutes GridssAnnotateVcfRepeatmasker to 120 --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index f9a92f56..02f32297 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -163,7 +163,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "4G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 1 + ceil(size(gridssVcf, "G") * 3) + Int timeMinutes = 120 } command { From f3ac54310bf8eabcf1fdeb61d1caca2149bac033 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 2 Jul 2021 12:35:33 +0200 Subject: [PATCH 530/902] Update spades.wdl Co-authored-by: Davy Cats --- spades.wdl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/spades.wdl b/spades.wdl index 1f246d48..3975dd32 100644 --- a/spades.wdl +++ b/spades.wdl @@ -34,16 +34,16 @@ task Spades { File? tslrContigs File? trustedContigs File? 
untrustedContigs - Boolean singleCell = False - Boolean metagenomic = False - Boolean rna = False - Boolean plasmid = False - Boolean ionTorrent = False - Boolean onlyErrorCorrection = False - Boolean onlyAssembler = False - Boolean careful = False - Boolean disableGzipOutput = False - Boolean disableRepeatResolution = False + Boolean singleCell = false + Boolean metagenomic = false + Boolean rna = false + Boolean plasmid = false + Boolean ionTorrent = false + Boolean onlyErrorCorrection = false + Boolean onlyAssembler = false + Boolean careful = false + Boolean disableGzipOutput = false + Boolean disableRepeatResolution = false File? dataset File? tmpDir String? k From 4e0ab25eead014f9e9038bab7ea61a810dbb05cc Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jul 2021 13:02:18 +0200 Subject: [PATCH 531/902] small formatting fix --- gffcompare.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gffcompare.wdl b/gffcompare.wdl index aa7c7209..d06602bc 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -116,8 +116,8 @@ task GffCompare { runtime { memory: memory - time_minutes: timeMinutes - docker: dockerImage + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { From 497f12a7446dc80873a66fa00db1c9bbc0eece99 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jul 2021 13:47:29 +0200 Subject: [PATCH 532/902] adjust repeatmasker time --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 02f32297..db20a203 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -163,7 +163,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "4G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 120 + Int timeMinutes = 1440 } command { From 7f4433f50b5ef8deaeb1d86beaaaae5ff07bae41 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jul 2021 15:25:28 +0200 Subject: [PATCH 533/902] fix missing memory runtime BgzipAndIndex --- samtools.wdl | 1 + 1 file 
changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index d34df51e..c8837d94 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -46,6 +46,7 @@ task BgzipAndIndex { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } From 477f00f57a1bf445672da7b7be7ed999e6230e93 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 5 Jul 2021 09:13:07 +0200 Subject: [PATCH 534/902] increase time for GridssAnnotateVcfRepeatmasker --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index db20a203..f137f968 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -163,7 +163,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "4G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 1440 + Int timeMinutes = 2880 } command { From 4a32a443a29e324b8b01fac1fdbc01a7f2078f79 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 7 Jul 2021 09:39:47 +0200 Subject: [PATCH 535/902] increase memory repeatmasker --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index f137f968..ad230d05 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -161,7 +161,7 @@ task GridssAnnotateVcfRepeatmasker { File gridssVcfIndex String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" - String memory = "4G" + String memory = "50G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" Int timeMinutes = 2880 } From 9d3b5a556bd642d8dc8d098694497a5a3b1950fb Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 7 Jul 2021 09:46:34 +0200 Subject: [PATCH 536/902] add threads to repeatmasker --- gridss.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index ad230d05..069d6953 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -162,6 +162,7 @@ task GridssAnnotateVcfRepeatmasker { String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" String memory = "50G" + Int threads = 4 String dockerImage = 
"quay.io/biocontainers/gridss:2.12.0--h270b39a_1" Int timeMinutes = 2880 } @@ -171,6 +172,7 @@ task GridssAnnotateVcfRepeatmasker { --output ~{outputPath} \ --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ -w . \ + -t ~{threads} \ ~{gridssVcf} } @@ -180,6 +182,7 @@ task GridssAnnotateVcfRepeatmasker { } runtime { + cpu: threads memory: memory time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage @@ -189,6 +192,7 @@ task GridssAnnotateVcfRepeatmasker { gridssVcf: {description: "The GRIDSS output.", category: "required"} gridssVcfIndex: {description: "The index for the GRIDSS output.", category: "required"} outputPath: {description: "The path the output should be written to.", category: "common"} + threads: {description: "The number of the threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", From a40300a4d6fb9296ca9e4a1978fbeffe3cb86f90 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 7 Jul 2021 16:59:58 +0200 Subject: [PATCH 537/902] update submodule scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index c31670d3..84690a30 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit c31670d3a9222a2feafc649cbc118c95afbc7189 +Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 From ff2b1efb8482282288107b28e1bf53ca91319b30 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 7 Jul 2021 17:00:43 +0200 Subject: [PATCH 538/902] Change current development version in CHANGELOG.md to stable version --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5eda114e..7cb7a436 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 5.0.0-dev +version 5.0.0 --------------------------- + Update CPAT to version 3.0.4. + Changed the `outFilePath` input to `outputPrefix`. 
From a411311e0d74045541a000176c2f172c7d0679fd Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 7 Jul 2021 17:02:48 +0200 Subject: [PATCH 539/902] update CI.yml --- .github/workflows/ci.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 97d329ad..78566111 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,10 +2,8 @@ name: Continuous integration on: pull_request: - paths: - - "**.wdl" # Workflow files and task - - "**.yml" # Ci configuration, tests and docker images - - "!docs/**" + paths_ignore: + - "docs/**" defaults: run: @@ -29,4 +27,4 @@ jobs: - name: install requirements run: conda install -n test cromwell miniwdl wdl-aid - name: run linting - run: bash scripts/biowdl_lint.sh \ No newline at end of file + run: bash scripts/biowdl_lint.sh From 9b0873ab9180e4af3a3ab869a4e909f5f0ee327e Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 7 Jul 2021 17:20:45 +0200 Subject: [PATCH 540/902] setting next version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 0062ac97..09b254e9 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -5.0.0 +6.0.0 From c80402130bdb7471e8f37fece8cb643625a0df02 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 09:58:51 +0200 Subject: [PATCH 541/902] fix Xmx in AnnotateInsertedSequence --- gridss.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 069d6953..aedac9ab 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -39,7 +39,9 @@ task AnnotateInsertedSequence { } command { - AnnotateInsertedSequence -Xmx~{javaXmx} \ + set -e + _JAVA_OPTIONS="${_JAVA_OPTIONS}:-Xmx~{javaXmx}" + AnnotateInsertedSequence \ REFERENCE_SEQUENCE=~{viralReference} \ INPUT=~{inputVcf} \ OUTPUT=~{outputPath} \ @@ -215,6 +217,7 @@ task Virusbreakend { } command { + set -e mkdir virusbreakenddb tar -xzvf ~{virusbreakendDB} -C virusbreakenddb --strip-components 1 
virusbreakend \ From 28b1a835d558d8ecd60682e9105731b6762f4c30 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 11:32:44 +0200 Subject: [PATCH 542/902] fix wrong placeholder --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index aedac9ab..66e27ff0 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -40,7 +40,7 @@ task AnnotateInsertedSequence { command { set -e - _JAVA_OPTIONS="${_JAVA_OPTIONS}:-Xmx~{javaXmx}" + _JAVA_OPTIONS="$_JAVA_OPTIONS:-Xmx~{javaXmx}" AnnotateInsertedSequence \ REFERENCE_SEQUENCE=~{viralReference} \ INPUT=~{inputVcf} \ From d28a2a529ede9ffc89b18628cc012c846354e096 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 11:53:24 +0200 Subject: [PATCH 543/902] typo --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 66e27ff0..fcfed095 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -35,12 +35,12 @@ task AnnotateInsertedSequence { String javaXmx = "8G" String memory = "9G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3 / threads) + Int timeMinutes = 120 } command { set -e - _JAVA_OPTIONS="$_JAVA_OPTIONS:-Xmx~{javaXmx}" + _JAVA_OPTIONS="$_JAVA_OPTIONS -Xmx~{javaXmx}" AnnotateInsertedSequence \ REFERENCE_SEQUENCE=~{viralReference} \ INPUT=~{inputVcf} \ From 572114885be2bd0243ac59898c223fbf954e1510 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 14:40:29 +0200 Subject: [PATCH 544/902] update gripss version --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 646d01ea..9dc78dd8 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -290,7 +290,7 @@ task GripssApplicationKt { String memory = "33G" String javaXmx = "32G" Int timeMinutes = 45 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" + String dockerImage = "umccr/gripss:1.11" #FIXME quay doesn't have 
the latest version "quay.io/biocontainers/hmftools-gripss:1.9--0" } command { @@ -346,7 +346,7 @@ task GripssHardFilterApplicationKt { String memory = "3G" String javaXmx = "2G" Int timeMinutes = 15 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" + String dockerImage = "umccr/gripss:1.11" #FIXME quay doesn't have the latest version "quay.io/biocontainers/hmftools-gripss:1.9--0" } command { From f62a7424b88a1de1e6c1791aeff7c020a60939cd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 14:42:20 +0200 Subject: [PATCH 545/902] fix gripss version in command --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 9dc78dd8..6a086d37 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -295,7 +295,7 @@ task GripssApplicationKt { command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ -tumor ~{tumorName} \ -reference ~{normalName} \ @@ -351,7 +351,7 @@ task GripssHardFilterApplicationKt { command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ -input_vcf ~{inputVcf} \ -output_vcf ~{outputPath} From e470f59fa587bef9dd075eb28ba6317be89a8416 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 9 Jul 2021 10:01:09 +0200 Subject: [PATCH 546/902] update purple and gripss versions --- hmftools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 6a086d37..8c38c501 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -290,7 +290,7 @@ task GripssApplicationKt { String memory = "33G" String javaXmx = "32G" Int timeMinutes = 45 - String dockerImage = "umccr/gripss:1.11" #FIXME quay doesn't have the latest 
version "quay.io/biocontainers/hmftools-gripss:1.9--0" + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" } command { @@ -346,7 +346,7 @@ task GripssHardFilterApplicationKt { String memory = "3G" String javaXmx = "2G" Int timeMinutes = 15 - String dockerImage = "umccr/gripss:1.11" #FIXME quay doesn't have the latest version "quay.io/biocontainers/hmftools-gripss:1.9--0" + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" } command { @@ -661,7 +661,7 @@ task Purple { Int timeMinutes = 30 String memory = "9G" String javaXmx = "8G" - String dockerImage = "quay.io/biocontainers/hmftools-purple:2.54--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-purple:3.1--hdfd78af_0" } command { From c47163aa1c9d67b5d675444d06afe36e5ee31ec9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 9 Jul 2021 13:24:47 +0200 Subject: [PATCH 547/902] change docker image for purple --- hmftools.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 8c38c501..bf79070e 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -661,7 +661,8 @@ task Purple { Int timeMinutes = 30 String memory = "9G" String javaXmx = "8G" - String dockerImage = "quay.io/biocontainers/hmftools-purple:3.1--hdfd78af_0" + # clone of quay.io/biocontainers/hmftools-purple:3.1--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6' + String dockerImage = "quay.io/biowdl/hmftools-purple:3.1" } command { From ed6061d1671ba091992248375e613daf57fd544d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 9 Jul 2021 14:20:38 +0200 Subject: [PATCH 548/902] fix linx output --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index bf79070e..a327fd0b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -488,7 +488,7 @@ task Linx { } output { - File driverCatalog = "~{outputDir}/~{sampleName}.driver.catalog.tsv" + File driverCatalog = 
"~{outputDir}/~{sampleName}.linx.driver.catalog.tsv" File linxBreakend = "~{outputDir}/~{sampleName}.linx.breakend.tsv" File linxClusters = "~{outputDir}/~{sampleName}.linx.clusters.tsv" File linxDrivers = "~{outputDir}/~{sampleName}.linx.drivers.tsv" From 1fbf3eacc269782a6035c0c2e2bec348a31de0e8 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 19 Jul 2021 11:54:26 +0200 Subject: [PATCH 549/902] update common.wdl fastqc.wdl samtools.wdl somaticseq.wdl umi-tools.wdl wisestork.wdl: add runtime memory --- common.wdl | 28 +++++++++++++++++++++++----- fastqc.wdl | 4 +++- samtools.wdl | 3 ++- somaticseq.wdl | 10 ++++++++++ umi-tools.wdl | 2 ++ wisestork.wdl | 6 ++++++ 6 files changed, 46 insertions(+), 7 deletions(-) diff --git a/common.wdl b/common.wdl index 54b11567..d29ed5da 100644 --- a/common.wdl +++ b/common.wdl @@ -24,6 +24,8 @@ task AppendToStringArray { input { Array[String] array String string + + String memory = "1G" } command { @@ -36,7 +38,7 @@ task AppendToStringArray { } runtime { - memory: "1G" + memory: memory } } @@ -45,9 +47,11 @@ task CheckFileMD5 { input { File file String md5 + # By default cromwell expects /bin/bash to be present in the container. # The 'bash' container does not fill this requirement. (It is in /usr/local/bin/bash) # Use a stable version of debian:stretch-slim for this. (Smaller than ubuntu) + String memory = "1G" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -60,6 +64,7 @@ task CheckFileMD5 { runtime { docker: dockerImage + memory: memory } } @@ -69,6 +74,8 @@ task ConcatenateTextFiles { String combinedFilePath Boolean unzip = false Boolean zip = false + + String memory = "1G" } # When input and output is both compressed decompression is not needed. @@ -86,7 +93,7 @@ task ConcatenateTextFiles { } runtime { - memory: "1G" + memory: memory } } @@ -97,6 +104,7 @@ task Copy { Boolean recursive = false # Version not that important as long as it is stable. 
+ String memory = "1G" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -112,6 +120,7 @@ task Copy { runtime { docker: dockerImage + memory: memory } } @@ -122,6 +131,8 @@ task CreateLink { input { String inputFile String outputPath + + String memory = "1G" } command { @@ -131,12 +142,17 @@ task CreateLink { output { File link = outputPath } + + runtime { + memory: memory + } } task MapMd5 { input { Map[String,String] map + String memory = "1G" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -150,7 +166,7 @@ task MapMd5 { } runtime { - memory: "1G" + memory: memory docker: dockerImage } } @@ -160,6 +176,7 @@ task StringArrayMd5 { input { Array[String] stringArray + String memory = "1G" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -173,7 +190,7 @@ task StringArrayMd5 { } runtime { - memory: "1G" + memory: memory docker: dockerImage } } @@ -183,6 +200,7 @@ task TextToFile { String text String outputFile = "out.txt" + String memory = "1G" Int timeMinutes = 1 String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -196,7 +214,7 @@ task TextToFile { } runtime { - memory: "1G" + memory: memory time_minutes: timeMinutes docker: dockerImage } diff --git a/fastqc.wdl b/fastqc.wdl index 973eeed9..3a07db4e 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -143,6 +143,7 @@ task Fastqc { task GetConfiguration { input { + String memory = "2G" # Needs more than 1 to pull the docker image. Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/fastqc:0.11.7--4" } @@ -165,7 +166,7 @@ task GetConfiguration { } runtime { - memory: "2G" # Needs more than 1 to pull the docker image. 
+ memory: memory time_minute: timeMinutes docker: dockerImage } @@ -173,6 +174,7 @@ task GetConfiguration { parameter_meta { # inputs timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/samtools.wdl b/samtools.wdl index 46d1eb70..04e27fca 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -25,7 +25,7 @@ task BgzipAndIndex { File inputFile String outputDir String type = "vcf" - + String memory = "2G" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -47,6 +47,7 @@ task BgzipAndIndex { runtime { time_minutes: timeMinutes docker: dockerImage + memory: memory } parameter_meta { diff --git a/somaticseq.wdl b/somaticseq.wdl index 07103ef9..27c3fe36 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -49,6 +49,7 @@ task ParallelPaired { Int threads = 1 Int timeMinutes = 60 + String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -92,6 +93,7 @@ task ParallelPaired { runtime { cpu: threads time_minutes: timeMinutes + memory: memory docker: dockerImage } @@ -161,6 +163,7 @@ task ParallelPairedTrain { Int threads = 1 Int timeMinutes = 240 + String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -203,6 +206,7 @@ task ParallelPairedTrain { runtime { cpu: threads time_minutes: timeMinutes + memory: memory docker: dockerImage } @@ -266,6 +270,7 @@ task ParallelSingle { Int threads = 1 Int timeMinutes = 60 + String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -302,6 +307,7 @@ task ParallelSingle { runtime { cpu: threads time_minutes: timeMinutes + memory: memory docker: dockerImage } @@ -355,6 +361,7 @@ task 
ParallelSingleTrain { Int threads = 1 Int timeMinutes = 240 + String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -390,6 +397,7 @@ task ParallelSingleTrain { runtime { cpu: threads time_minutes: timeMinutes + memory: memory docker: dockerImage } @@ -430,6 +438,7 @@ task ModifyStrelka { String outputVCFName = basename(strelkaVCF, ".gz") Int timeMinutes = 20 + String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -448,6 +457,7 @@ task ModifyStrelka { runtime { time_minutes: timeMinutes + memory: memory docker: dockerImage } diff --git a/umi-tools.wdl b/umi-tools.wdl index 86bf1314..b79817c2 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -30,6 +30,7 @@ task Extract { String? read2Output = "umi_extracted_R2.fastq.gz" Boolean threePrime = false + String memory = "20G" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:3067b520386698317fd507c413baf7f901666fd4-0" } @@ -51,6 +52,7 @@ task Extract { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } diff --git a/wisestork.wdl b/wisestork.wdl index 6be32168..8fb4b76b 100644 --- a/wisestork.wdl +++ b/wisestork.wdl @@ -31,6 +31,7 @@ task Count { Int? binSize File? binFile + String memory = "2G" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -50,6 +51,7 @@ task Count { } runtime { + memory: memory docker: dockerImage } } @@ -67,6 +69,7 @@ task GcCorrect { Int? iter Float? fracLowess + String memory = "2G" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -89,6 +92,7 @@ task GcCorrect { } runtime { + memory: memory docker: dockerImage } } @@ -143,6 +147,7 @@ task Zscore { Int? binSize File? 
binFile + String memory = "2G" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -163,6 +168,7 @@ task Zscore { } runtime { + memory: memory docker: dockerImage } } From 84cd7f692e38b4903e7945315a44265d7e16c5f2 Mon Sep 17 00:00:00 2001 From: cagaser Date: Tue, 20 Jul 2021 09:04:24 +0200 Subject: [PATCH 550/902] update CHANGELOG --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7cb7a436..d7bbc697 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> +version 5.0.1-dev +--------------------------- ++ Update number of tasks: add memory runtime version 5.0.0 --------------------------- From e1281833adff3aab2489d315f8e7da98e0e2fade Mon Sep 17 00:00:00 2001 From: cagaser Date: Tue, 20 Jul 2021 14:41:54 +0200 Subject: [PATCH 551/902] update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7bbc697..7f9df602 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> -version 5.0.1-dev +version 5.0.1 --------------------------- + Update number of tasks: add memory runtime From c9c36cf805cb70e488136609d99f601b16b1aa66 Mon Sep 17 00:00:00 2001 From: cagaser Date: Tue, 20 Jul 2021 16:39:50 +0200 Subject: [PATCH 552/902] update samtools.wdl somaticseq.wdl: add memory runtime --- samtools.wdl | 3 +++ somaticseq.wdl | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 46d1eb70..81b6c17d 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -26,6 +26,7 @@ task BgzipAndIndex { String outputDir String type = "vcf" + String memory = "2G" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -45,6 +46,7 @@ task BgzipAndIndex { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -54,6 +56,7 @@ task BgzipAndIndex { inputFile: {description: "The file to be compressed and indexed.", category: "required"} outputDir: {description: "The directory in which the output will be placed.", category: "required"} type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/somaticseq.wdl b/somaticseq.wdl index 07103ef9..2992a800 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -47,6 +47,7 @@ task ParallelPaired { File? strelkaSNV File? 
strelkaIndel + String memory = "2G" Int threads = 1 Int timeMinutes = 60 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -91,6 +92,7 @@ task ParallelPaired { runtime { cpu: threads + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -121,6 +123,7 @@ task ParallelPaired { strelkaSNV: {description: "An SNV VCF as produced by strelka.", category: "advanced"} strelkaIndel: {description: "An indel VCF as produced by somaticsniper.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + ParallelPaired timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -159,6 +162,7 @@ task ParallelPairedTrain { File? strelkaSNV File? strelkaIndel + String memory = "2G" Int threads = 1 Int timeMinutes = 240 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -202,6 +206,7 @@ task ParallelPairedTrain { runtime { cpu: threads + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -232,6 +237,7 @@ task ParallelPairedTrain { strelkaSNV: {description: "An SNV VCF as produced by strelka.", category: "advanced"} strelkaIndel: {description: "An indel VCF as produced by somaticsniper.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -264,6 +270,7 @@ task ParallelSingle { File? scalpelVCF File? 
strelkaVCF + String memory = "2G" Int threads = 1 Int timeMinutes = 60 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -301,6 +308,7 @@ task ParallelSingle { runtime { cpu: threads + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -323,6 +331,7 @@ task ParallelSingle { scalpelVCF: {description: "A VCF as produced by scalpel.", category: "advanced"} strelkaVCF: {description: "A VCF as produced by strelka.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -353,6 +362,7 @@ task ParallelSingleTrain { File? scalpelVCF File? strelkaVCF + String memory = "2G" Int threads = 1 Int timeMinutes = 240 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -389,6 +399,7 @@ task ParallelSingleTrain { runtime { cpu: threads + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -411,6 +422,7 @@ task ParallelSingleTrain { scalpelVCF: {description: "A VCF as produced by scalpel.", category: "advanced"} strelkaVCF: {description: "A VCF as produced by strelka.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -429,6 +441,7 @@ task ModifyStrelka { File strelkaVCF String outputVCFName = basename(strelkaVCF, ".gz") + String memory = "2G" Int timeMinutes = 20 String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -447,6 +460,7 @@ task ModifyStrelka { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -455,6 +469,7 @@ task ModifyStrelka { # inputs strelkaVCF: {description: "A vcf file as produced by strelka.", category: "required"} outputVCFName: {description: "The location the output VCF file should be written to.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 67c69d4eb336c69de999c66fadccf6c91345e0c7 Mon Sep 17 00:00:00 2001 From: cagaser Date: Tue, 20 Jul 2021 16:46:45 +0200 Subject: [PATCH 553/902] small fix --- somaticseq.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/somaticseq.wdl b/somaticseq.wdl index 2992a800..63f8362e 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -123,7 +123,7 @@ task ParallelPaired { strelkaSNV: {description: "An SNV VCF as produced by strelka.", category: "advanced"} strelkaIndel: {description: "An indel VCF as produced by somaticsniper.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} - ParallelPaired + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 5cb91703fb777ae35bcf6e509f7e124643339891 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 21 Jul 2021 14:18:56 +0200 Subject: [PATCH 554/902] update CHANGELOG --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7cb7a436..dfb81c47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> +version 5.0.1-dev +--------------------------- ++ add runtime memory to number of tasks. version 5.0.0 --------------------------- From 70cb8bdcbfa7d9384b2fd943a2686d01357854d3 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 21 Jul 2021 14:34:30 +0200 Subject: [PATCH 555/902] update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dfb81c47..b3dbc7f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 5.0.1-dev +version 5.0.1 --------------------------- + add runtime memory to number of tasks. 
From 8349056c8bd768e472c4178201f3241edaa7952f Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 21 Jul 2021 16:05:31 +0200 Subject: [PATCH 556/902] remove duplicated memory --- somaticseq.wdl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/somaticseq.wdl b/somaticseq.wdl index afe2918f..8c7fb884 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -50,7 +50,6 @@ task ParallelPaired { String memory = "2G" Int threads = 1 Int timeMinutes = 60 - String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -167,7 +166,6 @@ task ParallelPairedTrain { String memory = "2G" Int threads = 1 Int timeMinutes = 240 - String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -277,7 +275,6 @@ task ParallelSingle { String memory = "2G" Int threads = 1 Int timeMinutes = 60 - String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -371,7 +368,6 @@ task ParallelSingleTrain { String memory = "2G" Int threads = 1 Int timeMinutes = 240 - String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -451,7 +447,6 @@ task ModifyStrelka { String memory = "2G" Int timeMinutes = 20 - String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } From bc1bacf11498d2d30b85591cfccdcf71ef0966a5 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 21 Jul 2021 16:22:35 +0200 Subject: [PATCH 557/902] remove duplicate memory --- samtools.wdl | 1 - somaticseq.wdl | 5 ----- 2 files changed, 6 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 54215831..81b6c17d 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -49,7 +49,6 @@ task BgzipAndIndex { memory: memory time_minutes: timeMinutes docker: dockerImage - memory: memory } parameter_meta { diff --git a/somaticseq.wdl b/somaticseq.wdl index 8c7fb884..63f8362e 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -94,7 +94,6 @@ task ParallelPaired { cpu: threads memory: memory time_minutes: timeMinutes - memory: memory docker: dockerImage } @@ -209,7 +208,6 @@ 
task ParallelPairedTrain { cpu: threads memory: memory time_minutes: timeMinutes - memory: memory docker: dockerImage } @@ -312,7 +310,6 @@ task ParallelSingle { cpu: threads memory: memory time_minutes: timeMinutes - memory: memory docker: dockerImage } @@ -404,7 +401,6 @@ task ParallelSingleTrain { cpu: threads memory: memory time_minutes: timeMinutes - memory: memory docker: dockerImage } @@ -466,7 +462,6 @@ task ModifyStrelka { runtime { memory: memory time_minutes: timeMinutes - memory: memory docker: dockerImage } From ab17de947e0509b853a60e87e80399e1ca83f826 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 Jul 2021 13:37:37 +0200 Subject: [PATCH 558/902] add task for peach --- peach.wdl | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 peach.wdl diff --git a/peach.wdl b/peach.wdl new file mode 100644 index 00000000..9321d6bf --- /dev/null +++ b/peach.wdl @@ -0,0 +1,77 @@ +version 1.0 + +# Copyright (c) 2021 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Peach { + input { + File transcriptTsv + File germlineVcf + File germlineVcfIndex + File tumorName + File normalName + String outputDir = "./peach" + File panelJson + + String memory = "8G" + String dockerImage = "quay.io/biowdl/peach:v1.0" + Int timeMinutes = 20 + } + + command { + peach \ + --recreate_bed \ + --transcript_tsv ~{transcriptTsv} \ + ~{germlineVcf} \ + ~{tumorName} \ + ~{normalName} \ + 1.0 \ + ~{outputDir} \ + ~{panelJson} \ + vcftools + } + + output { + File callsTsv = "~{outputDir}/~{tumorName}.peach.calls.tsv" + File filteredVcf = "~{outputDir}/~{tumorName}.peach.filtered.vcf" + File genotypeTsv = "~{outputDir}/~{tumorName}.peach.genotype.tsv" + Array[File] peachFiles = [callsTsv, filterVcf, genotypeTsv] + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + transcriptTsv: {description: "A tsv file describing transcripts.", category: "required"} + germlineVcf: {description: "The germline VCF file from hmftools' purple.", category: "required"} + germlineVcfIndex: {description: "The germline VCF's index.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + normalName: {description: "The name of the normal sample", category: "required"} + outputDir: {description: "The directory the ouput should be written to.", category: "required"} + panelJson: {description: "A JSON describing the panel.", category: "required"} + + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The 
docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file From 1648c818b856f22ed9e7c8b6443d2e9bc072eb6a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 Jul 2021 14:21:41 +0200 Subject: [PATCH 559/902] rename array output peach --- peach.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/peach.wdl b/peach.wdl index 9321d6bf..72c7fde6 100644 --- a/peach.wdl +++ b/peach.wdl @@ -52,7 +52,7 @@ task Peach { File callsTsv = "~{outputDir}/~{tumorName}.peach.calls.tsv" File filteredVcf = "~{outputDir}/~{tumorName}.peach.filtered.vcf" File genotypeTsv = "~{outputDir}/~{tumorName}.peach.genotype.tsv" - Array[File] peachFiles = [callsTsv, filterVcf, genotypeTsv] + Array[File] outputs = [callsTsv, filterVcf, genotypeTsv] } runtime { From dcafd29087866bfa4bc464e9fd301e8de234c138 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 Jul 2021 15:20:03 +0200 Subject: [PATCH 560/902] fix validation issues --- peach.wdl | 2 +- samtools.wdl | 2 -- scripts | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/peach.wdl b/peach.wdl index 72c7fde6..5e0746aa 100644 --- a/peach.wdl +++ b/peach.wdl @@ -52,7 +52,7 @@ task Peach { File callsTsv = "~{outputDir}/~{tumorName}.peach.calls.tsv" File filteredVcf = "~{outputDir}/~{tumorName}.peach.filtered.vcf" File genotypeTsv = "~{outputDir}/~{tumorName}.peach.genotype.tsv" - Array[File] outputs = [callsTsv, filterVcf, genotypeTsv] + Array[File] outputs = [callsTsv, filteredVcf, genotypeTsv] } runtime { diff --git a/samtools.wdl b/samtools.wdl index 7eb86351..81b6c17d 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -28,7 +28,6 @@ task BgzipAndIndex { String memory = "2G" Int timeMinutes = 1 + ceil(size(inputFile, "G")) - String memory = "1G" String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -59,7 +58,6 @@ task BgzipAndIndex { type: {description: "The type of file (eg. 
vcf or bed) to be compressed and indexed.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/scripts b/scripts index 84690a30..c31670d3 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 +Subproject commit c31670d3a9222a2feafc649cbc118c95afbc7189 From dce31f572b08b3ef1ff3209f101ec4e3e838646c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 29 Jul 2021 12:05:22 +0200 Subject: [PATCH 561/902] update linx version --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index a327fd0b..1e25938d 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -464,7 +464,7 @@ task Linx { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.15--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.16--hdfd78af_0" } command { From 09d899b85aec47bcb065cb8b584e703828d488e7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Aug 2021 13:27:41 +0200 Subject: [PATCH 562/902] add bedtools coverage --- bedtools.wdl | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/bedtools.wdl b/bedtools.wdl index 3dbf93cb..f8713d2e 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -66,6 +66,55 @@ task Complement { } } +task Coverage { + input { + File genomeFile + File a + File? aIndex + File b + File? 
bIndex + String outputPath = "./coverage.tsv" + + String memory = "8G" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + } + + command { + bedtools coverage \ + -sorted \ + -g ~{genomeFile} \ + -a ~{a} \ + -b ~{b} \ + -d \ + > ~{outputPath} + } + + output { + File coverageTsv = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + genomeFile: {description: "A file listing the chromosomes and their lengths.", category: "required"} + a: {description: "The file containing the regions for which the coverage will be counted.", category: "required"} + aIndex: {description: "An index for the file given as `a`.", category: "common"} + b: {description: "The file in which the coverage will be counted. Likely a BAM file.", category: "required"} + bIndex: {description: "An index for the file given as `b`.", category: "common"} + outputPath: {description: "The path the ouptu will be written to.", category: "common"} + + memory: {description: "The amount of memory needed for the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + } +} + task Merge { input { File inputBed From 70cda88f96eecabb9b9a8d5f75f88515c0840a8f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 18 Aug 2021 16:00:25 +0200 Subject: [PATCH 563/902] add deconstructSigs task --- deconstructsigs.wdl | 66 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 deconstructsigs.wdl diff --git a/deconstructsigs.wdl b/deconstructsigs.wdl new file mode 100644 index 00000000..ef47e3e3 --- /dev/null +++ b/deconstructsigs.wdl @@ -0,0 +1,66 @@ +# Copyright (c) 2021 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +version 1.0 + +task DeconstructSigs { + input { + File signaturesMatrix + File signaturesReference + String outputPath = "./signatures.rds" + + Int timeMinutes = 15 + String memory = "4G" + String dockerImage = "quay.io/biocontainers/r-deconstructsigs:1.9.0--r41hdfd78af_1" + } + + command { + R --no-echo << EOF + library(deconstructSigs) + tumor <- read.table("~{signaturesMatrix}", check.names=F) + ref <- data.frame(t(read.table("~{signaturesReference}", check.names=F, header=T, row.names="Type")), check.names=F) + tumor <- tumor[,colnames(ref)] + + sigs <- whichSignatures(tumor.ref=tumor, row.names(tumor), signatures.ref=ref, contexts.needed=T) + saveRDS(sigs, "~{outputPath}") + EOF + } + + output { + File signatureRDS = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + signaturesMatrix: {description: "A table containing columns represtenting mutation types (matching the types in the signatures reference) and one row with the counts for each of these types for the sample of intrest.", + category: "required"} + signaturesReference: {description: "A table describing the mutational signatures, formatted like those provided by COSMIC.", + category: "required"} + outputPath: {description: "The location the output will be written to.", category: "common"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file From 26574bf26bef2663e9a67fe99c2a241762eb4365 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 20 Aug 2021 13:43:07 +0200 Subject: [PATCH 564/902] update bedtools version for coverage --- bedtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bedtools.wdl b/bedtools.wdl index f8713d2e..1d956cab 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -77,7 +77,7 @@ task Coverage { String memory = "8G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + String dockerImage = "quay.io/biocontainers/bedtools:2.30.0--h7d7f7ad_2" } command { From e8df466dfba91be4e2c08e9fa57607ad48936d01 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 23 Aug 2021 12:11:39 +0200 Subject: [PATCH 565/902] fix incorrect type --- peach.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/peach.wdl b/peach.wdl index 5e0746aa..b57842f7 100644 --- a/peach.wdl +++ b/peach.wdl @@ -25,8 +25,8 @@ task Peach { File transcriptTsv File germlineVcf File germlineVcfIndex - File tumorName - File normalName + String tumorName + String normalName String outputDir = "./peach" File panelJson From d76faa5a05528e6a74488b46a18bdfcd1a9402ea Mon Sep 17 00:00:00 2001 From: cedrick Date: Fri, 17 Sep 2021 09:55:37 +0200 Subject: [PATCH 566/902] update bcftools.wdk --- bcftools.wdl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 0cbfdefd..0738d156 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -290,7 +290,7 @@ task View { input { File inputFile String outputPath = "output.vcf" - + Boolean excludeUncalled = false String? exclude String? 
include String memory = "256M" @@ -304,8 +304,9 @@ task View { set -e mkdir -p "$(dirname ~{outputPath})" bcftools view \ - ~{"--include " + include} \ ~{"--exclude " + exclude} \ + ~{"--include " + include} \ + ~{true="--exclude-uncalled" false="" excludeUncalled} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ ~{inputFile} @@ -330,6 +331,8 @@ task View { outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} + excludeUncalled: {description: "Exclude sites without a called genotype (see man page for details).", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 15b12101e04df8d842f68cb5ddef7f7f8a932a9e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 24 Sep 2021 14:55:43 +0200 Subject: [PATCH 567/902] fix protect command with multiple doids --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 1e25938d..199d7d88 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -577,7 +577,7 @@ task Protect { -ref_genome_version ~{refGenomeVersion} \ -tumor_sample_id ~{tumorName} \ -reference_sample_id ~{normalName} \ - -primary_tumor_doids ~{sep=";" sampleDoids} \ + -primary_tumor_doids '~{sep=";" sampleDoids}' \ -output_dir ~{outputDir} \ -serve_actionability_dir ~{sub(serveActionability[0], basename(serveActionability[0]), "")} \ -doid_json ~{doidsJson} \ From a7a504e4a3589787d8c25c5ca97149598b65f572 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 24 Sep 2021 17:01:47 +0200 Subject: [PATCH 568/902] adjust resource settings --- gridss.wdl | 8 ++++---- hmftools.wdl | 4 ++-- peach.wdl | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index fcfed095..03193cca 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -94,7 +94,7 @@ task GRIDSS { File? 
gridssProperties Int jvmHeapSizeGb = 64 - Int threads = 4 + Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" } @@ -163,10 +163,10 @@ task GridssAnnotateVcfRepeatmasker { File gridssVcfIndex String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" - String memory = "50G" - Int threads = 4 + String memory = "25G" + Int threads = 8 String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 2880 + Int timeMinutes = 1440 } command { diff --git a/hmftools.wdl b/hmftools.wdl index 199d7d88..f8b13c66 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -799,10 +799,10 @@ task Sage { String? mnvFilterEnabled File? coverageBed - Int threads = 2 + Int threads = 4 String javaXmx = "50G" String memory = "60G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 8 / threads) + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 9 / threads) String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_0" } diff --git a/peach.wdl b/peach.wdl index b57842f7..af44daec 100644 --- a/peach.wdl +++ b/peach.wdl @@ -30,9 +30,9 @@ task Peach { String outputDir = "./peach" File panelJson - String memory = "8G" + String memory = "2G" String dockerImage = "quay.io/biowdl/peach:v1.0" - Int timeMinutes = 20 + Int timeMinutes = 5 } command { From 9d5972de8bd3cb4e0766a78461a989f878f88999 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 30 Sep 2021 11:44:16 +0200 Subject: [PATCH 569/902] update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b3dbc7f6..71df5def 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.0.1 --------------------------- ++ Smoove: enable genotyping ++ Bcftools: add boolean option to remove uncalled genotypes. + add runtime memory to number of tasks. 
version 5.0.0 From 48f0c3ebf543b0c2e707c73fb00bdafe308a4395 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 1 Oct 2021 13:26:07 +0200 Subject: [PATCH 570/902] update changelog --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71df5def..a6df9307 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,6 @@ that users understand how the changes affect the new version. version 5.0.1 --------------------------- + Smoove: enable genotyping -+ Bcftools: add boolean option to remove uncalled genotypes. + add runtime memory to number of tasks. version 5.0.0 From c48f3bb7078e52bbb653848857028ddc9d43a6de Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 28 Oct 2021 12:32:49 +0200 Subject: [PATCH 571/902] increase memory for sambamba markdup --- sambamba.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sambamba.wdl b/sambamba.wdl index e78f50b6..4c2115e0 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -81,8 +81,8 @@ task Markdup { # 2 threads reduces wall clock time by more than 40%. Int threads = 2 # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size. - # Added 4096 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. - Int memoryMb = 4096 + sortBufferSize + 2 * ioBufferSize + # Added 8192 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. + Int memoryMb = 8192 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
Int timeMinutes = 1 + ceil(size(inputBams, "G") * 25) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" From 0932a62d6a00e5c600fcda7c3fa3a7aec40638bb Mon Sep 17 00:00:00 2001 From: Jeremy Leipzig Date: Thu, 28 Oct 2021 08:52:01 -0600 Subject: [PATCH 572/902] Update chunked-scatter.wdl Older container is not producing the necessary bed file --- chunked-scatter.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index fba1af5a..66954c36 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -86,7 +86,7 @@ task ScatterRegions { String memory = "256M" Int timeMinutes = 2 - String dockerImage = "quay.io/biocontainers/chunked-scatter:0.2.0--py_0" + String dockerImage = "quay.io/biocontainers/chunked-scatter:1.0.0--py_0" } String finalSize = if defined(scatterSize) then "~{scatterSize}" else "~{scatterSizeMillions}000000" From 8224e2cb52132a7978db5760afa813d640d2bb74 Mon Sep 17 00:00:00 2001 From: Jeremy Leipzig Date: Thu, 28 Oct 2021 08:55:46 -0600 Subject: [PATCH 573/902] Update CHANGELOG.md --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a6df9307..6d40cd1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> +version 5.0.2 +--------------------------- ++ bumped ScatterRegions container to 1.0.0 + version 5.0.1 --------------------------- + Smoove: enable genotyping From 9e868dbcfbd4374ef6e04fbe389bf550be67a6ba Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 8 Nov 2021 14:26:37 +0100 Subject: [PATCH 574/902] add img input for virusbreakend --- gridss.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 03193cca..b36d6598 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -207,6 +207,7 @@ task Virusbreakend { File bam File bamIndex File referenceFasta + File referenceImg File virusbreakendDB String outputPath = "./virusbreakend.vcf" @@ -246,6 +247,7 @@ task Virusbreakend { bam: {description: "A BAM file.", category: "required"} bamIndex: {description: "The index for the BAM file.", category: "required"} referenceFasta: {description: "The fasta of the reference genome.", category: "required"} + referenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the reference.", category: "required"} virusbreakendDB: {description: "A .tar.gz containing the virusbreakend database.", category: "required"} outputPath: {description: "The path the output should be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 0a1995df4f853799cb945a2bc8d3ac0062039efd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 9 Nov 2021 12:29:10 +0100 Subject: [PATCH 575/902] try version 2.11.1 for gridss --- gridss.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index b36d6598..5c267e79 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -34,7 +34,7 @@ task AnnotateInsertedSequence { Int threads = 8 String javaXmx = "8G" String memory = "9G" - String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" Int timeMinutes = 120 } @@ -96,7 
+96,7 @@ task GRIDSS { Int jvmHeapSizeGb = 64 Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" } command { @@ -165,7 +165,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "25G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" Int timeMinutes = 1440 } @@ -213,7 +213,7 @@ task Virusbreakend { String memory = "75G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" Int timeMinutes = 180 } From 7d1f9c92406f9865e8c035a5bd19feea5a22b7ae Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 10 Nov 2021 12:46:04 +0100 Subject: [PATCH 576/902] upgrade gridss to 2.12.2 --- gridss.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 5c267e79..1f14e23b 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -34,7 +34,7 @@ task AnnotateInsertedSequence { Int threads = 8 String javaXmx = "8G" String memory = "9G" - String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" + String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" Int timeMinutes = 120 } @@ -96,7 +96,7 @@ task GRIDSS { Int jvmHeapSizeGb = 64 Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" + String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" } command { @@ -165,14 +165,14 @@ task GridssAnnotateVcfRepeatmasker { String memory = "25G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" + String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" Int timeMinutes = 1440 } command { gridss_annotate_vcf_repeatmasker 
\ --output ~{outputPath} \ - --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ + --jar /usr/local/share/gridss-2.12.2-0/gridss.jar \ -w . \ -t ~{threads} \ ~{gridssVcf} @@ -213,7 +213,7 @@ task Virusbreakend { String memory = "75G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" + String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" Int timeMinutes = 180 } @@ -226,7 +226,7 @@ task Virusbreakend { --workingdir . \ --reference ~{referenceFasta} \ --db virusbreakenddb \ - --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ + --jar /usr/local/share/gridss-2.12.2-0/gridss.jar \ -t ~{threads} \ ~{bam} } From f9ed6158bfe70792d546e8e68b205f197c52b2ff Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 11 Nov 2021 10:59:46 +0100 Subject: [PATCH 577/902] increase memory gridss --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 1f14e23b..2e68ed88 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,7 +93,7 @@ task GRIDSS { File? blacklistBed File? gridssProperties - Int jvmHeapSizeGb = 64 + Int jvmHeapSizeGb = 85 Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" From c9657636bed7c7046e3799a0c3fca36473ae80e6 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 12 Nov 2021 10:07:54 +0100 Subject: [PATCH 578/902] increase gridss memory --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 2e68ed88..13596a48 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,7 +93,7 @@ task GRIDSS { File? blacklistBed File? 
gridssProperties - Int jvmHeapSizeGb = 85 + Int jvmHeapSizeGb = 185 Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" From f64bd5367fee90d51d47db7c29af13816c9fedbe Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 12 Nov 2021 16:08:53 +0100 Subject: [PATCH 579/902] use alternative gridss image for virusbreakend --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 13596a48..b448a2dc 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -213,7 +213,7 @@ task Virusbreakend { String memory = "75G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 180 } From 1a9a8058f3991c0b76e934837dc64f80805fc4c6 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 12 Nov 2021 21:55:59 +0100 Subject: [PATCH 580/902] change gridss runtime settings --- gridss.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index b448a2dc..d93f1b80 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -34,7 +34,7 @@ task AnnotateInsertedSequence { Int threads = 8 String javaXmx = "8G" String memory = "9G" - String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 120 } @@ -94,9 +94,9 @@ task GRIDSS { File? 
gridssProperties Int jvmHeapSizeGb = 185 - Int threads = 8 - Int timeMinutes = ceil(4320 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" + Int threads = 4 + Int timeMinutes = ceil(7200 / threads) + 180 + String dockerImage = "quay.io/biowdl/gridss:2.12.2" } command { @@ -165,7 +165,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "25G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 1440 } From d3d2040093a79814a1bf0488d13a44342068c5b5 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Sat, 13 Nov 2021 16:12:52 +0100 Subject: [PATCH 581/902] gridss more memory --- gridss.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index d93f1b80..8c05fe61 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,7 +93,8 @@ task GRIDSS { File? blacklistBed File? gridssProperties - Int jvmHeapSizeGb = 185 + Int jvmHeapSizeGb = 200 + Int nonJvmMemoryGb = 50 Int threads = 4 Int timeMinutes = ceil(7200 / threads) + 180 String dockerImage = "quay.io/biowdl/gridss:2.12.2" @@ -126,7 +127,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb + 15}G" + memory: "~{jvmHeapSizeGb + nonJvmMemoryGb}G" time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } From e9f3c5fdf8aef7082911f6c40730264187cc6884 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 15 Nov 2021 11:55:52 +0100 Subject: [PATCH 582/902] make recovery sv vcf optional in purple --- hmftools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index f8b13c66..1537bce5 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -648,8 +648,8 @@ task Purple { File somaticVcf File germlineVcf File filteredSvVcf - File fullSvVcf - File fullSvVcfIndex + File? fullSvVcf + File? 
fullSvVcfIndex File referenceFasta File referenceFastaFai File referenceFastaDict @@ -676,7 +676,7 @@ task Purple { -somatic_vcf ~{somaticVcf} \ -germline_vcf ~{germlineVcf} \ -structural_vcf ~{filteredSvVcf} \ - -sv_recovery_vcf ~{fullSvVcf} \ + ~{"-sv_recovery_vcf " + fullSvVcf} \ -circos /usr/local/bin/circos \ -ref_genome ~{referenceFasta} \ -driver_catalog \ From 787ad56b36f24099ece60ae56a43af46cbbeaf00 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 26 Nov 2021 13:46:20 +0100 Subject: [PATCH 583/902] give dictionary and index to virusbreakend --- gridss.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 8c05fe61..b43a3837 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -208,6 +208,8 @@ task Virusbreakend { File bam File bamIndex File referenceFasta + File referenceFastaFai + File referenceFastaDict File referenceImg File virusbreakendDB String outputPath = "./virusbreakend.vcf" From 86a249825272f9bb4384f87057593047402a1a37 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Dec 2021 15:36:36 +0100 Subject: [PATCH 584/902] Add sampleposition in array task --- common.wdl | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/common.wdl b/common.wdl index d29ed5da..fc8dc481 100644 --- a/common.wdl +++ b/common.wdl @@ -148,6 +148,43 @@ task CreateLink { } } +task GetSamplePositionInArray { + input { + Array[String] sampleIds + String sample + + # python:3.7-slim's sha256 digest. This image is based on debian buster. + String dockerImage = "python@sha256:e0f6a4df17d5707637fa3557ab266f44dddc46ebfc82b0f1dbe725103961da4e" + } + + command <<< + python <>> + + output { + Int position = read_int(stdout()) + } + + runtime { + # 4 gigs of memory to be able to build the docker image in singularity. 
+ memory: "4G" + docker: dockerImage + } + + parameter_meta { + # inputs + sampleIds: {description: "A list of sample ids.", category: "required"} + sample: {description: "The sample for which the position is wanted.", category: "required"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + position: {description: ""} + } +} + task MapMd5 { input { Map[String,String] map From d970e6892b1e61d34c99e507fb3a62b7b04f2fc1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Dec 2021 16:33:41 +0100 Subject: [PATCH 585/902] Require 5 minutes --- common.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/common.wdl b/common.wdl index fc8dc481..1e4fc8cb 100644 --- a/common.wdl +++ b/common.wdl @@ -172,6 +172,7 @@ task GetSamplePositionInArray { # 4 gigs of memory to be able to build the docker image in singularity. memory: "4G" docker: dockerImage + timeMinutes: 5 } parameter_meta { From c21d27ff32bdf7210dddf98a711e32192e820a82 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 14 Dec 2021 14:48:24 +0100 Subject: [PATCH 586/902] Add parameter_meta for macs2 --- macs2.wdl | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index 757eaf67..cbce18e9 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -26,11 +26,10 @@ task PeakCalling { Array[File]+ inputBamsIndex Array[File]+? controlBams Array[File]+? 
controlBamsIndex - String outDir + String outDir = "macs2" String sampleName Boolean nomodel = false - Int threads = 1 String memory = "8G" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } @@ -50,8 +49,21 @@ task PeakCalling { } runtime { - cpu: threads + cpu: 1 memory: memory docker: dockerImage } + parameter_meta { + inputBams: {description: "The BAM files on which to perform peak calling.", category: "required"} + inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} + controlBams: {description: "Control BAM files for the input bam files.", category: "required"} + controlBamsIndex: {description: "The indexes for the control BAM files.", category: "required"} + sampleName: {description: "Name of the sample to be analysed", category: "required"} + outDir: {description: "All output files will be written in this directory.", category: "advanced"} + nomodel: {description: "Whether or not to build the shifting model.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + } } From 24ef56348f4ca8900f639d05aa28ec25fda3fbd1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 14 Dec 2021 14:52:15 +0100 Subject: [PATCH 587/902] Add time minutes parameter --- macs2.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index cbce18e9..983630c5 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -29,7 +29,7 @@ task PeakCalling { String outDir = "macs2" String sampleName Boolean nomodel = false - + Int timeMinutes = 600 # Default to 10 hours String memory = "8G" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } @@ -52,6 +52,7 @@ task PeakCalling { cpu: 1 memory: memory docker: dockerImage + time_minutes: timeMinutes } parameter_meta { inputBams: {description: "The BAM files on which to perform peak calling.", category: "required"} From 17746ebbb5668b8382050105b69f33273019a512 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 14 Dec 2021 14:56:37 +0100 Subject: [PATCH 588/902] Update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d40cd1f..126f1ed9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> +version 5.1.0-dev +--------------------------- ++ Update parameter_meta for macs2 ++ Add sample position in array task. 
+ version 5.0.2 --------------------------- + bumped ScatterRegions container to 1.0.0 From 019cbb96a68c2fca141c955126b0ad9b97511f2a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 14 Dec 2021 16:00:30 +0100 Subject: [PATCH 589/902] More correct evaluation of controlBams input --- macs2.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index 983630c5..eb71ac1d 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -24,8 +24,8 @@ task PeakCalling { input { Array[File]+ inputBams Array[File]+ inputBamsIndex - Array[File]+? controlBams - Array[File]+? controlBamsIndex + Array[File] controlBams + Array[File] controlBamsIndex String outDir = "macs2" String sampleName Boolean nomodel = false @@ -38,7 +38,7 @@ task PeakCalling { set -e macs2 callpeak \ --treatment ~{sep = ' ' inputBams} \ - ~{true="--control" false="" defined(controlBams)} ~{sep = ' ' controlBams} \ + ~{true="--control" false="" length(controlBams) > 0} ~{sep = ' ' controlBams} \ --outdir ~{outDir} \ --name ~{sampleName} \ ~{true='--nomodel' false='' nomodel} @@ -57,8 +57,8 @@ task PeakCalling { parameter_meta { inputBams: {description: "The BAM files on which to perform peak calling.", category: "required"} inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} - controlBams: {description: "Control BAM files for the input bam files.", category: "required"} - controlBamsIndex: {description: "The indexes for the control BAM files.", category: "required"} + controlBams: {description: "Control BAM files for the input bam files.", category: "common"} + controlBamsIndex: {description: "The indexes for the control BAM files.", category: "common"} sampleName: {description: "Name of the sample to be analysed", category: "required"} outDir: {description: "All output files will be written in this directory.", category: "advanced"} nomodel: {description: "Whether or not to build the shifting model.", category: "advanced"} From 
9c5ebf6bb9d32d030b783ed03f329db735a92b6f Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 15 Dec 2021 15:27:24 +0100 Subject: [PATCH 590/902] add umiAwareMarkDuplicate --- picard.wdl | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/picard.wdl b/picard.wdl index f75fdc32..0e189a60 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1004,3 +1004,57 @@ task RenameSample { renamedVcf: {description: "New VCF with renamed sample."} } } + +task UmiAwareMarkDuplicatesWithMateCigar { + input { + File inputBam + String outputPathBam + String outputPathMetrics + String outputPathUmiMetrics + String tempdir + Boolean dedup = true + + String memory = "10G" + Int timeMinutes = 360 + String dockerImage = "quay.io/biocontainers/picard:2.25.7--hdfd78af_0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" ~{tempdir} + picard UmiAwareMarkDuplicatesWithMateCigar \ + I=~{inputBam} \ + O=~{outputPathBam} \ + M=~{outputPathMetrics} \ + UMI_METRICS_FILE=~{outputPathUmiMetrics} \ + TMP_DIR=~{tempdir} \ + REMOVE_DUPLICATES=~{dedup} \ + CREATE_INDEX=true \ + } + + output { + File outputBam = outputPathBam + File outputBamIndex = sub(outputPathBam, "\.bam$", ".bai") + File outputMetrics = outputPathMetrics + File outputUmiMetrics = outputPathUmiMetrics + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputBam: {description: "The unsorted input BAM file.", category: "required"} + outputPathBam: {description: "The location the output BAM file should be written to.", category: "required"} + outputPathMetrics: {description: "The location the output metrics file should be written to.", category: "required"} + outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} + tmpDir: {description: "Temporary directory.", category: "advanced"} + memory: {description: "The amount of memory this job will 
use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + } +} \ No newline at end of file From 010ce0ac0835f0faa1353f3f43b544c2b0ecb50c Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 15 Dec 2021 15:27:38 +0100 Subject: [PATCH 591/902] add annotateBamWithUmi --- fgbio.wdl | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 fgbio.wdl diff --git a/fgbio.wdl b/fgbio.wdl new file mode 100644 index 00000000..d50906d3 --- /dev/null +++ b/fgbio.wdl @@ -0,0 +1,68 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task AnnotateBamWithUmis { + input { + File inputBam + File inputUmi + String outputPath + + String memory = "120G" + Int timeMinutes = 360 + String javaXmx="100G" + String dockerImage = "quay.io/biocontainers/fgbio:1.4.0--hdfd78af_0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + fgbio -Xmx~{javaXmx} \ + AnnotateBamWithUmis \ + -i ~{inputBam} \ + -f ~{inputUmi} \ + -o ~{outputPath} + } + + output { + File outputBam = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM file.", category: "required"} + inputUmi: {description: "The input fastq file with UMIs.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "UMI-annotated output BAM file."} + } +} From 014d43cc204fcf1f7159717c047210ca3f008c40 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 4 Jan 2022 13:32:55 +0100 Subject: [PATCH 592/902] Make sure task is consistent --- picard.wdl | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/picard.wdl b/picard.wdl index 0e189a60..d8ce5ebe 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1008,11 +1008,11 @@ task RenameSample { task UmiAwareMarkDuplicatesWithMateCigar { input { File inputBam - String outputPathBam - String outputPathMetrics - String outputPathUmiMetrics - String tempdir - Boolean dedup = true + String outputPath + String outputPathMetrics = outputPath + ".metrics" + String outputPathUmiMetrics = outputPath + ".umi-metrics" + String tempdir = "temp" + Boolean removeDuplicates = true String memory = "10G" Int timeMinutes = 360 @@ -1024,17 +1024,17 @@ task UmiAwareMarkDuplicatesWithMateCigar { mkdir -p "$(dirname ~{outputPath})" ~{tempdir} picard UmiAwareMarkDuplicatesWithMateCigar \ I=~{inputBam} \ - O=~{outputPathBam} \ + O=~{outputPath} \ M=~{outputPathMetrics} \ UMI_METRICS_FILE=~{outputPathUmiMetrics} \ TMP_DIR=~{tempdir} \ - REMOVE_DUPLICATES=~{dedup} \ + REMOVE_DUPLICATES=~{removeDuplicates} \ CREATE_INDEX=true \ } output { - File outputBam = outputPathBam - File outputBamIndex = sub(outputPathBam, "\.bam$", ".bai") + File outputBam = outputPath + File outputBamIndex = sub(outputPath, "\.bam$", ".bai") File outputMetrics = outputPathMetrics File outputUmiMetrics = outputPathUmiMetrics } @@ -1048,10 +1048,11 @@ task UmiAwareMarkDuplicatesWithMateCigar { parameter_meta { # inputs inputBam: {description: "The unsorted input BAM file.", category: "required"} - outputPathBam: {description: "The location the output BAM file should be written to.", category: "required"} + 
outputPath: {description: "The location the output BAM file should be written to.", category: "required"} outputPathMetrics: {description: "The location the output metrics file should be written to.", category: "required"} outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} - tmpDir: {description: "Temporary directory.", category: "advanced"} + removeDuplicates: {description: "Whether the duplicate reads should be removed instead of marked.", category: "common"} + tempdir: {description: "Temporary directory.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 8ccabed5e8c56d2f742d5aba829104fe8db00d2d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 4 Jan 2022 14:10:45 +0100 Subject: [PATCH 593/902] Allow multiple bam inputs --- picard.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/picard.wdl b/picard.wdl index d8ce5ebe..d2a6ca35 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1007,7 +1007,7 @@ task RenameSample { task UmiAwareMarkDuplicatesWithMateCigar { input { - File inputBam + Array[File] inputBams String outputPath String outputPathMetrics = outputPath + ".metrics" String outputPathUmiMetrics = outputPath + ".umi-metrics" @@ -1023,7 +1023,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { set -e mkdir -p "$(dirname ~{outputPath})" ~{tempdir} picard UmiAwareMarkDuplicatesWithMateCigar \ - I=~{inputBam} \ + INPUT=~{sep=' INPUT=' inputBams} \ O=~{outputPath} \ M=~{outputPathMetrics} \ UMI_METRICS_FILE=~{outputPathUmiMetrics} \ @@ -1047,7 +1047,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { parameter_meta { # inputs - 
inputBam: {description: "The unsorted input BAM file.", category: "required"} + inputBams: {description: "The BAM files for which the duplicate reads should be marked.", category: "required"} outputPath: {description: "The location the output BAM file should be written to.", category: "required"} outputPathMetrics: {description: "The location the output metrics file should be written to.", category: "required"} outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} From 89eaf097695f6bda12a20f0d5ce993a230a8342a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 11:11:01 +0100 Subject: [PATCH 594/902] Add script to extract umi from read name --- umi.wdl | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 umi.wdl diff --git a/umi.wdl b/umi.wdl new file mode 100644 index 00000000..fdf764f4 --- /dev/null +++ b/umi.wdl @@ -0,0 +1,100 @@ +version 1.0 + +# Copyright (c) 2022 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task BamReadNameToUmiTag { + + # This task processes a bam file with reads that have been extracted with + # umi-tools extract. The UMI is extracted from the read name again and put + # in the bam file again with umiTag (default RX) + input { + File inputBam + String outputPath = "output.bam" + String umiTag = "RX" + + String memory = "2G" + Int timeMinutes = 1 + ceil(size([inputBam], "G") * 10) + String dockerImage = "quay.io/biocontainers/pysam:0.17.0--py39h051187c_0" + } + String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") + command <<< + python < Tuple[str, str]: + id_and_rest = name.split(maxsplit=1) + if len(id_and_rest) == 1: + id, = id_and_rest + other_parts = "" + else: + id, other_parts = id_and_rest + underscore_index = id.rfind("_") + umi = id[underscore_index + 1:] + new_id = id[:underscore_index] + if other_parts: + return " ".join([new_id, other_parts]), umi + return new_id, umi + + def annotate_umis(in_file, out_file, bam_tag = "RX"): + in_bam = pysam.AlignmentFile(in_file, "rb") + out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) + for segment in in_bam: # type: pysam.AlignedSegment + new_name, umi = split_umi_from_name(segment.query_name) + segment.query_name = new_name + # append does not work. (Pysam is not Pythonic.) 
+ segment.tags = segment.tags + [(bam_tag, umi)] + out_bam.write(segment) + + if __name__ == "__main__": + annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}") + pysam.index("~{outputPath}", "~{bamIndexPath}", b=True) + CODE + >>> + + output { + File outputBam = outputBam + File outputBamIndex = outputBamIndex + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputBam: {description: "The input SAM file.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "common"} + umiTag: {description: "The tag used for UMIs in the output BAM file.", category: "common"} + + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "Sorted BAM file."} + outputBamIndex: {description: "Sorted BAM file index."} + } +} From 0a66c48bb5b75722d641d23c3421d2ca50c5ad21 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 11:46:00 +0100 Subject: [PATCH 595/902] Add umiTagName flag --- picard.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/picard.wdl b/picard.wdl index d2a6ca35..961364e4 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1013,6 +1013,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String outputPathUmiMetrics = outputPath + ".umi-metrics" String tempdir = "temp" Boolean removeDuplicates = true + String umiTagName = "RX" String memory = "10G" Int timeMinutes = 360 @@ -1026,6 +1027,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { INPUT=~{sep=' INPUT=' inputBams} \ O=~{outputPath} \ M=~{outputPathMetrics} \ + UMI_TAG_NAME=~{umiTagName} \ UMI_METRICS_FILE=~{outputPathUmiMetrics} \ 
TMP_DIR=~{tempdir} \ REMOVE_DUPLICATES=~{removeDuplicates} \ @@ -1052,6 +1054,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { outputPathMetrics: {description: "The location the output metrics file should be written to.", category: "required"} outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} removeDuplicates: {description: "Whether the duplicate reads should be removed instead of marked.", category: "common"} + umiTagName: {description: "Which tag in the BAM file holds the UMI.", category: "common"} tempdir: {description: "Temporary directory.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From d383b38d49cec511e9b6212dc1507e10ddc2fcec Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 12:26:23 +0100 Subject: [PATCH 596/902] Dedent overindented code --- umi.wdl | 56 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/umi.wdl b/umi.wdl index fdf764f4..7c435654 100644 --- a/umi.wdl +++ b/umi.wdl @@ -37,38 +37,38 @@ task BamReadNameToUmiTag { String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") command <<< python < Tuple[str, str]: - id_and_rest = name.split(maxsplit=1) - if len(id_and_rest) == 1: - id, = id_and_rest - other_parts = "" - else: - id, other_parts = id_and_rest - underscore_index = id.rfind("_") - umi = id[underscore_index + 1:] - new_id = id[:underscore_index] - if other_parts: - return " ".join([new_id, other_parts]), umi - return new_id, umi + def split_umi_from_name(name) -> Tuple[str, str]: + id_and_rest = name.split(maxsplit=1) + if len(id_and_rest) == 1: + id, = id_and_rest + other_parts = "" + else: + id, other_parts = id_and_rest + underscore_index = id.rfind("_") + umi = id[underscore_index + 1:] + 
new_id = id[:underscore_index] + if other_parts: + return " ".join([new_id, other_parts]), umi + return new_id, umi - def annotate_umis(in_file, out_file, bam_tag = "RX"): - in_bam = pysam.AlignmentFile(in_file, "rb") - out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) - for segment in in_bam: # type: pysam.AlignedSegment - new_name, umi = split_umi_from_name(segment.query_name) - segment.query_name = new_name - # append does not work. (Pysam is not Pythonic.) - segment.tags = segment.tags + [(bam_tag, umi)] - out_bam.write(segment) + def annotate_umis(in_file, out_file, bam_tag = "RX"): + in_bam = pysam.AlignmentFile(in_file, "rb") + out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) + for segment in in_bam: # type: pysam.AlignedSegment + new_name, umi = split_umi_from_name(segment.query_name) + segment.query_name = new_name + # append does not work. (Pysam is not Pythonic.) + segment.tags = segment.tags + [(bam_tag, umi)] + out_bam.write(segment) - if __name__ == "__main__": - annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}") - pysam.index("~{outputPath}", "~{bamIndexPath}", b=True) + if __name__ == "__main__": + annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}") + pysam.index("~{outputPath}", "~{bamIndexPath}", b=True) CODE >>> From acff4bd9fffbd5a6326b96144f2fe47c2b548a36 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 12:28:28 +0100 Subject: [PATCH 597/902] Also create directories --- umi.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/umi.wdl b/umi.wdl index 7c435654..360405ff 100644 --- a/umi.wdl +++ b/umi.wdl @@ -37,8 +37,9 @@ task BamReadNameToUmiTag { String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") command <<< python < Date: Tue, 11 Jan 2022 12:55:10 +0100 Subject: [PATCH 598/902] Correct output files --- umi.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/umi.wdl b/umi.wdl index 360405ff..2a4bc9cf 100644 --- 
a/umi.wdl +++ b/umi.wdl @@ -75,8 +75,8 @@ task BamReadNameToUmiTag { >>> output { - File outputBam = outputBam - File outputBamIndex = outputBamIndex + File outputBam = outputPath + File outputBamIndex = bamIndexPath } runtime { From 091058e29c0aba1d8c412ec21cda942e7597d23c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 13:55:03 +0100 Subject: [PATCH 599/902] Update changelog --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 126f1ed9..fe0667e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,11 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- -+ Update parameter_meta for macs2 ++ Add a script to subtract UMI's from the read name and add them as + a BAM tag for each BAM record. The script is in umi.BamReadNameToUmiTag. ++ Add fgbio.AnnotateBamWithUmis. ++ Add picard.UmiAwareMarkDuplicatesWithMateCigar. ++ Update parameter_meta for macs2. + Add sample position in array task. 
version 5.0.2 From 054b7a7f13891c1a85c5a4e8e596e0cfb7d5282a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 16:36:37 +0100 Subject: [PATCH 600/902] Use more conventional list unpacking for clarity --- umi.wdl | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/umi.wdl b/umi.wdl index 2a4bc9cf..e149cafe 100644 --- a/umi.wdl +++ b/umi.wdl @@ -45,11 +45,9 @@ task BamReadNameToUmiTag { def split_umi_from_name(name) -> Tuple[str, str]: id_and_rest = name.split(maxsplit=1) - if len(id_and_rest) == 1: - id, = id_and_rest - other_parts = "" - else: - id, other_parts = id_and_rest + id = id_and_rest[0] + # If there was no whitespace id_and_rest will have length 1 + other_parts = id_and_rest[1] if len(id_and_rest) == 2 else "" underscore_index = id.rfind("_") umi = id[underscore_index + 1:] new_id = id[:underscore_index] From 5df62f54b036d396ad78c966e19956a47df552c3 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 14 Jan 2022 13:49:16 +0100 Subject: [PATCH 601/902] Add format parameter to macs2 --- macs2.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/macs2.wdl b/macs2.wdl index eb71ac1d..e17d613b 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -28,6 +28,7 @@ task PeakCalling { Array[File] controlBamsIndex String outDir = "macs2" String sampleName + String format = "AUTO" Boolean nomodel = false Int timeMinutes = 600 # Default to 10 hours String memory = "8G" @@ -41,6 +42,7 @@ task PeakCalling { ~{true="--control" false="" length(controlBams) > 0} ~{sep = ' ' controlBams} \ --outdir ~{outDir} \ --name ~{sampleName} \ + -f ~{format} \ ~{true='--nomodel' false='' nomodel} } From f05d7cb427d00a85994391b0e2829cc704bb3314 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 17 Jan 2022 09:08:56 +0100 Subject: [PATCH 602/902] Use set_tag call from pysam --- umi.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/umi.wdl b/umi.wdl index e149cafe..59169685 100644 --- a/umi.wdl +++ b/umi.wdl @@ -62,8 
+62,7 @@ task BamReadNameToUmiTag { for segment in in_bam: # type: pysam.AlignedSegment new_name, umi = split_umi_from_name(segment.query_name) segment.query_name = new_name - # append does not work. (Pysam is not Pythonic.) - segment.tags = segment.tags + [(bam_tag, umi)] + segment.set_tag("RX", umi, value_type="Z") out_bam.write(segment) if __name__ == "__main__": From 28a2801941d6b56a64d1c413a4998ff220cd9899 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 17 Jan 2022 09:25:50 +0100 Subject: [PATCH 603/902] Use proper encoding --- umi.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/umi.wdl b/umi.wdl index 59169685..a32d646a 100644 --- a/umi.wdl +++ b/umi.wdl @@ -59,10 +59,14 @@ task BamReadNameToUmiTag { in_bam = pysam.AlignmentFile(in_file, "rb") os.makedirs(os.path.dirname(out_file), exist_ok=True) out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) + # Encode bam_tag as bytes. Otherwise pysam converts it to bytes anyway. + encoded_bam_tag = bam_tag.encode('ascii') for segment in in_bam: # type: pysam.AlignedSegment new_name, umi = split_umi_from_name(segment.query_name) segment.query_name = new_name - segment.set_tag("RX", umi, value_type="Z") + # Encode umi as ascii. Otherwise pysam encodes it to bytes anyway. + # Value type has to be a string though, otherwise pysam crashes. 
+ segment.set_tag(encoded_bam_tag, umi.encode('ascii'), value_type="Z") out_bam.write(segment) if __name__ == "__main__": From 7b2d86fef3c90983b9ca57a9aded3872756d80e3 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Jan 2022 10:52:40 +0100 Subject: [PATCH 604/902] Set xmx value properly for UmiAwareMarkDuplicatesWithMateCigar --- picard.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/picard.wdl b/picard.wdl index 961364e4..46b11e51 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1015,7 +1015,8 @@ task UmiAwareMarkDuplicatesWithMateCigar { Boolean removeDuplicates = true String umiTagName = "RX" - String memory = "10G" + String javaXmx = "8G" + String memory = "9G" Int timeMinutes = 360 String dockerImage = "quay.io/biocontainers/picard:2.25.7--hdfd78af_0" } @@ -1023,7 +1024,8 @@ task UmiAwareMarkDuplicatesWithMateCigar { command { set -e mkdir -p "$(dirname ~{outputPath})" ~{tempdir} - picard UmiAwareMarkDuplicatesWithMateCigar \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + UmiAwareMarkDuplicatesWithMateCigar \ INPUT=~{sep=' INPUT=' inputBams} \ O=~{outputPath} \ M=~{outputPathMetrics} \ From 09b97388eea432a1d0b4c37fe65f5621e13e9d0b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Jan 2022 14:18:17 +0100 Subject: [PATCH 605/902] Update Picard and reevaluate use of intel inflater/deflater --- picard.wdl | 99 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 67 insertions(+), 32 deletions(-) diff --git a/picard.wdl b/picard.wdl index 46b11e51..bf32c8ac 100644 --- a/picard.wdl +++ b/picard.wdl @@ -29,7 +29,7 @@ task BedToIntervalList { String javaXmx = "3G" String memory = "4G" Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -89,7 +89,7 @@ task CollectHsMetrics { # Additional * 2 because picard multiple metrics reads the # reference fasta twice.
Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -158,7 +158,7 @@ task CollectMultipleMetrics { Int memoryMb = javaXmxMb + 512 # Additional * 2 because picard multiple metrics reads the reference fasta twice. Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -284,7 +284,7 @@ task CollectRnaSeqMetrics { String memory = "9G" # With 6 minutes per G there were several timeouts. Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -342,7 +342,7 @@ task CollectTargetedPcrMetrics { String javaXmx = "3G" String memory = "4G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -404,7 +404,7 @@ task CollectVariantCallingMetrics { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -453,7 +453,7 @@ task CreateSequenceDictionary { String javaXmx = "2G" String memory = "3G" - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -497,13 +497,15 @@ task GatherBamFiles { String outputBamPath Boolean createMd5File = false - Int? 
compressionLevel + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater Int javaXmxMb = 1024 Int memoryMb = javaXmxMb + 512 # One minute per input gigabyte. Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -513,7 +515,9 @@ task GatherBamFiles { GatherBamFiles \ INPUT=~{sep=' INPUT=' inputBams} \ OUTPUT=~{outputBamPath} \ - ~{"COMPRESSION_LEVEL=" + compressionLevel} \ + COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ CREATE_INDEX=true \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} } @@ -536,7 +540,9 @@ task GatherBamFiles { inputBamsIndex: {description: "The indexes of the input BAM files.", category: "required"} outputBamPath: {description: "The path where the merged BAM file will be written.", caregory: "required"} createMd5File: {decription: "Whether to create an md5 file of the output BAM.", category: "advanced"} - compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -555,10 +561,14 @@ task GatherVcfs { Array[File]+ inputVcfIndexes String outputVcfPath = "out.vcf.gz" + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater + String javaXmx = "4G" String memory = "5G" Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -566,6 +576,10 @@ task GatherVcfs { mkdir -p "$(dirname ~{outputVcfPath})" picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ GatherVcfs \ + COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ + CREATE_INDEX=true \ INPUT=~{sep=' INPUT=' inputVcfs} \ OUTPUT=~{outputVcfPath} } @@ -590,6 +604,10 @@ task GatherVcfs { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. 
False uses the optimized intel deflater.", category: "advanced"} + # outputs outputVcf: {description: "Multiple VCF files gathered into one file."} } @@ -601,14 +619,11 @@ task MarkDuplicates { Array[File]+ inputBams String outputBamPath String metricsPath - Int compressionLevel = 1 Boolean createMd5File = false - Boolean useJdkInflater = true # Slightly faster than the intel one. - # Better results for compression level 1 (much smaller). - # Higher compression levels similar to intel deflater. - # NOTE: this might change in the future when the intel - # deflater is updated! - Boolean useJdkDeflater = true + + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater # The program default for READ_NAME_REGEX is appropriate in nearly every case. # Sometimes we wish to supply "null" in order to turn off optical duplicate detection. @@ -622,7 +637,7 @@ task MarkDuplicates { String memoryMb = javaXmxMb + 512 Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } # Task is assuming query-sorted input so that the Secondary and Supplementary reads get @@ -638,6 +653,8 @@ task MarkDuplicates { OUTPUT=~{outputBamPath} \ METRICS_FILE=~{metricsPath} \ COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ VALIDATION_STRINGENCY=SILENT \ ~{"READ_NAME_REGEX=" + read_name_regex} \ OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \ @@ -668,9 +685,9 @@ task MarkDuplicates { outputBamPath: {description: "The location where the ouptut BAM file should be written.", category: "required"} metricsPath: {description: "The location where the output metrics file should be written.", category: "required"} compressionLevel: {description: "The compression level 
at which the BAM files are written.", category: "advanced"} - createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"} useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} + createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"} read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} @@ -692,16 +709,20 @@ task MergeVCFs { Array[File]+ inputVCFsIndexes String outputVcfPath Int compressionLevel = 1 - Boolean useJdkInflater = true # Slightly faster than the intel one. + Boolean useJdkInflater = false # Better results for compression level 1 (much smaller). # Higher compression levels similar to intel deflater. # NOTE: this might change in the future when the intel deflater is updated! - Boolean useJdkDeflater = true + # Second NOTE: No it did not change. Only the fastest algorithm with + # worse compression is wrapped in the intel GKL. Instead of using + # one of the slightly slower but better compressing alternatives from ISA-L. + # (Which are also faster than zlib.) 
+ Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "4G" String memory = "5G" Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } # Using MergeVcfs instead of GatherVcfs so we can create indices. @@ -757,7 +778,7 @@ task SamToFastq { String javaXmx = "16G" # High memory default to avoid crashes. String memory = "17G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" File? noneFile } @@ -818,7 +839,7 @@ task ScatterIntervalList { String javaXmx = "3G" String memory = "4G" - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -853,13 +874,15 @@ task SortSam { Boolean createMd5File = false Int maxRecordsInRam = 500000 Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater # Default ram of 4 GB. Using 125001.0 to prevent an answer of # 4.000000001 which gets rounded to 5. 
# GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -872,6 +895,8 @@ task SortSam { SORT_ORDER=~{true="queryname" false="coordinate" sortByName} \ CREATE_INDEX=true \ COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ VALIDATION_STRINGENCY=SILENT \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} @@ -896,7 +921,9 @@ task SortSam { sortByName: {description: "Sort the output file by name, default is position.", category: "advanced"} createMd5File: {description: "Whether to create an MD5 digest for any BAM or FASTQ files created.", category: "advanced"} maxRecordsInRam: {description: "This will specify the number of records stored in RAM before spilling to disk.", category: "advanced"} - compressionLevel: {description: "Compression level for all compressed files created.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} XmxGb: {description: "The maximum memory available to picard SortSam. 
Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -917,7 +944,7 @@ task SortVcf { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -967,7 +994,7 @@ task RenameSample { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -1014,11 +1041,13 @@ task UmiAwareMarkDuplicatesWithMateCigar { String tempdir = "temp" Boolean removeDuplicates = true String umiTagName = "RX" - + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "8G" String memory = "9G" Int timeMinutes = 360 - String dockerImage = "quay.io/biocontainers/picard:2.25.7--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -1034,6 +1063,9 @@ task UmiAwareMarkDuplicatesWithMateCigar { TMP_DIR=~{tempdir} \ REMOVE_DUPLICATES=~{removeDuplicates} \ CREATE_INDEX=true \ + COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} } output { @@ -1058,6 +1090,9 @@ task UmiAwareMarkDuplicatesWithMateCigar { removeDuplicates: {description: "Whether the duplicate reads should be removed instead of marked.", category: "common"} umiTagName: 
{description: "Which tag in the BAM file holds the UMI.", category: "common"} tempdir: {description: "Temporary directory.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From a0933e34c55d4bed26510e0fd09fe013441898c1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Jan 2022 14:38:42 +0100 Subject: [PATCH 606/902] Add option to assume sort order --- picard.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index bf32c8ac..144c7782 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1038,6 +1038,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String outputPath String outputPathMetrics = outputPath + ".metrics" String outputPathUmiMetrics = outputPath + ".umi-metrics" + String? 
assumeSortOrder String tempdir = "temp" Boolean removeDuplicates = true String umiTagName = "RX" @@ -1065,7 +1066,8 @@ task UmiAwareMarkDuplicatesWithMateCigar { CREATE_INDEX=true \ COMPRESSION_LEVEL=~{compressionLevel} \ USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ - USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ + ~{"ASSUME_SORT_ORDER=" + assumeSortOrder} } output { @@ -1089,6 +1091,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} removeDuplicates: {description: "Whether the duplicate reads should be removed instead of marked.", category: "common"} umiTagName: {description: "Which tag in the BAM file holds the UMI.", category: "common"} + assumeSortOrder: {description: "Assume a certain sort order even though the header might say otherwise.", category: "common"} tempdir: {description: "Temporary directory.", category: "advanced"} compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} From 58682093853cf6e62304d7797f3f268587187669 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Jan 2022 15:42:12 +0100 Subject: [PATCH 607/902] Have more records in ram --- picard.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/picard.wdl b/picard.wdl index 144c7782..e81cd4e3 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1038,6 +1038,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String outputPath String outputPathMetrics = outputPath + ".metrics" String outputPathUmiMetrics = outputPath + ".umi-metrics" + Int maxRecordsInRam = 3000000 # Default is 500_000 but that will lead to very small files on disk. String? 
assumeSortOrder String tempdir = "temp" Boolean removeDuplicates = true @@ -1063,6 +1064,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { UMI_METRICS_FILE=~{outputPathUmiMetrics} \ TMP_DIR=~{tempdir} \ REMOVE_DUPLICATES=~{removeDuplicates} \ + MAX_RECORDS_IN_RAM=~{maxRecordsInRam} \ CREATE_INDEX=true \ COMPRESSION_LEVEL=~{compressionLevel} \ USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ From 89a0324ab9467ab79528ce3908701d7b230b2822 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 19 Jan 2022 12:59:00 +0100 Subject: [PATCH 608/902] increase resources GRIDSS --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index b43a3837..7d6a1ebf 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,10 +93,10 @@ task GRIDSS { File? blacklistBed File? gridssProperties - Int jvmHeapSizeGb = 200 + Int jvmHeapSizeGb = 300 Int nonJvmMemoryGb = 50 Int threads = 4 - Int timeMinutes = ceil(7200 / threads) + 180 + Int timeMinutes = ceil(7200 / threads) + 1800 String dockerImage = "quay.io/biowdl/gridss:2.12.2" } From 99215fdd9834f39569e5672b9daf5b010a777abc Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 26 Jan 2022 12:56:23 +0100 Subject: [PATCH 609/902] update scripts and changelog --- CHANGELOG.md | 37 +++++++++++++++++++++++++++++++++++++ bcftools.wdl | 4 ++-- scripts | 2 +- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 126f1ed9..6c0db947 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,43 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Added a task for SnpEff. ++ Adjusted runtime settings for sambamba Markdup. ++ Added a task for sambamba Flagstat. ++ Added a task for Picard CollectWgsMetrics. ++ Added a task for Peach. 
++ Added tasks for HMFtools: + + Amber + + Cobalt + + Cuppa + + CuppaChart + + GripssApplicationKt + + GripssHardFilterApplicationKt + + HealthChecker + + Linx + + Protect + + Purple + + Sage + + VirusInterpreter ++ Added a task for VirusBreakend. ++ Added a task for GridssAnnotateVcfRepeatmasker. ++ Bumped GRIDSS version to 2.12.2. ++ Adjusted GRIDSS runtime settings. ++ Added optional inputs to GRIDSS: + + blacklistBed + + gridssProperties ++ Added a task for GRIDSS AnnotateInsertedSequence. ++ Added a task for ExtractSigPredictHRD. ++ Added a task for DeconstructSigs. ++ Added option useSoftclippingForSupplementary (default false) to + BWA mem. ++ Adjusted BWA mem runtime settings. ++ Added a task for bedtools coverage. ++ Added a task for bcftools filter. ++ Adjusted runtime settings for bcftools annotate. ++ Added optional inputs to bcftools annotate: + + inputFileIndex + + annsFileIndex + Update parameter_meta for macs2 + Add sample position in array task. diff --git a/bcftools.wdl b/bcftools.wdl index 13ce36be..88d97cd0 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -44,8 +44,8 @@ task Annotate { String? regions File? regionsFile File? renameChrs - File? samplesFile - + File? 
samplesFile + Int threads = 0 String memory = "4G" Int timeMinutes = 60 + ceil(size(inputFile, "G")) diff --git a/scripts b/scripts index c31670d3..84690a30 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit c31670d3a9222a2feafc649cbc118c95afbc7189 +Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 From bf4c1a3e8ab1bbd73a8d7a3fe29a15ac8ad69153 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 26 Jan 2022 16:24:09 +0100 Subject: [PATCH 610/902] adress comments --- hmftools.wdl | 108 +++++++++++++++++++++++++-------------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 1537bce5..0b4ba6d0 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -22,9 +22,9 @@ version 1.0 task Amber { input { - String normalName - File normalBam - File normalBamIndex + String referenceName + File referenceBam + File referenceBamIndex String tumorName File tumorBam File tumorBamIndex @@ -43,8 +43,8 @@ task Amber { command { AMBER -Xmx~{javaXmx} \ - -reference ~{normalName} \ - -reference_bam ~{normalBam} \ + -reference ~{referenceName} \ + -reference_bam ~{referenceBam} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ -output_dir ~{outputDir} \ @@ -63,8 +63,8 @@ task Amber { File tumorContaminationVcfIndex = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz.tbi" File tumorContaminationTsv = "~{outputDir}/~{tumorName}.amber.contamination.tsv" File tumorQc = "~{outputDir}/~{tumorName}.amber.qc" - File normalSnpVcf = "~{outputDir}/~{normalName}.amber.snp.vcf.gz" - File normalSnpVcfIndex = "~{outputDir}/~{normalName}.amber.snp.vcf.gz.tbi" + File normalSnpVcf = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz" + File normalSnpVcfIndex = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz.tbi" Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex, tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, normalSnpVcf, normalSnpVcfIndex] @@ -78,9 +78,9 @@ task 
Amber { } parameter_meta { - normalName: {description: "the name of the normal sample.", category: "required"} - normalBam: {description: "The normal BAM file.", category: "required"} - normalBamIndex: {description: "The index for the normal BAM file.", category: "required"} + referenceName: {description: "the name of the normal sample.", category: "required"} + referenceBam: {description: "The normal BAM file.", category: "required"} + referenceBamIndex: {description: "The index for the normal BAM file.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} tumorBam: {description: "The tumor BAM file.", category: "required"} tumorBamIndex: {description: "The index for the tumor BAM file.", category: "required"} @@ -102,9 +102,9 @@ task Amber { task Cobalt { input { - String normalName - File normalBam - File normalBamIndex + String referenceName + File referenceBam + File referenceBamIndex String tumorName File tumorBam File tumorBamIndex @@ -120,8 +120,8 @@ task Cobalt { command { COBALT -Xmx~{javaXmx} \ - -reference ~{normalName} \ - -reference_bam ~{normalBam} \ + -reference ~{referenceName} \ + -reference_bam ~{referenceBam} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ -output_dir ~{outputDir} \ @@ -131,9 +131,9 @@ task Cobalt { output { File version = "~{outputDir}/cobalt.version" - File normalGcMedianTsv = "~{outputDir}/~{normalName}.cobalt.gc.median.tsv" - File normalRationMedianTsv = "~{outputDir}/~{normalName}.cobalt.ratio.median.tsv" - File normalRationPcf = "~{outputDir}/~{normalName}.cobalt.ratio.pcf" + File normalGcMedianTsv = "~{outputDir}/~{referenceName}.cobalt.gc.median.tsv" + File normalRationMedianTsv = "~{outputDir}/~{referenceName}.cobalt.ratio.median.tsv" + File normalRationPcf = "~{outputDir}/~{referenceName}.cobalt.ratio.pcf" File tumorGcMedianTsv = "~{outputDir}/~{tumorName}.cobalt.gc.median.tsv" File tumorRatioPcf = "~{outputDir}/~{tumorName}.cobalt.ratio.pcf" File tumorRatioTsv = 
"~{outputDir}/~{tumorName}.cobalt.ratio.tsv" @@ -150,9 +150,9 @@ task Cobalt { } parameter_meta { - normalName: {description: "the name of the normal sample.", category: "required"} - normalBam: {description: "The normal BAM file.", category: "required"} - normalBamIndex: {description: "The index for the normal BAM file.", category: "required"} + referenceName: {description: "the name of the normal sample.", category: "required"} + referenceBam: {description: "The normal BAM file.", category: "required"} + referenceBamIndex: {description: "The index for the normal BAM file.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} tumorBam: {description: "The tumor BAM file.", category: "required"} tumorBamIndex: {description: "The index for the tumor BAM file.", category: "required"} @@ -279,7 +279,7 @@ task GripssApplicationKt { File inputVcf String outputPath = "gripss.vcf.gz" String tumorName - String normalName + String referenceName File referenceFasta File referenceFastaFai File referenceFastaDict @@ -287,8 +287,8 @@ task GripssApplicationKt { File breakendPon File breakpointPon - String memory = "33G" - String javaXmx = "32G" + String memory = "32G" + String javaXmx = "31G" Int timeMinutes = 45 String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" } @@ -298,7 +298,7 @@ task GripssApplicationKt { -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ -tumor ~{tumorName} \ - -reference ~{normalName} \ + -reference ~{referenceName} \ -ref_genome ~{referenceFasta} \ -breakpoint_hotspot ~{breakpointHotspot} \ -breakend_pon ~{breakendPon} \ @@ -383,9 +383,9 @@ task GripssHardFilterApplicationKt { task HealthChecker { input { String outputDir = "." 
- String normalName - File normalFlagstats - File normalMetrics + String referenceName + File referenceFlagstats + File referenceMetrics String tumorName File tumorFlagstats File tumorMetrics @@ -401,9 +401,9 @@ task HealthChecker { set -e mkdir -p ~{outputDir} health-checker -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -reference ~{normalName} \ - -ref_flagstat_file ~{normalFlagstats} \ - -ref_wgs_metrics_file ~{normalMetrics} \ + -reference ~{referenceName} \ + -ref_flagstat_file ~{referenceFlagstats} \ + -ref_wgs_metrics_file ~{referenceMetrics} \ -tumor ~{tumorName} \ -tum_flagstat_file ~{tumorFlagstats} \ -tum_wgs_metrics_file ~{tumorMetrics} \ @@ -425,9 +425,9 @@ task HealthChecker { parameter_meta { outputDir: {description: "The path the output will be written to.", category:"required"} - normalName: {description: "The name of the normal sample.", category: "required"} - normalFlagstats: {description: "The flagstats for the normal sample.", category: "required"} - normalMetrics: {description: "The picard WGS metrics for the normal sample.", category: "required"} + referenceName: {description: "The name of the normal sample.", category: "required"} + referenceFlagstats: {description: "The flagstats for the normal sample.", category: "required"} + referenceMetrics: {description: "The picard WGS metrics for the normal sample.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} tumorFlagstats: {description: "The flagstats for the tumor sample.", category: "required"} tumorMetrics: {description: "The picard WGS metrics for the tumor sample.", category: "required"} @@ -546,11 +546,11 @@ task Protect { input { String refGenomeVersion String tumorName - String normalName + String referenceName Array[String]+ sampleDoids String outputDir = "." 
Array[File]+ serveActionability - File doidsJson + File doidJson File purplePurity File purpleQc File purpleDriverCatalogSomatic @@ -576,11 +576,11 @@ task Protect { protect -Xmx~{javaXmx} \ -ref_genome_version ~{refGenomeVersion} \ -tumor_sample_id ~{tumorName} \ - -reference_sample_id ~{normalName} \ + -reference_sample_id ~{referenceName} \ -primary_tumor_doids '~{sep=";" sampleDoids}' \ -output_dir ~{outputDir} \ -serve_actionability_dir ~{sub(serveActionability[0], basename(serveActionability[0]), "")} \ - -doid_json ~{doidsJson} \ + -doid_json ~{doidJson} \ -purple_purity_tsv ~{purplePurity} \ -purple_qc_file ~{purpleQc} \ -purple_somatic_driver_catalog_tsv ~{purpleDriverCatalogSomatic} \ @@ -608,11 +608,11 @@ task Protect { parameter_meta { refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} - normalName: {description: "The name of the normal sample.", category: "required"} + referenceName: {description: "The name of the normal sample.", category: "required"} sampleDoids: {description: "The DOIDs (Human Disease Ontology) for the primary tumor.", category: "required"} outputDir: {description: "The directory the outputs will be written to.", category: "required"} serveActionability: {description: "The actionability files generated by hmftools' serve.", category: "required"} - doidsJson: {description: "A json with the DOID (Human Disease Ontology) tree.", category: "required"} + doidJson: {description: "A json with the DOID (Human Disease Ontology) tree.", category: "required"} purplePurity: {description: "The purity file generated by purple.", category: "required"} purpleQc: {description: "The QC file generated by purple.", category: "required"} purpleDriverCatalogSomatic: {description: "The somatic driver catalog generated by purple.", category: "required"} @@ -639,7 +639,7 @@ task Protect { task 
Purple { input { - String normalName + String referenceName String tumorName String outputDir = "./purple" Array[File]+ amberOutput @@ -667,7 +667,7 @@ task Purple { command { PURPLE -Xmx~{javaXmx} \ - -reference ~{normalName} \ + -reference ~{referenceName} \ -tumor ~{tumorName} \ -output_dir ~{outputDir} \ -amber ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ @@ -713,7 +713,7 @@ task Purple { File somaticClonalityPlot = "~{outputDir}/plot/~{tumorName}.somatic.clonality.png" File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" File purpleVersion = "~{outputDir}/purple.version" - File circosNormalRatio = "~{outputDir}/circos/~{normalName}.ratio.circos" + File circosNormalRatio = "~{outputDir}/circos/~{referenceName}.ratio.circos" File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" @@ -744,7 +744,7 @@ task Purple { } parameter_meta { - normalName: {description: "the name of the normal sample.", category: "required"} + referenceName: {description: "the name of the normal sample.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} outputDir: {description: "The path to the output directory.", category: "common"} amberOutput: {description: "The output files of hmftools amber.", category: "required"} @@ -787,9 +787,9 @@ task Sage { Boolean panelOnly = false String outputPath = "./sage.vcf.gz" - String? normalName - File? normalBam - File? normalBamIndex + String? referenceName + File? referenceBam + File? referenceBamIndex Int? hotspotMinTumorQual Int? panelMinTumorQual Int? 
hotspotMaxGermlineVaf @@ -801,8 +801,8 @@ task Sage { Int threads = 4 String javaXmx = "50G" - String memory = "60G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 9 / threads) + String memory = "51G" + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, referenceBam]), "G") * 9 / threads) String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_0" } @@ -810,8 +810,8 @@ task Sage { SAGE -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ - ~{"-reference " + normalName} \ - ~{"-reference_bam " + normalBam} \ + ~{"-reference " + referenceName} \ + ~{"-reference_bam " + referenceBam} \ -ref_genome ~{referenceFasta} \ -hotspots ~{hotspots} \ -panel_bed ~{panelBed} \ @@ -848,9 +848,9 @@ task Sage { tumorName: {description: "The name of the tumor sample.", category: "required"} tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} - normalName: {description: "The name of the normal/reference sample.", category: "common"} - normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} + referenceName: {description: "The name of the normal/reference sample.", category: "common"} + referenceBam: {description: "The BAM file for the normal sample.", category: "common"} + referenceBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} From fb91a02460b22501cc1c57dc381a486a29b01fbd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 27 Jan 2022 12:01:28 +0100 Subject: [PATCH 611/902] update healthchecker 
--- hmftools.wdl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 0b4ba6d0..76620e3c 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -408,13 +408,16 @@ task HealthChecker { -tum_flagstat_file ~{tumorFlagstats} \ -tum_wgs_metrics_file ~{tumorMetrics} \ -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ - -output_dir ~{outputDir} + -output_dir ~{outputDir} + test -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' && echo 'true' > '~{outputDir}/succeeded' + test -e '~{outputDir}/~{tumorName}.HealthCheckFailed' && echo 'false' > '~{outputDir}/succeeded' } - output { - File? healthCheckSucceeded = "~{outputDir}/~{tumorName}.HealthCheckSucceeded" - File? healthCheckFailed = "~{outputDir}/~{tumorName}.HealthCheckFailed" + Boolean succeeded = read_boolean("result") + File outputFile = if succeeded + then "~{outputDir}/~{tumorName}.HealthCheckSucceeded" + else "~{outputDir}/~{tumorName}.HealthCheckFailed" } runtime { From f234b0e8f46192d248e564f22bcd88912b890576 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 28 Jan 2022 14:42:42 +0100 Subject: [PATCH 612/902] add missing parameter_meta --- gridss.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 7d6a1ebf..d3d251a5 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -146,7 +146,8 @@ task GRIDSS { gridssProperties: {description: "A properties file for gridss.", category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} - jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} + jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling", category: "advanced"} + nonJvmMemoryGb: {description: "The amount of memory in Gb to be requested besides JVM memory.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: 
"advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From b3b79f62d4a538642318c0316080f9a098ca4b48 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Feb 2022 14:24:45 +0100 Subject: [PATCH 613/902] add a task for Pave --- hmftools.wdl | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 76620e3c..1dbfd5de 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -545,6 +545,79 @@ task Linx { } } +task Pave { + input { + String outputDir = "./" + String sampleName + File vcfFile + File vcfFileIndex + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File refGenomeVersion + File driverGenePanel + #The following should be in the same directory. + File geneDataCsv + File proteinFeaturesCsv + File transExonDataCsv + File transSpliceDataCsv + + Int timeMinutes = 50 + String javaXmx = "8G" + String memory = "9G" + String dockerImage = "quay.io/biowdl/pave:v1.0" + } + + command { + set -e + mkdir -p ~{outputDir} + pave -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -sample ~{sampleName} \ + -vcf_file ~{vcfFile} \ + -output_dir ~{outputDir} \ + -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{refGenomeVersion} \ + -driver_gene_panel ~{driverGenePanel} + } + + output { + File outputVcf = "~{outputDir}/~{sub(basename(geneDataCsv), 'vcf.gz$', 'pave.vcf.gz')}" + File outputVcfIndex = "~{outputVcf}.tbi" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + sampleName: {description: "The name of the sample.", category: "required"} + vcfFile: {description: "The input VCF file.", category: "required"} + 
vcfFileIndex: {description: "The index for the input vcf file.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"HG19\" or \"HG38\".", category: "required"} + driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} + #The following should be in the same directory. + geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} + transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Protect { input { String refGenomeVersion From 3ffa051fd2be4edb4fbc466836c9da782e68be27 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Feb 2022 17:04:00 +0100 Subject: [PATCH 614/902] add task for gripss 2.0 --- hmftools.wdl | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 1dbfd5de..c0c835b5 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -274,7 +274,79 @@ task CuppaChart { } } +task Gripss { + input { + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File knownFusionPairBedpe + File breakendPon + File breakpointPon + String referenceName + String tumorName + File vcf + File vcfIndex + String outputDir = "./" + + String memory = "17G" + String javaXmx = "16G" + Int timeMinutes = 50 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:2.0--hdfd78af_0" + } + + command { + set -e + mkdir -p ~{outputDir} + gripss -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -ref_genome ~{referenceFasta} \ + -known_hotspot_file ~{knownFusionPairBedpe} \ + -pon_sgl_file ~{breakendPon} \ + -pon_sv_file ~{breakpointPon} \ + -reference ~{referenceName} \ + -sample ~{tumorName} \ + -vcf ~{vcf} \ + -output_dir ~{outputDir} \ + -output_id somatic + } + + output { + File fullVcf = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz" + File fullVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz.tbi" + File filteredVcf = "~{outputDir}/~{tumorName}.gripss.somatic.filtered.vcf.gz" + File filteredVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.filtered.vcf.gz.tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence 
dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + knownFusionPairBedpe: {description: "Equivalent to the `-known_hotspot_file` option.", category: "required"} + breakendPon: {description: "Equivalent to the `-pon_sgl_file` option.", category: "required"} + breakpointPon: {description: "Equivalent to the `-pon_sv_file` option.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + referenceName: {description: "The name of the normal sample.", category: "required"} + vcf: {description: "The input VCF.", category: "required"} + vcfIndex: {description: "The index for the input VCF.", category: "required"} + outputDir: {description: "The path the output will be written to.", category:"required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GripssApplicationKt { + # Obsolete input { File inputVcf String outputPath = "gripss.vcf.gz" @@ -322,13 +394,15 @@ task GripssApplicationKt { parameter_meta { inputVcf: {description: "The input VCF.", category: "required"} outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} + referenceName: {description: "The name of the normal sample.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} - breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} + breakpointPon: {description: "Equivalent to the `-breakpoint_pon` option.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} @@ -339,6 +413,7 @@ task GripssApplicationKt { } task GripssHardFilterApplicationKt { + # Obsolete input { File inputVcf String outputPath = "gripss_hard_filter.vcf.gz" @@ -724,6 +799,7 @@ task Purple { File somaticVcf File germlineVcf File filteredSvVcf + File filteredSvVcfIndex File? fullSvVcf File? 
fullSvVcfIndex File referenceFasta From 22a880cdd2223034ebb80fcdb1006b2bd3fe81c7 Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 11 Feb 2022 11:52:10 +0100 Subject: [PATCH 615/902] update purple to 3.2 --- hmftools.wdl | 54 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index c0c835b5..caafa440 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -65,8 +65,8 @@ task Amber { File tumorQc = "~{outputDir}/~{tumorName}.amber.qc" File normalSnpVcf = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz" File normalSnpVcfIndex = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz.tbi" - Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex, - tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, + Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex, + tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, normalSnpVcf, normalSnpVcfIndex] } @@ -110,7 +110,7 @@ task Cobalt { File tumorBamIndex String outputDir = "./cobalt" File gcProfile - + Int threads = 1 String memory = "5G" String javaXmx = "4G" @@ -174,7 +174,7 @@ task Cuppa { Array[File]+ purpleOutput String sampleName Array[String]+ categories = ["DNA"] - Array[File]+ referenceData + Array[File]+ referenceData File purpleSvVcf File purpleSvVcfIndex File purpleSomaticVcf @@ -244,7 +244,7 @@ task CuppaChart { } command { - set -e + set -e mkdir -p ~{outputDir} cuppa-chart \ -sample ~{sampleName} \ @@ -429,7 +429,7 @@ task GripssHardFilterApplicationKt { -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} + -output_vcf ~{outputPath} } output { @@ -490,7 +490,7 @@ task HealthChecker { output { Boolean succeeded = read_boolean("result") - File outputFile = if succeeded + File outputFile = if 
succeeded then "~{outputDir}/~{tumorName}.HealthCheckSucceeded" else "~{outputDir}/~{tumorName}.HealthCheckFailed" } @@ -675,10 +675,9 @@ task Pave { referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"HG19\" or \"HG38\".", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} - #The following should be in the same directory. geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} @@ -757,7 +756,7 @@ task Protect { } parameter_meta { - refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. 
Either \"37\" or \"38\".", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} referenceName: {description: "The name of the normal sample.", category: "required"} sampleDoids: {description: "The DOIDs (Human Disease Ontology) for the primary tumor.", category: "required"} @@ -800,41 +799,47 @@ task Purple { File germlineVcf File filteredSvVcf File filteredSvVcfIndex - File? fullSvVcf - File? fullSvVcfIndex + File fullSvVcf + File fullSvVcfIndex File referenceFasta File referenceFastaFai File referenceFastaDict File driverGenePanel File somaticHotspots File germlineHotspots - + #The following should be in the same directory. + File geneDataCsv + File proteinFeaturesCsv + File transExonDataCsv + File transSpliceDataCsv + Int threads = 1 Int timeMinutes = 30 String memory = "9G" String javaXmx = "8G" - # clone of quay.io/biocontainers/hmftools-purple:3.1--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6' - String dockerImage = "quay.io/biowdl/hmftools-purple:3.1" + # clone of quay.io/biocontainers/hmftools-purple:3.2--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6' + String dockerImage = "quay.io/biowdl/hmftools-purple:3.2" } command { PURPLE -Xmx~{javaXmx} \ -reference ~{referenceName} \ + -germline_vcf ~{germlineVcf} \ + -germline_hotspots ~{germlineHotspots} \ -tumor ~{tumorName} \ -output_dir ~{outputDir} \ -amber ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ -cobalt ~{sub(cobaltOutput[0], basename(cobaltOutput[0]), "")} \ -gc_profile ~{gcProfile} \ -somatic_vcf ~{somaticVcf} \ - -germline_vcf ~{germlineVcf} \ -structural_vcf ~{filteredSvVcf} \ - ~{"-sv_recovery_vcf " + fullSvVcf} \ + -sv_recovery_vcf ~{fullSvVcf} \ -circos /usr/local/bin/circos \ -ref_genome ~{referenceFasta} \ - -driver_catalog \ - -driver_gene_panel ~{driverGenePanel} \ + -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -run_drivers \ -somatic_hotspots 
~{somaticHotspots} \ - -germline_hotspots ~{germlineHotspots} \ + -driver_gene_panel ~{driverGenePanel} \ -threads ~{threads} } @@ -877,8 +882,8 @@ task Purple { File circosMap = "~{outputDir}/circos/~{tumorName}.map.circos" File circosSnp = "~{outputDir}/circos/~{tumorName}.snp.circos" Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, - purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, - purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, + purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, + purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, @@ -913,6 +918,11 @@ task Purple { driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} somaticHotspots: {description: "A vcf file with hotspot somatic variant sites.", category: "required"} germlineHotspots: {description: "A vcf file with hotspot germline variant sites.", category: "required"} + geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} + transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, 
`proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"} + threads: {description: "The number of threads the program will use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 0d7909255421e4e7b30cfcd51e68da1530221427 Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 11 Feb 2022 12:41:34 +0100 Subject: [PATCH 616/902] update linx to 1.17 --- hmftools.wdl | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index caafa440..810685bf 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -529,8 +529,6 @@ task Linx { String outputDir = "./linx" File fragileSiteCsv File lineElementCsv - File replicationOriginsBed - File viralHostsCsv File knownFusionCsv File driverGenePanel #The following should be in the same directory. @@ -539,10 +537,10 @@ task Linx { File transExonDataCsv File transSpliceDataCsv - String memory = "5G" - String javaXmx = "4G" + String memory = "9G" + String javaXmx = "8G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.16--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.17--hdfd78af_0" } command { @@ -554,9 +552,7 @@ task Linx { -output_dir ~{outputDir} \ -fragile_site_file ~{fragileSiteCsv} \ -line_element_file ~{lineElementCsv} \ - -replication_origins_file ~{replicationOriginsBed} \ - -viral_hosts_file ~{viralHostsCsv} \ - -gene_transcripts_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ -check_fusions \ -known_fusion_file ~{knownFusionCsv} \ -check_drivers \ @@ -598,12 +594,10 @@ task Linx { svVcf: {description: "A VCF file containing structural variants, produced using GRIDSS, annotated for viral insertions and postprocessed with GRIPSS.", category: "required"} svVcfIndex: {description: "Index for the structural variants VCf file.", category: "required"} purpleOutput: {description: "The 
files produced by PURPLE.", category: "required"} - refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"HG19\" or \"HG38\".", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} outputDir: {description: "The directory the outputs will be written to.", category: "required"} fragileSiteCsv: {description: "A list of known fragile sites.", category: "required"} lineElementCsv: {description: "A list of known LINE source regions.", category: "required"} - replicationOriginsBed: {description: "Replication timing input in BED format with replication timing as the 4th column.", category: "required"} - viralHostsCsv: {description: "A list of the viruses which were used for annotation of the GRIDSS results.", category: "required"} knownFusionCsv: {description: "A CSV file describing known fusions.", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} From d6bfc449dfc6979511e746a52f6fddf0e30e7853 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 12:47:06 +0100 Subject: [PATCH 617/902] Speed up CI by using conda caching and only checking changed files Squashed commit of the following: commit 7fa743cc028b8e2c86bde49244834ee13c13e95b Author: Ruben Vorderman Date: Fri Feb 11 12:34:33 2022 +0100 Add comment about activate environment commit 2de7802e03f90cd6e26b3d8287fcb0c6b8b81d11 Author: Ruben Vorderman Date: Fri Feb 11 12:26:38 2022 +0100 Invalidate cache commit 8ca394d41361acf2511249e3e29688baf0705004 Author: Ruben Vorderman Date: Fri Feb 11 12:26:27 2022 +0100 Consolidate steps commit 31d09c6f0e86d4625bfa3a6e94a7ced910c7410c Author: Ruben Vorderman 
Date: Fri Feb 11 11:27:53 2022 +0100 Use correct path for caching commit 7e1374ed323bb38d674da09d7270def4a2192d00 Author: Ruben Vorderman Date: Fri Feb 11 11:22:10 2022 +0100 Do not cache conda packages commit deffd8a0776e15a4df58a1398fcbcb8b0f1430f0 Author: Ruben Vorderman Date: Fri Feb 11 11:20:59 2022 +0100 Remove unnecessary whitespace commit 8e97bcd4dfd8ee459a23f1931465875c0a41fd49 Author: Ruben Vorderman Date: Fri Feb 11 11:20:03 2022 +0100 Remove debugging task commit 8338cd4b843245d781d7028f1f1acad45c8c7d0d Author: Ruben Vorderman Date: Fri Feb 11 11:17:18 2022 +0100 Try to change path commit 6a75baa36eee340d7a6d766c89163e960a6203b0 Author: Ruben Vorderman Date: Fri Feb 11 11:12:18 2022 +0100 Delete path line in current github env commit cbbb9fe67cb796a010c01760ca2e05986f979ced Author: Ruben Vorderman Date: Fri Feb 11 11:05:50 2022 +0100 Properly activate commit 671568b7c8d79a5141429068a32b72814110b361 Author: Ruben Vorderman Date: Fri Feb 11 10:59:46 2022 +0100 Also printenv commit 4c8945e8d5305753482538389ddc8af892f493f9 Author: Ruben Vorderman Date: Fri Feb 11 10:56:45 2022 +0100 Manual activate commit a925c53a99836e81eb0e2b21075356370906c641 Author: Ruben Vorderman Date: Fri Feb 11 10:53:10 2022 +0100 Reset cache number commit 645ed2b4504d067ea1b26a0922943ef3d5c34622 Author: Ruben Vorderman Date: Fri Feb 11 10:51:09 2022 +0100 Activate environment path commit 5852d29fb538b80f06a738677e7ae271c6c57fa3 Author: Ruben Vorderman Date: Fri Feb 11 10:31:07 2022 +0100 Proper setting for cache commit 83f14a939d662d628ca47dc7b82bbc114f164541 Author: Ruben Vorderman Date: Fri Feb 11 10:03:45 2022 +0100 List environments commit 59267fbba267c0b1726733e390ff471d7012cefa Author: Ruben Vorderman Date: Fri Feb 11 10:01:58 2022 +0100 Activate environment manually commit 0a4d2cd5644407308fcc78356a8aef55de86c0c6 Author: Ruben Vorderman Date: Fri Feb 11 09:57:32 2022 +0100 List environments commit 0bc8fa939eb35a6eb352bb58b1235efecd34056f Author: Ruben Vorderman Date: Fri Feb 
11 09:52:02 2022 +0100 Add mambaforge comment commit 719d92a0b5245be891d1b5c0eb38d8048abdc5a1 Author: Ruben Vorderman Date: Fri Feb 11 09:44:18 2022 +0100 Use normal conda, since environment is cached commit e5efbb75109f40cfa8b7b33280ec9707a31970d1 Author: Ruben Vorderman Date: Fri Feb 11 09:38:21 2022 +0100 Also cache environments.txt commit 4fa66afb6606ceeb7be577df9f20704d96fc3af0 Author: Ruben Vorderman Date: Fri Feb 11 09:34:46 2022 +0100 Check home commit 2ac42e42829141650585780d27f39d06ebaf8f75 Author: Ruben Vorderman Date: Wed Feb 9 17:00:27 2022 +0100 Add an annoying but effective manual check commit 78d88eae8cb3d1ca44709ce90bcffeb7c5786c1b Author: Ruben Vorderman Date: Wed Feb 9 16:54:29 2022 +0100 Cache correct path commit c05c94561785b1d5e198588dc210313014f3913d Author: Ruben Vorderman Date: Wed Feb 9 16:45:51 2022 +0100 Rename workflow commit 1c67f010c589c1c1fb407ac32e8ed74afdb3ddfd Author: Ruben Vorderman Date: Wed Feb 9 16:45:05 2022 +0100 Use correct quotes commit 7f9d2e559697e9d9d1f6df3514c8269612e7bcee Author: Ruben Vorderman Date: Wed Feb 9 16:42:25 2022 +0100 Only check changed wdl files commit 0e2a15b38e206fdb96d2d8b225999d6e5c9e6e73 Author: Ruben Vorderman Date: Wed Feb 9 16:34:35 2022 +0100 remove v parameter commit 89348dde8a84cd1d935999255c64428c99db7042 Author: Ruben Vorderman Date: Wed Feb 9 16:19:02 2022 +0100 Remove newline commit 752b8cb4a8407908348d8424fdc4b89d3219fdad Author: Ruben Vorderman Date: Wed Feb 9 16:17:33 2022 +0100 Git fetch develop commit 9216a3f846268ba00d0fe922055536b06dc975b3 Author: Ruben Vorderman Date: Wed Feb 9 15:53:13 2022 +0100 Specifically check origin commit b54c140de4fc0bf31d7c95384831aedb253f35a3 Author: Ruben Vorderman Date: Wed Feb 9 15:44:50 2022 +0100 Only chek files that are different from the base with womtool validate commit d963818753272aa18311d3d29276c3db6241e85d Author: Ruben Vorderman Date: Wed Feb 9 15:33:50 2022 +0100 Correctly use data commit 8113bfdd2e1feda6047e13da79885a3131c000e6 Author: 
Ruben Vorderman Date: Wed Feb 9 15:32:48 2022 +0100 Set correct env cache param commit 4f7af2ed0365887be9147954290c4b807673afdd Author: Ruben Vorderman Date: Wed Feb 9 15:30:23 2022 +0100 Add lint-evnironment commit b026b5a8a77ea131b229a50cb28e0d301915cfb8 Author: Ruben Vorderman Date: Wed Feb 9 15:28:50 2022 +0100 Use mamba env update commit 41fda1a9f52d56578a76f8bf185db86da2128a0e Author: Ruben Vorderman Date: Wed Feb 9 15:25:21 2022 +0100 Use cache commit fd1a64261bea956b6b31a26f5eaa38ce4a63121c Author: Ruben Vorderman Date: Wed Feb 9 14:31:17 2022 +0100 Add missing done statement commit 4a64eb43535f48e0558ba6c5dc408178784ef207 Merge: a36a227 f234b0e Author: Ruben Vorderman Date: Wed Feb 9 14:23:48 2022 +0100 Merge branch 'develop' into BIOWDL-583 commit a36a2274116732bc8e3229a267fe35ee4d61e7da Author: Ruben Vorderman Date: Wed Feb 9 14:23:26 2022 +0100 Implement all checks in lint.yml directly commit 391bb0de9619e75293599a1be1d24322fd466f4c Author: Ruben Vorderman Date: Wed Feb 9 14:11:33 2022 +0100 Use a separate lint file commit 832a131cee403ec0ac7d983d6e82fd567ce1b246 Author: Ruben Vorderman Date: Tue Dec 14 16:32:30 2021 +0100 Use mamba-forge and mamba to install dependencies --- .github/PULL_REQUEST_TEMPLATE.md | 1 + .github/lint-environment.yml | 9 ++++ .github/workflows/ci.yml | 30 ----------- .github/workflows/lint.yml | 93 ++++++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+), 30 deletions(-) create mode 100644 .github/lint-environment.yml delete mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/lint.yml diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 3b4ec9ac..372071ee 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -2,3 +2,4 @@ - [ ] Pull request details were added to CHANGELOG.md. - [ ] Documentation was updated (if required). - [ ] `parameter_meta` was added/updated (if required). 
+- [ ] Submodule branches are on develop or a tagged commit. diff --git a/.github/lint-environment.yml b/.github/lint-environment.yml new file mode 100644 index 00000000..63b538fc --- /dev/null +++ b/.github/lint-environment.yml @@ -0,0 +1,9 @@ +name: biowdl-lint +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - cromwell + - wdl-aid + - miniwdl diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 78566111..00000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: Continuous integration - -on: - pull_request: - paths_ignore: - - "docs/**" - -defaults: - run: - # This is needed for miniconda, see: - # https://github.com/marketplace/actions/setup-miniconda#important - shell: bash -l {0} - -jobs: - lint: - runs-on: ubuntu-latest - name: Womtool validate and submodule up to date. - steps: - - uses: actions/checkout@v2.3.4 - with: - submodules: recursive - - name: install miniconda - uses: conda-incubator/setup-miniconda@v2.0.1 - with: - channels: conda-forge,bioconda,defaults - # Conda-incubator uses 'test' environment by default. 
- - name: install requirements - run: conda install -n test cromwell miniwdl wdl-aid - - name: run linting - run: bash scripts/biowdl_lint.sh diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..e6edbbab --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,93 @@ +name: Linting + +on: + pull_request: + paths_ignore: + - "docs/**" + +defaults: + run: + # This is needed for miniconda, see: + # https://github.com/marketplace/actions/setup-miniconda#important + shell: bash -l {0} + +jobs: + lint: + runs-on: ubuntu-latest + name: Linting checks + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + + - name: Set cache date + run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV + + - name: Cache conda environment + uses: actions/cache@v2.1.7 + env: + # Increase this value to manually invalidate the cache + CACHE_NUMBER: 0 + with: + path: /usr/share/miniconda/envs/biowdl-lint + key: + ${{runner.os}}-biowdl-lint-${{ env.CACHE_NUMBER }}-${{env.DATE}}-${{ hashFiles('.github/lint-environment.yml') }} + id: env_cache + + # Use the builtin conda. This is the fastest installation. It may not be + # the fastest for resolving, but the package cache mitigates that problem. + # Since this installs fastest, it is fastest for all runs where a cache + # hit occurs. + - name: install miniconda + uses: conda-incubator/setup-miniconda@v2.1.1 + with: + channels: conda-forge,bioconda,defaults + channel-priority: strict + auto-activate-base: false + use-only-tar-bz2: true # Needed for proper caching according to the documentation. + # activate-environment is broken! This always seems to create a new environment. + # Activation is therefore done separately. 
+ + - name: Create test environment if no cache is present + run: conda env create -n biowdl-lint -f .github/lint-environment.yml + if: steps.env_cache.outputs.cache-hit != 'true' + + - name: Activate test environment + # The new PATH should be passed to the environment, otherwise it won't register. + run: | + conda activate biowdl-lint + echo "PATH=$PATH" >> $GITHUB_ENV + + - name: Fetch develop branch for comparisons + run: git fetch --depth=1 origin develop + + - name: run womtool validate + # Only check files that have changed from the base reference. + # Womtool validate checks very slowly, so this saves a lot of time. + run: | + bash -c " + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + womtool validate $WDL_FILE + done + " + - name: run miniwdl check + run: bash -c 'miniwdl check $(git ls-files *.wdl)' + + - name: Check copyright headers + run: | + bash -c ' + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" + done + ' + - name: Check parameter_meta for inputs + run: | + bash -c " + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || + if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr + then + exit 1 + fi + done + " From 54337a3c99596e48149d0d2522cc79c0a7b379e9 Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 11 Feb 2022 13:38:34 +0100 Subject: [PATCH 618/902] update peach to 1.5 --- peach.wdl | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/peach.wdl b/peach.wdl index af44daec..6a5770f4 100644 --- a/peach.wdl +++ b/peach.wdl @@ -22,7 +22,6 @@ version 1.0 task Peach { input { - File transcriptTsv File germlineVcf File germlineVcfIndex String tumorName @@ -31,28 +30,26 @@ task Peach { File panelJson String memory = 
"2G" - String dockerImage = "quay.io/biowdl/peach:v1.0" + String dockerImage = "quay.io/biowdl/peach:v1.5" Int timeMinutes = 5 } command { + set -e + mkdir -p ~{outputDir} peach \ - --recreate_bed \ - --transcript_tsv ~{transcriptTsv} \ - ~{germlineVcf} \ - ~{tumorName} \ - ~{normalName} \ - 1.0 \ - ~{outputDir} \ - ~{panelJson} \ - vcftools + -vcf ~{germlineVcf} \ + --sample_t_id ~{tumorName} \ + --sample_r_id ~{normalName} \ + --tool_version 1.5 \ + --outputDir ~{outputDir} \ + --panel } output { File callsTsv = "~{outputDir}/~{tumorName}.peach.calls.tsv" - File filteredVcf = "~{outputDir}/~{tumorName}.peach.filtered.vcf" File genotypeTsv = "~{outputDir}/~{tumorName}.peach.genotype.tsv" - Array[File] outputs = [callsTsv, filteredVcf, genotypeTsv] + Array[File] outputs = [callsTsv, genotypeTsv] } runtime { @@ -62,7 +59,6 @@ task Peach { } parameter_meta { - transcriptTsv: {description: "A tsv file describing transcripts.", category: "required"} germlineVcf: {description: "The germline VCF file from hmftools' purple.", category: "required"} germlineVcfIndex: {description: "The germline VCF's index.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} @@ -74,4 +70,4 @@ task Peach { memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} From adb8a68ce8fff78613ee95451db821363b74353b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 16:42:36 +0100 Subject: [PATCH 619/902] Debug task --- .github/workflows/lint.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index e6edbbab..622e0581 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -66,6 +66,7 @@ jobs: # Womtool validate checks very slowly, so this saves a lot of time. run: | bash -c " + set -x for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do womtool validate $WDL_FILE done From 37faa1b46883bb93c6e926141d6145b3ead9fafd Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 16:50:07 +0100 Subject: [PATCH 620/902] Use heredoc script --- .github/workflows/lint.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 622e0581..7eb6fe5d 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -65,25 +65,25 @@ jobs: # Only check files that have changed from the base reference. # Womtool validate checks very slowly, so this saves a lot of time. 
run: | - bash -c " + bash <<- SCRIPT set -x for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do womtool validate $WDL_FILE done - " + SCRIPT - name: run miniwdl check run: bash -c 'miniwdl check $(git ls-files *.wdl)' - name: Check copyright headers run: | - bash -c ' + bash <<- SCRIPT for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" done - ' + SCRIPT - name: Check parameter_meta for inputs run: | - bash -c " + bash <<- SCRIPT for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr @@ -91,4 +91,4 @@ jobs: exit 1 fi done - " + SCRIPT From 7d8cadf598e9359e6ea6d9822fe63210f026acfe Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 17:06:44 +0100 Subject: [PATCH 621/902] Use always upload cache --- .github/workflows/lint.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 7eb6fe5d..11bf7a40 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -24,7 +24,8 @@ jobs: run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV - name: Cache conda environment - uses: actions/cache@v2.1.7 + # Use an always upload cache to prevent solving conda environment again and again on failing linting. 
+ uses: pat-s/always-upload-cache@v2.1.5 env: # Increase this value to manually invalidate the cache CACHE_NUMBER: 0 From eba0865e6865217ed34de9e04ac0f4c1b86f9435 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 17:11:34 +0100 Subject: [PATCH 622/902] Run stuff directly in bash --- .github/workflows/lint.yml | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 11bf7a40..61e3d99f 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -66,30 +66,24 @@ jobs: # Only check files that have changed from the base reference. # Womtool validate checks very slowly, so this saves a lot of time. run: | - bash <<- SCRIPT - set -x - for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do - womtool validate $WDL_FILE - done - SCRIPT + set -x + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + womtool validate $WDL_FILE + done - name: run miniwdl check run: bash -c 'miniwdl check $(git ls-files *.wdl)' - name: Check copyright headers run: | - bash <<- SCRIPT - for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do - grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" - done - SCRIPT + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" + done - name: Check parameter_meta for inputs run: | - bash <<- SCRIPT - for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do - wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || - if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr - then - exit 1 - fi - done - SCRIPT + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do 
+ wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || + if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr + then + exit 1 + fi + done From e72270755a25b5259f99d6e1855bf10926a2dc5d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 17:14:27 +0100 Subject: [PATCH 623/902] Use set -x to better see what happens --- .github/workflows/lint.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 61e3d99f..7ef19e58 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -71,15 +71,19 @@ jobs: womtool validate $WDL_FILE done - name: run miniwdl check - run: bash -c 'miniwdl check $(git ls-files *.wdl)' + run: | + set -x + bash -c 'miniwdl check $(git ls-files *.wdl)' - name: Check copyright headers run: | + set -x for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" done - name: Check parameter_meta for inputs run: | + set -x for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr From 868f3617f22d28ae6855ed8c5d75fd76c967a5db Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 14 Feb 2022 10:51:20 +0100 Subject: [PATCH 624/902] Add format parameter to parameter_meta --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index e17d613b..2afe3bbe 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -67,6 +67,6 @@ task PeakCalling { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - + format: {description: "Which format to use. Use BAMPE for paired-end reads.", category: "common"} } } From 0f6d75c76ed78cc1847acc732fd78ca44b2646a6 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 14 Feb 2022 17:04:49 +0100 Subject: [PATCH 625/902] fix some issues --- hmftools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index c0c835b5..27badc9b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -312,8 +312,8 @@ task Gripss { output { File fullVcf = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz" File fullVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz.tbi" - File filteredVcf = "~{outputDir}/~{tumorName}.gripss.somatic.filtered.vcf.gz" - File filteredVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.filtered.vcf.gz.tbi" + File filteredVcf = "~{outputDir}/~{tumorName}.gripss.filtered.somatic.vcf.gz" + File filteredVcfIndex = "~{outputDir}/~{tumorName}.gripss.filtered.somatic.vcf.gz.tbi" } runtime { @@ -629,7 +629,7 @@ task Pave { File referenceFasta File referenceFastaFai File referenceFastaDict - File refGenomeVersion + String refGenomeVersion File driverGenePanel #The following should be in the same directory. 
File geneDataCsv From b72d2fcff910a8a7cf3c1103f90bcf2974b75b4c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 09:16:17 +0100 Subject: [PATCH 626/902] fix Pave output --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index b349038d..36909ee4 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -652,7 +652,7 @@ task Pave { output { File outputVcf = "~{outputDir}/~{sub(basename(geneDataCsv), 'vcf.gz$', 'pave.vcf.gz')}" - File outputVcfIndex = "~{outputVcf}.tbi" + File outputVcfIndex = "~{outputDir}/~{sub(basename(geneDataCsv), 'vcf.gz$', 'pave.vcf.gz.tbi')}" } runtime { From 0554cfe785f39b9e1ebfef4a2dda7450a4ed749b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 09:36:33 +0100 Subject: [PATCH 627/902] fix copy paste error --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 36909ee4..c9745b57 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -651,8 +651,8 @@ task Pave { } output { - File outputVcf = "~{outputDir}/~{sub(basename(geneDataCsv), 'vcf.gz$', 'pave.vcf.gz')}" - File outputVcfIndex = "~{outputDir}/~{sub(basename(geneDataCsv), 'vcf.gz$', 'pave.vcf.gz.tbi')}" + File outputVcf = "~{outputDir}/~{sub(basename(vcfFile), 'vcf.gz$', 'pave.vcf.gz')}" + File outputVcfIndex = "~{outputDir}/~{sub(basename(vcfFile), 'vcf.gz$', 'pave.vcf.gz.tbi')}" } runtime { From 92d964d52ea3d64f7f927f6b41933098c4ec3678 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 12:33:02 +0100 Subject: [PATCH 628/902] fix purple outputs --- hmftools.wdl | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index c9745b57..2015c125 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -838,11 +838,13 @@ task Purple { } output { - File driverCatalogSomaticTsv = "~{outputDir}/~{tumorName}.driver.catalog.somatic.tsv" File driverCatalogGermlineTsv = 
"~{outputDir}/~{tumorName}.driver.catalog.germline.tsv" + File driverCatalogSomaticTsv = "~{outputDir}/~{tumorName}.driver.catalog.somatic.tsv" File purpleCnvGeneTsv = "~{outputDir}/~{tumorName}.purple.cnv.gene.tsv" - File purpleCnvGermlineTsv = "~{outputDir}/~{tumorName}.purple.cnv.germline.tsv" File purpleCnvSomaticTsv = "~{outputDir}/~{tumorName}.purple.cnv.somatic.tsv" + File purpleGermlineDeletionTsv = "~{outputDir}/~{tumorName}.purple.germline.deletion.tsv" + File purpleGermlineVcf = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz" + File purpleGermlineVcfIndex = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz.tbi" File purplePurityRangeTsv = "~{outputDir}/~{tumorName}.purple.purity.range.tsv" File purplePurityTsv = "~{outputDir}/~{tumorName}.purple.purity.tsv" File purpleQc = "~{outputDir}/~{tumorName}.purple.qc" @@ -851,10 +853,9 @@ task Purple { File purpleSomaticHistTsv = "~{outputDir}/~{tumorName}.purple.somatic.hist.tsv" File purpleSomaticVcf = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz" File purpleSomaticVcfIndex = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz.tbi" - File purpleGermlineVcf = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz" - File purpleGermlineVcfIndex = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz.tbi" File purpleSvVcf = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz" File purpleSvVcfIndex = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz.tbi" + File purpleVersion = "~{outputDir}/purple.version" File circosPlot = "~{outputDir}/plot/~{tumorName}.circos.png" File copynumberPlot = "~{outputDir}/plot/~{tumorName}.copynumber.png" File inputPlot = "~{outputDir}/plot/~{tumorName}.input.png" @@ -863,19 +864,19 @@ task Purple { File segmentPlot = "~{outputDir}/plot/~{tumorName}.segment.png" File somaticClonalityPlot = "~{outputDir}/plot/~{tumorName}.somatic.clonality.png" File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" - File purpleVersion = "~{outputDir}/purple.version" + File somaticRainfallPlot = 
"~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File circosNormalRatio = "~{outputDir}/circos/~{referenceName}.ratio.circos" - File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" - File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" - File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" - File circosTumorRatio = "~{outputDir}/circos/~{tumorName}.ratio.circos" - File circosGaps = "~{outputDir}/circos/gaps.txt" File circosBaf = "~{outputDir}/circos/~{tumorName}.baf.circos" + File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" File circosCnv = "~{outputDir}/circos/~{tumorName}.cnv.circos" + File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" File circosInputConf = "~{outputDir}/circos/~{tumorName}.input.conf" + File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" File circosMap = "~{outputDir}/circos/~{tumorName}.map.circos" + File circosTumorRatio = "~{outputDir}/circos/~{tumorName}.ratio.circos" File circosSnp = "~{outputDir}/circos/~{tumorName}.snp.circos" - Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, + File circosGaps = "~{outputDir}/circos/gaps.txt" + Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, From d554e60c08dee3597680cb18d9eee67201aba5ac Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 13:16:33 +0100 Subject: [PATCH 629/902] fix peach command --- peach.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/peach.wdl b/peach.wdl index 6a5770f4..bd8375d7 100644 --- a/peach.wdl +++ b/peach.wdl @@ -43,7 +43,7 @@ task Peach { --sample_r_id ~{normalName} \ --tool_version 1.5 \ --outputDir ~{outputDir} \ - --panel + --panel ~{panelJson} } output { From 
54f323f52f7ac0d0fbbab1f893b5f8583d504791 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 13:20:54 +0100 Subject: [PATCH 630/902] fix some typos --- peach.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/peach.wdl b/peach.wdl index bd8375d7..d1bc17f8 100644 --- a/peach.wdl +++ b/peach.wdl @@ -38,11 +38,11 @@ task Peach { set -e mkdir -p ~{outputDir} peach \ - -vcf ~{germlineVcf} \ + --vcf ~{germlineVcf} \ --sample_t_id ~{tumorName} \ --sample_r_id ~{normalName} \ --tool_version 1.5 \ - --outputDir ~{outputDir} \ + --outputdir ~{outputDir} \ --panel ~{panelJson} } From c675c91fbc91f932c6f5018986d025993611f8a3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 24 Feb 2022 15:14:35 +0100 Subject: [PATCH 631/902] fix linx output and health-checker command --- hmftools.wdl | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 2015c125..3ab203fb 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -484,8 +484,14 @@ task HealthChecker { -tum_wgs_metrics_file ~{tumorMetrics} \ -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ -output_dir ~{outputDir} - test -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' && echo 'true' > '~{outputDir}/succeeded' - test -e '~{outputDir}/~{tumorName}.HealthCheckFailed' && echo 'false' > '~{outputDir}/succeeded' + if [ -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' ] + then + echo 'true' > '~{outputDir}/succeeded' + fi + if [ -e '~{outputDir}/~{tumorName}.HealthCheckFailed' ] + then + echo 'false' > '~{outputDir}/succeeded' + fi } output { @@ -531,6 +537,7 @@ task Linx { File lineElementCsv File knownFusionCsv File driverGenePanel + Boolean writeAllVisFusions = false #The following should be in the same directory. 
File geneDataCsv File proteinFeaturesCsv @@ -540,7 +547,7 @@ task Linx { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.17--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" } command { @@ -558,7 +565,8 @@ task Linx { -check_drivers \ -driver_gene_panel ~{driverGenePanel} \ -chaining_sv_limit 0 \ - -write_vis_data + -write_vis_data \ + ~{if writeAllVisFusions then "-write_all_vis_fusions" else ""} } output { @@ -569,7 +577,6 @@ task Linx { File linxFusion = "~{outputDir}/~{sampleName}.linx.fusion.tsv" File linxLinks = "~{outputDir}/~{sampleName}.linx.links.tsv" File linxSvs = "~{outputDir}/~{sampleName}.linx.svs.tsv" - File linxViralInserts = "~{outputDir}/~{sampleName}.linx.viral_inserts.tsv" File linxVisCopyNumber = "~{outputDir}/~{sampleName}.linx.vis_copy_number.tsv" File linxVisFusion = "~{outputDir}/~{sampleName}.linx.vis_fusion.tsv" File linxVisGeneExon = "~{outputDir}/~{sampleName}.linx.vis_gene_exon.tsv" @@ -578,9 +585,9 @@ task Linx { File linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv" File linxVersion = "~{outputDir}/linx.version" Array[File] outputs = [driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion, - linxLinks, linxSvs, linxViralInserts, linxVisCopyNumber, - linxVisFusion, linxVisGeneExon, linxVisProteinDomain, - linxVisSegments, linxVisSvData, linxVersion] + linxLinks, linxSvs, linxVisCopyNumber, linxVisFusion, + linxVisGeneExon, linxVisProteinDomain, linxVisSegments, linxVisSvData, + linxVersion] } runtime { @@ -600,6 +607,7 @@ task Linx { lineElementCsv: {description: "A list of known LINE source regions.", category: "required"} knownFusionCsv: {description: "A CSV file describing known fusions.", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} + writeAllVisFusions: {description: "Equivalent to the -write_all_vis_fusions 
flag.", category: "advanced"} geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} From 5e821d51571d91727357e324cc9283eafce5e427 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 24 Feb 2022 16:26:29 +0100 Subject: [PATCH 632/902] fix health checker output --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 3ab203fb..9a3bd437 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -495,7 +495,7 @@ task HealthChecker { } output { - Boolean succeeded = read_boolean("result") + Boolean succeeded = read_boolean("succeeded") File outputFile = if succeeded then "~{outputDir}/~{tumorName}.HealthCheckSucceeded" else "~{outputDir}/~{tumorName}.HealthCheckFailed" From f2cc5cc02fb5ed2376969ff745ce0d6741fc32ff Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Feb 2022 11:43:22 +0100 Subject: [PATCH 633/902] add LinxVisualisations --- hmftools.wdl | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 9a3bd437..c852b520 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -622,6 +622,61 @@ task Linx { } } +task LinxVisualisations { + input { + String outputDir = "./linx_visualisation" + String sample + String refGenomeVersion + Array[File]+ linxOutput + Boolean plotReportable = true + + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 10 + String dockerImage = 
"quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" + } + + command { + set -e + mkdir -p ~{outputDir} + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-linx-1.18-0/sv-linx.jar \ + com.hartwig.hmftools.linx.visualiser.SvVisualiser \ + -sample ~{sample} \ + -ref_genome_version ~{refGenomeVersion} \ + -circos /usr/local/bin/circos \ + -vis_file_dir ~{sub(linxOutput[0], basename(linxOutput[0]), "")} \ + -data_out ~{outputDir}/circos \ + -plot_out ~{outputDir}/plot \ + ~{if plotReportable then "-plot_reportable" else ""} + } + + output { + + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + sample: {description: "The sample's name.", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} + linxOutput: {description: "The directory containing the linx output.", category: "required"} + plotReportable: {description: "Equivalent to the -plot_reportable flag.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Pave { input { String outputDir = "./" From 8fcd2e2598fbc340abdda2b3a3d56dae04cb6bdf Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Feb 2022 12:41:51 +0100 Subject: [PATCH 634/902] add linx visualisation output --- hmftools.wdl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index c852b520..c1a824c6 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -632,7 +632,7 @@ task LinxVisualisations { String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 10 + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" } @@ -647,12 +647,13 @@ task LinxVisualisations { -circos /usr/local/bin/circos \ -vis_file_dir ~{sub(linxOutput[0], basename(linxOutput[0]), "")} \ -data_out ~{outputDir}/circos \ - -plot_out ~{outputDir}/plot \ + -plot_out ~{outputDir}/plots \ ~{if plotReportable then "-plot_reportable" else ""} } output { - + Array[File] circos = glob("~{outputDir}/circos/*") + Array[File] plots = glob("~{outputDir}/plots/*" } runtime { From 97c9681b4d10a9fc5d7c2b930df9e69cba85d07c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Feb 2022 12:50:28 +0100 Subject: [PATCH 635/902] typo --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index c1a824c6..7f739311 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -653,7 +653,7 @@ task LinxVisualisations { output { Array[File] circos = glob("~{outputDir}/circos/*") - Array[File] plots = glob("~{outputDir}/plots/*" + Array[File] plots = glob("~{outputDir}/plots/*") } runtime { From 2467174555e85c5b4cf819018afd44a8b5f24af8 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Feb 2022 14:19:08 +0100 Subject: [PATCH 636/902] update virus-interpreter to 1.2 --- hmftools.wdl | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) 
diff --git a/hmftools.wdl b/hmftools.wdl index 7f739311..65187f44 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1099,25 +1099,29 @@ task Sage { task VirusInterpreter { input { String sampleId + File purplePurityTsv + File prupleQcFile + File tumorSampleWgsMetricsFile File virusBreakendTsv File taxonomyDbTsv - File virusInterpretationTsv - File virusBlacklistTsv + File virusReportingDbTsv String outputDir = "." String memory = "3G" String javaXmx = "2G" Int timeMinutes = 15 - String dockerImage = "quay.io/biowdl/virus-interpreter:1.0" + String dockerImage = "quay.io/biowdl/virus-interpreter:1.2" } command { - virus-interpreter -Xmx~{javaXmx} \ + virus-interpreter -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -sample_id ~{sampleId} \ + -purple_purity_tsv ~{purplePurityTsv} \ + -purple_qc_file ~{prupleQcFile} \ + -tumor_sample_wgs_metrics_file ~{tumorSampleWgsMetricsFile} \ -virus_breakend_tsv ~{virusBreakendTsv} \ -taxonomy_db_tsv ~{taxonomyDbTsv} \ - -virus_interpretation_tsv ~{virusInterpretationTsv} \ - -virus_blacklist_tsv ~{virusBlacklistTsv} \ + -virus_reporting_db_tsv ~{virusReportingDbTsv} \ -output_dir ~{outputDir} } @@ -1133,10 +1137,12 @@ task VirusInterpreter { parameter_meta { sampleId: {description: "The name of the sample.", category: "required"} + purplePurityTsv: {description: "The purity file produced by purple.", category: "required"} + prupleQcFile: {description: "The QC file produced by purple.", category: "required"} + tumorSampleWgsMetricsFile: {description: "The picard WGS metrics file for this sample.", category: "required"} virusBreakendTsv: {description: "The TSV output from virusbreakend.", category: "required"} taxonomyDbTsv: {description: "A taxonomy database tsv.", category: "required"} - virusInterpretationTsv: {description: "A virus interpretation tsv.", category: "required"} - virusBlacklistTsv: {description: "A virus blacklist tsv.", category: "required"} + virusReportingDbTsv: {description: "A virus reporting tsv.", category: 
"required"} outputDir: {description: "The directory the output will be written to.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", From b76866a2fbe5c23961f63dfa6b68697cf3c23126 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 1 Mar 2022 15:37:19 +0100 Subject: [PATCH 637/902] update protect to 2.0 --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 65187f44..da9c6fd5 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -632,7 +632,7 @@ task LinxVisualisations { String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 60 + Int timeMinutes = 1440 #FIXME String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" } @@ -777,7 +777,7 @@ task Protect { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biowdl/protect:v1.4" + String dockerImage = "quay.io/biowdl/protect:v2.0" } command { From 513e64560afa2a532a791289e5ef77a90006aa50 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 2 Mar 2022 10:23:07 +0100 Subject: [PATCH 638/902] fix health-checker --- hmftools.wdl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 76620e3c..27b31bca 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -409,13 +409,19 @@ task HealthChecker { -tum_wgs_metrics_file ~{tumorMetrics} \ -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ -output_dir ~{outputDir} - test -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' && echo 'true' > '~{outputDir}/succeeded' - test -e '~{outputDir}/~{tumorName}.HealthCheckFailed' && echo 'false' > '~{outputDir}/succeeded' + if [ -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' ] + then + echo 'true' > '~{outputDir}/succeeded' + fi + if [ -e 
'~{outputDir}/~{tumorName}.HealthCheckFailed' ] + then + echo 'false' > '~{outputDir}/succeeded' + fi } output { - Boolean succeeded = read_boolean("result") - File outputFile = if succeeded + Boolean succeeded = read_boolean("succeeded") + File outputFile = if succeeded then "~{outputDir}/~{tumorName}.HealthCheckSucceeded" else "~{outputDir}/~{tumorName}.HealthCheckFailed" } From 652735023d7a71738b0ccea450e4fedd27e41830 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 2 Mar 2022 12:42:00 +0100 Subject: [PATCH 639/902] update cuppa to 1.6 --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index da9c6fd5..277c8dd4 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -184,7 +184,7 @@ task Cuppa { String javaXmx = "4G" String memory = "5G" Int timeMinutes = 10 - String dockerImage = "quay.io/biowdl/cuppa:1.4" + String dockerImage = "quay.io/biowdl/cuppa:1.6" } command { @@ -240,7 +240,7 @@ task CuppaChart { String memory = "4G" Int timeMinutes = 5 - String dockerImage = "quay.io/biowdl/cuppa:1.4" + String dockerImage = "quay.io/biowdl/cuppa:1.6" } command { From d5294222e69c6e793ea0d13e448e67b9482e5a10 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 3 Mar 2022 15:50:52 +0100 Subject: [PATCH 640/902] add orange, cupGenerateReport and (hopefully) fix sage plots --- hmftools.wdl | 212 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 208 insertions(+), 4 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 277c8dd4..75fd2d19 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -168,6 +168,62 @@ task Cobalt { } } +task CupGenerateReport { + input { + String sampleName + File cupData + String outputDir = "./cuppa" + + String memory = "5G" + Int timeMinutes = 10 + String dockerImage = "quay.io/biowdl/cuppa:1.6" + } + + # This script writes to the directory that the input is located in. 
+ # Giving the input directly will cause the script to write in the + # locallized input dir, which may cause issues with write permissions + # in certain execution engines or backends. We, therefore, make links + # to a working directory, and give that directory as input instead. + # We can't just use the outputDir directly. This could be an + # absolute path in which case the linking might fail due to name + # collisions. Outputs are copied to the given output dir afterwards. + command { + set -e + mkdir -p ./workdir ~{outputDir} + ln -s -t workdir ~{sep=" " cupData} + CupGenerateReport \ + ~{sampleName} \ + workdir + mv -t ~{outputDir} \ + ./workdir/~{sampleName}.cup.report.summry.png \ + ./workdir/~{sampleName}.cup.report.features.png \ + ./workdir/~{sampleName}_cup.report.pdf + } + + output { + File summaryPng = "~{outputDir}/~{sampleName}.cup.report.summry.png" + File featuresPng = "~{outputDir}/~{sampleName}.cup.report.features.png" + File reportPdf = "~{outputDir}/~{sampleName}_cup.report.pdf" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + sampleName: {description: "The sample id.", category: "required"} + cupData: {description: "The output produced by cuppa.", category: "required"} + outputDir: {description: "The directory the ouput will be placed in.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Cuppa { input { Array[File]+ linxOutput @@ -632,7 +688,7 @@ task LinxVisualisations { String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 1440 #FIXME + Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" } @@ -678,6 +734,151 @@ task LinxVisualisations { } } +task Orange { + input { + String outputDir = "./orange" + File doidJson + Array[String] sampleDoids + String tumorName + String referenceName + File referenceMetrics + File tumorMetrics + File referenceFlagstats + File tumorFlagstats + File sageGermlineGeneCoverageTsv + File sageSomaticRefSampleBqrPlot + File sageSomaticTumorSampleBqrPlot + File purpleGeneCopyNumberTsv + File purpleGermlineDriverCatalogTsv + File purpleGermlineVariantVcf + File purpleGermlineVariantVcfIndex + Array[File]+ purplePlots + File purplePurityTsv + File purpleQcFile + File purpleSomaticDriverCatalogTsv + File purpleSomaticVariantVcf + File purpleSomaticVariantVcfIndex + File linxFusionTsv + File linxBreakendTsv + File linxDriverCatalogTsv + File linxDriverTsv + Array[File]+ linxPlots + File cuppaResultCsv + File cuppaSummaryPlot + File cuppaFeaturePlot + File chordPredictionTxt + File peachGenotypeTsv + File protectEvidenceTsv + File annotatedVirusTsv + #File pipelineVersionFile + File cohortMappingTsv + File cohortPercentilesTsv + + String memory = "17G" + String javaXmx = "16G" + Int timeMinutes = 1440 #FIXME + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" + } + + command { + set -e + mkdir -p ~{outputDir} + orange -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -output_dir ~{outputDir} \ + -doid_json ~{doidJson} \ + -primary_tumor_doids '~{sep=";" sampleDoids}' \ + -max_evidence_level C \ + -tumor_sample_id ~{tumorName} \ + -reference_sample_id ~{referenceName} \ + -ref_sample_wgs_metrics_file ~{referenceMetrics} \ + 
-tumor_sample_wgs_metrics_file ~{tumorMetrics} \ + -ref_sample_flagstat_file ~{referenceFlagstats} \ + -tumor_sample_flagstat_file ~{tumorFlagstats} \ + -sage_germline_gene_coverage_tsv ~{sageGermlineGeneCoverageTsv} \ + -sage_somatic_ref_sample_bqr_plot ~{sageSomaticRefSampleBqrPlot} \ + -sage_somatic_tumor_sample_bqr_plot ~{sageSomaticTumorSampleBqrPlot} \ + -purple_gene_copy_number_tsv ~{purpleGeneCopyNumberTsv} \ + -purple_germline_driver_catalog_tsv ~{purpleGermlineDriverCatalogTsv} \ + -purple_germline_variant_vcf ~{purpleGermlineVariantVcf} \ + -purple_plot_directory ~{sub(purplePlots[0], basename(purplePlots[0]), "")} \ + -purple_purity_tsv ~{purplePurityTsv} \ + -purple_qc_file ~{purpleQcFile} \ + -purple_somatic_driver_catalog_tsv ~{purpleSomaticDriverCatalogTsv} \ + -purple_somatic_variant_vcf ~{purpleSomaticVariantVcf} \ + -linx_fusion_tsv ~{linxFusionTsv} \ + -linx_breakend_tsv ~{linxBreakendTsv} \ + -linx_driver_catalog_tsv ~{linxDriverCatalogTsv} \ + -linx_driver_tsv ~{linxDriverTsv} \ + -linx_plot_directory ~{sub(linxPlots[0], basename(linxPlots[0]), "")} \ + -cuppa_result_csv ~{cuppaResultCsv} \ + -cuppa_summary_plot ~{cuppaSummaryPlot} \ + -cuppa_feature_plot ~{cuppaFeaturePlot} \ + -chord_prediction_txt ~{chordPredictionTxt} \ + -peach_genotype_tsv ~{peachGenotypeTsv} \ + -protect_evidence_tsv ~{protectEvidenceTsv} \ + -annotated_virus_tsv ~{annotatedVirusTsv} \ + -cohort_mapping_tsv ~{cohortMappingTsv} \ + -cohort_percentiles_tsv ~{cohortPercentilesTsv} + } + #TODO may need to be added: -pipeline_version_file ~{pipelineVersionFile} + + output { + File orangeJson = "~{outputDir}/~{tumorName}.orange.json" + File orangePdf = "~{outputDir}/~{tumorName}.orange.pdf" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + outputDir: {description: "The directory the outputs will be written to.", category: "common"} + doidJson: {description: "A json with the DOID (Human Disease 
Ontology) tree.", category: "required"} + sampleDoids: {description: "The DOIDs (Human Disease Ontology) for the primary tumor.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + referenceName: {description: "The name of the normal sample.", category: "required"} + referenceMetrics: {description: "The picard WGS metrics for the normal sample.", category: "required"} + tumorMetrics: {description: "The picard WGS metrics for the tumor sample.", category: "required"} + referenceFlagstats: {description: "The flagstats for the normal sample.", category: "required"} + tumorFlagstats: {description: "The flagstats for the tumor sample.", category: "required"} + sageGermlineGeneCoverageTsv: {description: "Gene coverage file produced by the germline sage run.", category: "required"} + sageSomaticRefSampleBqrPlot: {description: "The reference bqr plot produced by the somatic sage run.", category: "required"} + sageSomaticTumorSampleBqrPlot: {description: "The reference bqr plot produced by the somatic sage run.", category: "required"} + purpleGeneCopyNumberTsv: {description: "Copy number tsv produced by purple.", category: "required"} + purpleGermlineDriverCatalogTsv: {description: "Germline driver catalog produced by purple.", category: "required"} + purpleGermlineVariantVcf: {description: "Germline variant vcf produced by purple.", category: "required"} + purplePlots: {description: "The plots generated by purple.", category: "required"} + purplePurityTsv: {description: "The purity file produced by purple.", category: "required"} + purpleQcFile: {description: "The qc file produced by purple.", category: "required"} + purpleSomaticDriverCatalogTsv: {description: "Somatic driver catalog produced by purple.", category: "required"} + purpleSomaticVariantVcf: {description: "Somatic variant vcf produced by purple.", category: "required"} + linxFusionTsv: {description: "The fusions tsv produced by linx.", category: 
"required"} + linxBreakendTsv: {description: "The breakend tsv produced by linx.", category: "required"} + linxDriverCatalogTsv: {description: "The driver catalog produced by linx.", category: "required"} + linxDriverTsv: {description: "The driver tsv produced by linx.", category: "required"} + linxPlots: {description: "The plots generated by linx.", category: "required"} + cuppaResultCsv: {description: "The cuppa results csv.", category: "required"} + cuppaSummaryPlot: {description: "The cuppa summary plot.", category: "required"} + cuppaFeaturePlot: {description: "The cuppa feature plot.", category: "required"} + chordPredictionTxt: {description: "Chord prediction results.", category: "required"} + peachGenotypeTsv: {description: "Genotype tsv produced by peach.", category: "required"} + protectEvidenceTsv: {description: "Evidence tsv produced by protect.", category: "required"} + annotatedVirusTsv: {description: "Annotated virus tsv produced by virus-interpreter.", category: "required"} + #pipelineVersionFile: {description: "", category: "required"} + cohortMappingTsv: {description: "Cohort mapping file from the HMFTools resources.", category: "required"} + cohortPercentilesTsv: {description: "Cohort percentile file from the HMFTools resources.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Pave { input { String outputDir = "./" @@ -1024,7 +1225,7 @@ task Sage { String javaXmx = "50G" String memory = "51G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, referenceBam]), "G") * 9 / threads) - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } command { @@ -1054,8 +1255,11 @@ task Sage { output { File outputVcf = outputPath File outputVcfIndex = outputPath + ".tbi" - # There is some plots as well, but in the current container the labels in the plots are just series of `□`s. - # This seems to be a systemic issue with R generated plots in biocontainers... + File? referenceSageBqrPng = "~{referenceName}.sage.bqr.png" + File? referenceSageBqrTsv = "~{referenceName}.sage.bqr.tsv" + File tumorSageBqrPng = "~{tumorName}.sage.bqr.png" + File tumorSageBqrTsv = "~{tumorName}.sage.bqr.tsv" + File sageGeneCoverageTsv = "~{tumorName}.sage.gene.coverage.tsv" } runtime { From 960aa3cf0a713b6d7870b33c529e22b98b711aea Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 4 Mar 2022 15:06:49 +0100 Subject: [PATCH 641/902] Slightly less records in RAM --- picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index e81cd4e3..436369d7 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1038,7 +1038,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String outputPath String outputPathMetrics = outputPath + ".metrics" String outputPathUmiMetrics = outputPath + ".umi-metrics" - Int maxRecordsInRam = 3000000 # Default is 500_000 but that will lead to very small files on disk. + Int maxRecordsInRam = 1500000 # Default is 500_000 but that will lead to very small files on disk. String? 
assumeSortOrder String tempdir = "temp" Boolean removeDuplicates = true From cf0b105cdf0a2ad7a2c1354857c281c18150a36b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 7 Mar 2022 10:32:35 +0100 Subject: [PATCH 642/902] Add missing whitespace. Co-authored-by: Davy Cats --- umi.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/umi.wdl b/umi.wdl index a32d646a..0dc5c55e 100644 --- a/umi.wdl +++ b/umi.wdl @@ -34,7 +34,9 @@ task BamReadNameToUmiTag { Int timeMinutes = 1 + ceil(size([inputBam], "G") * 10) String dockerImage = "quay.io/biocontainers/pysam:0.17.0--py39h051187c_0" } + String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") + command <<< python < Date: Mon, 7 Mar 2022 12:15:51 +0100 Subject: [PATCH 643/902] Add parameter_meta for useSoftclippingforSupplementary --- bwa.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/bwa.wdl b/bwa.wdl index 1cb170b7..373de628 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -94,6 +94,7 @@ task Mem { outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} usePostalt: {description: "Whether to use the postalt script from bwa kit."} + useSoftclippingForSupplementary: {description: "Use soft-clipping for supplementary alignments instead of hard-clipping", category: "common"} sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} readgroup: {description: "A readgroup identifier.", category: "common"} From b070d3efbfcbd41ca3545a2eec0e5bd1a6dc2a3d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 7 Mar 2022 12:19:13 +0100 Subject: [PATCH 644/902] Add parameter_meta for Picard UmiAwareMarkDuplicatesWithMateCigar --- picard.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/picard.wdl b/picard.wdl index b6d9fadf..eea8d42f 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1160,8 +1160,10 @@ task UmiAwareMarkDuplicatesWithMateCigar { assumeSortOrder: {description: "Assume a certain sort order even though the header might say otherwise.", category: "common"} tempdir: {description: "Temporary directory.", category: "advanced"} compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + maxRecordsInRam: {description: "This will specify the number of records stored in RAM before spilling to disk.", category: "advanced"} useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 8ccfb0e0d3b3e31ad5aa08fc527ecaa46e77c589 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 7 Mar 2022 13:46:17 +0100 Subject: [PATCH 645/902] fix CupGenerateReport --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 75fd2d19..d9dea387 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -193,7 +193,7 @@ task CupGenerateReport { ln -s -t workdir ~{sep=" " cupData} CupGenerateReport \ ~{sampleName} \ - workdir + workdir/ mv -t ~{outputDir} \ ./workdir/~{sampleName}.cup.report.summry.png \ ./workdir/~{sampleName}.cup.report.features.png \ From 799811db76b369b057aa54555e08c3025c6905a0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 8 Mar 2022 10:38:40 +0100 Subject: [PATCH 646/902] fix cupGenerateReport --- hmftools.wdl | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index d9dea387..2e294ecd 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -195,15 +195,19 @@ task CupGenerateReport { ~{sampleName} \ workdir/ mv -t ~{outputDir} \ - ./workdir/~{sampleName}.cup.report.summry.png \ - ./workdir/~{sampleName}.cup.report.features.png \ - ./workdir/~{sampleName}_cup.report.pdf + ./workdir/~{sampleName}.cup.report.summary.png \ + ./workdir/~{sampleName}_cup_report.pdf + if [ -f ./workdir/~{sampleName}.cup.report.features.png ] + then + mv -t ~{outputDir} \ + ./workdir/~{sampleName}.cup.report.features.png + fi } output { - File summaryPng = "~{outputDir}/~{sampleName}.cup.report.summry.png" - File featuresPng = "~{outputDir}/~{sampleName}.cup.report.features.png" - File reportPdf = "~{outputDir}/~{sampleName}_cup.report.pdf" + File summaryPng = "~{outputDir}/~{sampleName}.cup.report.summary.png" + File? 
featuresPng = "~{outputDir}/~{sampleName}.cup.report.features.png" + File reportPdf = "~{outputDir}/~{sampleName}_cup_report.pdf" } runtime { @@ -765,7 +769,7 @@ task Orange { Array[File]+ linxPlots File cuppaResultCsv File cuppaSummaryPlot - File cuppaFeaturePlot + File? cuppaFeaturePlot File chordPredictionTxt File peachGenotypeTsv File protectEvidenceTsv @@ -812,7 +816,7 @@ task Orange { -linx_plot_directory ~{sub(linxPlots[0], basename(linxPlots[0]), "")} \ -cuppa_result_csv ~{cuppaResultCsv} \ -cuppa_summary_plot ~{cuppaSummaryPlot} \ - -cuppa_feature_plot ~{cuppaFeaturePlot} \ + ~{"-cuppa_feature_plot " + cuppaFeaturePlot} \ -chord_prediction_txt ~{chordPredictionTxt} \ -peach_genotype_tsv ~{peachGenotypeTsv} \ -protect_evidence_tsv ~{protectEvidenceTsv} \ @@ -861,7 +865,7 @@ task Orange { linxPlots: {description: "The plots generated by linx.", category: "required"} cuppaResultCsv: {description: "The cuppa results csv.", category: "required"} cuppaSummaryPlot: {description: "The cuppa summary plot.", category: "required"} - cuppaFeaturePlot: {description: "The cuppa feature plot.", category: "required"} + cuppaFeaturePlot: {description: "The cuppa feature plot.", category: "common"} chordPredictionTxt: {description: "Chord prediction results.", category: "required"} peachGenotypeTsv: {description: "Genotype tsv produced by peach.", category: "required"} protectEvidenceTsv: {description: "Evidence tsv produced by protect.", category: "required"} From 5ae1f6de5c3c4efe38a792e3be1104bbacacea3b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 8 Mar 2022 10:47:30 +0100 Subject: [PATCH 647/902] fix copy-paste error (orange docker image) --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 2e294ecd..34941059 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -781,7 +781,7 @@ task Orange { String memory = "17G" String javaXmx = "16G" Int timeMinutes = 1440 #FIXME - String dockerImage = 
"quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" + String dockerImage = "quay.io/quay.io/biowdl/orange:v1.6" } command { From 54d70a6b508f4a8360ce995a4bda5f6094225826 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 8 Mar 2022 10:55:02 +0100 Subject: [PATCH 648/902] fix copy-paste error --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 34941059..5a480f93 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -781,7 +781,7 @@ task Orange { String memory = "17G" String javaXmx = "16G" Int timeMinutes = 1440 #FIXME - String dockerImage = "quay.io/quay.io/biowdl/orange:v1.6" + String dockerImage = "quay.io/biowdl/orange:v1.6" } command { From 9ca13a0a999ff874d041d26c4860c8c07edbe92d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Mar 2022 10:16:59 +0100 Subject: [PATCH 649/902] Remove duplicate options for markduplicates --- picard.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/picard.wdl b/picard.wdl index eea8d42f..3d835829 100644 --- a/picard.wdl +++ b/picard.wdl @@ -726,8 +726,6 @@ task MarkDuplicates { CREATE_INDEX=true \ ADD_PG_TAG_TO_READS=false \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} \ - USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ - USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} } output { From ac55982a7acf3c06460ae0b8ac2c394865eeaa4c Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 18 Mar 2022 12:46:53 +0100 Subject: [PATCH 650/902] run tabix if vcf index is missing in gridss --- gridss.wdl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index d3d251a5..92d7df1e 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -116,6 +116,12 @@ task GRIDSS { ~{normalBam} \ ~{tumorBam} samtools index ~{outputPrefix}_assembly.bam ~{outputPrefix}_assembly.bai + + # For some reason the VCF index is sometimes missing + if [ ! 
-e ~{outputPrefix}.vcf.gz.tbi ] + then + tabix ~{outputPrefix}.vcf.gz + fi } output { From 173bb2e6547c1fa4ee20ec5da98368522e18b887 Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 18 Mar 2022 12:49:26 +0100 Subject: [PATCH 651/902] update changelog --- CHANGELOG.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c0db947..b028b60a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ The GRIDSS task will now run tabix separately if GRIDSS doesn't + produce a vcf index. + Added a task for SnpEff. + Adjusted runtime settings for sambamba Markdup. + Added a task for sambamba Flagstat. @@ -28,7 +30,7 @@ version 5.1.0-dev + Sage + VirusInterpreter + Added a task for VirusBreakend. -+ Added a task for GridssAnnotateVcfRepeatmasker. ++ Added a task for GridssAnnotateVcfRepeatmasker. + Bumped GRIDSS version to 2.12.2. + Adjusted GRIDSS runtime settings. + Added optional inputs to GRIDSS: @@ -147,7 +149,7 @@ version 4.0.0 + Picard MergeVcf now uses compression level 1 by default. + bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The number of threads is now related to the number of threads on the aligner. - Using more threads reduces the chance of the samtools sort pipe getting + Using more threads reduces the chance of the samtools sort pipe getting blocked if it's full. + Renamed a few inputs in centrifuge.wdl, isoseq3.wdl, talon.wdl, transcriptclean.wdl to be more descriptive. 
From 1c02ce1ea5464c11491f9dc67802ab71cb46dbcb Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 8 Apr 2022 12:44:01 +0200 Subject: [PATCH 652/902] add task for sv type annotation of gridss results --- gridss.wdl | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 92d7df1e..f771ebe4 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -79,6 +79,69 @@ task AnnotateInsertedSequence { } } +task AnnotateSvType { + input { + File gridssVcf + File gridssVcfIndex + String outputPath = "./gridss.svtyped.vcf" + + String memory = "32G" + String dockerImage = "quay.io/biocontainers/bioconductor-structuralvariantannotation:1.10.0--r41hdfd78af_0" + Int timeMinutes = 240 + } + + # Based on https://github.com/PapenfussLab/gridss/issues/74 + command <<< + set -e + mkdir -p "$(dirname ~{outputPath})" + R --vanilla << EOF + library(VariantAnnotation) + library(StructuralVariantAnnotation) + + vcf_path <- "~{gridssVcf}" + out_path <- "~{outputPath}" + + # Simple SV type classifier + simpleEventType <- function(gr) { + return(ifelse(seqnames(gr) != seqnames(partner(gr)), "BND", # inter-chromosomosal + ifelse(gr$insLen >= abs(gr$svLen) * 0.7, "INS", + ifelse(strand(gr) == strand(partner(gr)), "INV", + ifelse(xor(start(gr) < start(partner(gr)), strand(gr) == "-"), "DEL", + "DUP"))))) + } + + header <- scanVcfHeader(vcf_path) + vcf <- readVcf(vcf_path, seqinfo(header)) + gr <- breakpointRanges(vcf) + svtype <- simpleEventType(gr) + info(vcf[gr$sourceId])$SVTYPE <- svtype + writeVcf(vcf, out_path) + EOF + >>> + + output { + File vcf = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + gridssVcf: {description: "The VCF produced by GRIDSS.", category: "required"} + gridssVcfIndex: {description: "The index for the VCF produced by GRIDSS.", category: "required"} + outputPath: {description: "The path the output should be written 
to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GRIDSS { input { File tumorBam From 87bb3c4f2104cb3c8a020aa0abfb7f5a4faa387a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 8 Apr 2022 13:32:48 +0200 Subject: [PATCH 653/902] copy paste error --- gridss.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index f771ebe4..b38f344e 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -134,8 +134,6 @@ task AnnotateSvType { gridssVcfIndex: {description: "The index for the VCF produced by GRIDSS.", category: "required"} outputPath: {description: "The path the output should be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 1b4238c66c6150e57e128086d16d6939a1198406 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 8 Apr 2022 14:14:33 +0200 Subject: [PATCH 654/902] typo --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index b38f344e..00705392 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -79,7 +79,7 @@ task AnnotateInsertedSequence { } } -task AnnotateSvType { +task AnnotateSvTypes { input { File gridssVcf File gridssVcfIndex From bd153caa313e5fad73d2716813f7eb02c36b963c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Jun 2022 14:56:45 +0200 Subject: [PATCH 655/902] adjust gridss threads --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index d3d251a5..b118af9d 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -95,7 +95,7 @@ task GRIDSS { Int jvmHeapSizeGb = 300 Int nonJvmMemoryGb = 50 - Int threads = 4 + Int threads = 16 Int timeMinutes = ceil(7200 / threads) + 1800 String dockerImage = "quay.io/biowdl/gridss:2.12.2" } From 873ece6f64e85bea10c28754f3260de155cc8d80 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 17 Jun 2022 14:59:35 +0200 Subject: [PATCH 656/902] adjust some runtime settings --- bedtools.wdl | 2 +- gridss.wdl | 10 +++++----- hmftools.wdl | 8 ++++---- sambamba.wdl | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/bedtools.wdl b/bedtools.wdl index 1d956cab..80a281d6 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -76,7 +76,7 @@ task Coverage { String outputPath = "./coverage.tsv" String memory = "8G" - Int timeMinutes = 120 + Int timeMinutes = 320 String dockerImage = "quay.io/biocontainers/bedtools:2.30.0--h7d7f7ad_2" } diff --git a/gridss.wdl b/gridss.wdl index b118af9d..c1a41a25 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,9 +93,9 @@ task GRIDSS { File? blacklistBed File? 
gridssProperties - Int jvmHeapSizeGb = 300 - Int nonJvmMemoryGb = 50 - Int threads = 16 + Int jvmHeapSizeGb = 64 + Int nonJvmMemoryGb = 10 + Int threads = 12 Int timeMinutes = ceil(7200 / threads) + 1800 String dockerImage = "quay.io/biowdl/gridss:2.12.2" } @@ -216,9 +216,9 @@ task Virusbreakend { String outputPath = "./virusbreakend.vcf" String memory = "75G" - Int threads = 8 + Int threads = 12 String dockerImage = "quay.io/biowdl/gridss:2.12.2" - Int timeMinutes = 180 + Int timeMinutes = 320 } command { diff --git a/hmftools.wdl b/hmftools.wdl index 5a480f93..ef6355c4 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -780,7 +780,7 @@ task Orange { String memory = "17G" String javaXmx = "16G" - Int timeMinutes = 1440 #FIXME + Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/orange:v1.6" } @@ -1225,9 +1225,9 @@ task Sage { String? mnvFilterEnabled File? coverageBed - Int threads = 4 - String javaXmx = "50G" - String memory = "51G" + Int threads = 32 + String javaXmx = "120G" + String memory = "121G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, referenceBam]), "G") * 9 / threads) String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } diff --git a/sambamba.wdl b/sambamba.wdl index 4c2115e0..6696668a 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -28,7 +28,7 @@ task Flagstat { Int threads = 2 String memory = "8G" - Int timeMinutes = 120 + Int timeMinutes = 320 String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } From af5cf337f77dff48e4526e1da9ca6688a1fbe56c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 22 Jun 2022 12:48:03 +0200 Subject: [PATCH 657/902] adjust sage memory and time --- hmftools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index ef6355c4..6c6ef045 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1226,9 +1226,9 @@ task Sage { File? 
coverageBed Int threads = 32 - String javaXmx = "120G" - String memory = "121G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, referenceBam]), "G") * 9 / threads) + String javaXmx = "8G" + String memory = "9G" + Int timeMinutes = 720 String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } From 4608518f1afa3159658731aaac2dbfc32bedd8b8 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 23 Jun 2022 11:09:25 +0200 Subject: [PATCH 658/902] increase sage memory --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 6c6ef045..32bc24fd 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1226,8 +1226,8 @@ task Sage { File? coverageBed Int threads = 32 - String javaXmx = "8G" - String memory = "9G" + String javaXmx = "16G" + String memory = "20G" Int timeMinutes = 720 String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } From 01aa41d21addca2002f1269ba41e165c33e9e03e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 30 Jun 2022 14:09:40 +0200 Subject: [PATCH 659/902] fix heredoc --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 00705392..0e8fd434 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -94,7 +94,7 @@ task AnnotateSvTypes { command <<< set -e mkdir -p "$(dirname ~{outputPath})" - R --vanilla << EOF + R --vanilla << "EOF" library(VariantAnnotation) library(StructuralVariantAnnotation) @@ -115,7 +115,7 @@ task AnnotateSvTypes { gr <- breakpointRanges(vcf) svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype - writeVcf(vcf, out_path) + writeVcf(vcf, out_path, index=T) EOF >>> From 39af0ad74c6296b2f9aa536ecb2ba123a156670e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 30 Jun 2022 14:12:14 +0200 Subject: [PATCH 660/902] fix output name --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 0e8fd434..d0428e59 100644 --- 
a/gridss.wdl +++ b/gridss.wdl @@ -83,7 +83,7 @@ task AnnotateSvTypes { input { File gridssVcf File gridssVcfIndex - String outputPath = "./gridss.svtyped.vcf" + String outputPath = "./gridss.svtyped.vcf.bgz" String memory = "32G" String dockerImage = "quay.io/biocontainers/bioconductor-structuralvariantannotation:1.10.0--r41hdfd78af_0" From 4e2a09e11c36a69b84451c44bf70c50825d67746 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 30 Jun 2022 15:57:39 +0200 Subject: [PATCH 661/902] detect if compressed --- gridss.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index d0428e59..c12c24d6 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -90,6 +90,8 @@ task AnnotateSvTypes { Int timeMinutes = 240 } + String index = if sub(outputPath, "\\.bgz", "") != outputPath then "T" else "F" + # Based on https://github.com/PapenfussLab/gridss/issues/74 command <<< set -e @@ -115,7 +117,7 @@ task AnnotateSvTypes { gr <- breakpointRanges(vcf) svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype - writeVcf(vcf, out_path, index=T) + writeVcf(vcf, out_path, index=~{index}) EOF >>> From 358c946dc86024324455193032d53873b8361d33 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 30 Jun 2022 16:36:09 +0200 Subject: [PATCH 662/902] fix duoble .bgz and and index to output --- gridss.wdl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index c12c24d6..38daa029 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -90,7 +90,9 @@ task AnnotateSvTypes { Int timeMinutes = 240 } - String index = if sub(outputPath, "\\.bgz", "") != outputPath then "T" else "F" + String effectiveOutputPath = sub(outputPath, "\\.bgz", "") + String index = if effectiveOutputPath != outputPath then "T" else "F" + # Based on https://github.com/PapenfussLab/gridss/issues/74 command <<< @@ -101,7 +103,7 @@ task AnnotateSvTypes { library(StructuralVariantAnnotation) vcf_path <- "~{gridssVcf}" - out_path <- "~{outputPath}" + 
out_path <- "~{effectiveOutputPath}" # Simple SV type classifier simpleEventType <- function(gr) { @@ -123,6 +125,7 @@ task AnnotateSvTypes { output { File vcf = outputPath + File? vcfIndex = outputPath + ".tbi" } runtime { From 760f89e95596cb55ef2b78c27bb61c85cadedcc2 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 1 Jul 2022 10:13:48 +0200 Subject: [PATCH 663/902] give bcftools sort more time --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 88d97cd0..589cddea 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -187,7 +187,7 @@ task Sort { String tmpDir = "./sorting-tmp" String memory = "256M" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) + Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 5 String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 8e7ca0ce64ef97b3ba7859b245377294754edbd0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 1 Jul 2022 14:07:19 +0200 Subject: [PATCH 664/902] increase memory for bcftools sort --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 589cddea..2bf1c732 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -186,7 +186,7 @@ task Sort { String outputPath = "output.vcf.gz" String tmpDir = "./sorting-tmp" - String memory = "256M" + String memory = "5G" Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 5 String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 3af704d65bf0ced2b0a76e049e1019031e2d1941 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 4 Jul 2022 13:04:22 +0200 Subject: [PATCH 665/902] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71309ae8..986582dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. 
--> version 5.1.0-dev --------------------------- ++ Added a task to add SVTYPE annotations to GRIDSS results + (`AnnotateSvTypes`). + The GRIDSS task will now run tabix separately if GRIDSS doesn't produce a vcf index. + Add a script to subtract UMI's from the read name and add them as From 0f3cb30df3276150f6b168ebfc43ed596d9f140b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 4 Jul 2022 16:10:59 +0200 Subject: [PATCH 666/902] Add GT to gridss results in AnnotateSvTypes --- gridss.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 38daa029..35e41d21 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -119,6 +119,8 @@ task AnnotateSvTypes { gr <- breakpointRanges(vcf) svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype + # GRIDSS doesn't supply a GT, so we estimate GT based on AF (assuming CN of 2, might be inaccurate) + geno(vcf)$GT <- ifelse(geno(vcf)$AF > 0.75, "1/1", ifelse(geno(vcf)$AF < 0.25, "0/0", "0/1")) writeVcf(vcf, out_path, index=~{index}) EOF >>> From cbd6de84edb3776aef10e774f2d15f8c29902490 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 18 Jul 2022 13:20:33 +0200 Subject: [PATCH 667/902] fix typo in star GenomeGenerate parameter_meta --- star.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/star.wdl b/star.wdl index aa1fd608..6a123c86 100644 --- a/star.wdl +++ b/star.wdl @@ -78,7 +78,7 @@ task GenomeGenerate { parameter_meta { # inputs - genomeDir: {description:"The directory the STAR index should be written to.", categroy: "common"} + genomeDir: {description:"The directory the STAR index should be written to.", category: "common"} referenceFasta: {description: "The reference Fasta file.", category: "required"} referenceGtf: {description: "The reference GTF file.", category: "common"} sjdbOverhang: {description: "Equivalent to STAR's `--sjdbOverhang` option.", category: "advanced"} From 9625c84b6749aa6b93f933d8a9bf307231dd73e7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 
18 Jul 2022 15:12:44 +0200 Subject: [PATCH 668/902] update changelog --- CHANGELOG.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 986582dd..afd115c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,19 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Added a task for CupGenerateReport. ++ Updated Cuppa to version 1.6. ++ Added a task for Gripss. ++ Fixed the HealthChecker task's determination of the `succeeded` output + value. ++ Updated Linx to version 1.18. ++ Added a task for LinxVisualization. ++ Added a task for HMFtools Orange. ++ Added a task for HMFtools Pave. ++ Updated Purple to version 3.2. ++ Added plot and table outputs of Sage to task outputs. ++ Updated virus-interpreter to version 1.2. ++ Updated Peach to version 1.5. + Added a task to add SVTYPE annotations to GRIDSS results (`AnnotateSvTypes`). + The GRIDSS task will now run tabix separately if GRIDSS doesn't From 743e4e0615aa3568f391e65b3fc064e188a6f12e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 18 Jul 2022 15:35:42 +0200 Subject: [PATCH 669/902] fix linting issue --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 5a480f93..628e2f9b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -190,7 +190,7 @@ task CupGenerateReport { command { set -e mkdir -p ./workdir ~{outputDir} - ln -s -t workdir ~{sep=" " cupData} + ln -s -t workdir ~{cupData} CupGenerateReport \ ~{sampleName} \ workdir/ From e996b7930959027c31a1f7a2fd4683692a13a8a3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 2 Aug 2022 10:00:49 +0200 Subject: [PATCH 670/902] increase time for cobalt --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 32bc24fd..a59b3897 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -114,7 +114,7 @@ task Cobalt { Int threads = 1 String memory = "5G" 
String javaXmx = "4G" - Int timeMinutes = 240 + Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0" } From e43bf3e4364a919cd3b380c58bb347d6be3a8069 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 2 Aug 2022 10:38:48 +0200 Subject: [PATCH 671/902] update changelog --- CHANGELOG.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index afd115c8..f750b212 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,22 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Bedtools coverage's timeMinutes now defaults to `320`. ++ Gridss' runtime attribute defaults were changed to: + + jvmHeapSizeGb: `64` + + nonJvmMemoryGb: `10` + + threads: `12` ++ Virusbreakend's runtime attribute defaults were changed to: + + threads: `12` + + timeMinutes: `320` ++ Cobalt's timeMinutes now defaults to `480`. ++ Orange's timeMinutes now defaults to 10. ++ Sage's runtime attributes were changed to: + + threads: `32` + + javaXmx: `"16G"` + + memory: `"20G"` + + timeMinutes: `720` ++ Sambamba's runtimeMinutes nor defaults to `320`. + Added a task for CupGenerateReport. + Updated Cuppa to version 1.6. + Added a task for Gripss. From 24cc6213026dbe1de017ebeabc2de7fbfad912ae Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 8 Aug 2022 11:11:48 +0200 Subject: [PATCH 672/902] make purple's somaticRainfallPlot output optional --- CHANGELOG.md | 2 ++ hmftools.wdl | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f750b212..be0e5a7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Purple's `somaticRainfallPlot` output is now optional and included in + the `plots` output as well. + Bedtools coverage's timeMinutes now defaults to `320`. 
+ Gridss' runtime attribute defaults were changed to: + jvmHeapSizeGb: `64` diff --git a/hmftools.wdl b/hmftools.wdl index 1542bdfc..f878181a 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1133,7 +1133,7 @@ task Purple { File segmentPlot = "~{outputDir}/plot/~{tumorName}.segment.png" File somaticClonalityPlot = "~{outputDir}/plot/~{tumorName}.somatic.clonality.png" File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" - File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" + File? somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File circosNormalRatio = "~{outputDir}/circos/~{referenceName}.ratio.circos" File circosBaf = "~{outputDir}/circos/~{tumorName}.baf.circos" File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" @@ -1150,8 +1150,8 @@ task Purple { purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv] - Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, - segmentPlot, somaticClonalityPlot, somaticPlot] + Array[File] plots = select_all([circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, + segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot]) Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink, circosTumorRatio, circosGaps, circosBaf, circosCnv, circosInputConf, circosMap, circosSnp] From 8993b5c662428a0bcdc5d2fd4806812b061db529 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 22 Aug 2022 14:25:59 +0200 Subject: [PATCH 673/902] Use gebibytes instead of gigabytes --- CPAT.wdl | 4 +-- bam2fastx.wdl | 4 +-- bcftools.wdl | 10 +++---- bedtools.wdl | 16 +++++------ biowdl.wdl | 2 +- bowtie.wdl | 2 +- bwa-mem2.wdl | 4 +-- bwa.wdl | 6 ++-- ccs.wdl | 2 +- centrifuge.wdl | 10 +++---- chunked-scatter.wdl | 4 +-- clever.wdl | 4 +-- 
collect-columns.wdl | 2 +- common.wdl | 20 +++++++------- cutadapt.wdl | 2 +- deconstructsigs.wdl | 2 +- deepvariant.wdl | 2 +- delly.wdl | 2 +- duphold.wdl | 2 +- extractSigPredictHRD.wdl | 2 +- fastqc.wdl | 2 +- fastqsplitter.wdl | 2 +- fgbio.wdl | 2 +- flash.wdl | 2 +- gatk.wdl | 56 ++++++++++++++++++------------------- gffcompare.wdl | 4 +-- gffread.wdl | 4 +-- gridss.wdl | 10 +++---- hisat2.wdl | 2 +- hmftools.wdl | 34 +++++++++++------------ htseq.wdl | 4 +-- isoseq3.wdl | 2 +- lima.wdl | 2 +- macs2.wdl | 2 +- manta.wdl | 4 +-- minimap2.wdl | 4 +-- multiqc.wdl | 6 ++-- nanopack.wdl | 4 +-- pacbio.wdl | 4 +-- pbbam.wdl | 2 +- pbmm2.wdl | 2 +- peach.wdl | 2 +- picard.wdl | 60 ++++++++++++++++++++-------------------- prepareShiny.wdl | 4 +-- rtg.wdl | 8 +++--- sambamba.wdl | 10 +++---- samtools.wdl | 34 +++++++++++------------ scripts | 2 +- smoove.wdl | 2 +- snpeff.wdl | 2 +- somaticseq.wdl | 10 +++---- spades.wdl | 2 +- star.wdl | 10 +++---- strelka.wdl | 4 +-- stringtie.wdl | 4 +-- survivor.wdl | 2 +- talon.wdl | 20 +++++++------- transcriptclean.wdl | 6 ++-- umi-tools.wdl | 6 ++-- umi.wdl | 4 +-- unicycler.wdl | 2 +- vardict.wdl | 2 +- vt.wdl | 2 +- whatshap.wdl | 6 ++-- wisestork.wdl | 8 +++--- 65 files changed, 234 insertions(+), 234 deletions(-) diff --git a/CPAT.wdl b/CPAT.wdl index e6cef3ea..b96ea0d7 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -34,8 +34,8 @@ task CPAT { Array[String]? startCodons Array[String]? stopCodons - String memory = "4G" - Int timeMinutes = 10 + ceil(size(gene, "G") * 30) + String memory = "4GiB" + Int timeMinutes = 10 + ceil(size(gene, "GiB") * 30) String dockerImage = "quay.io/biocontainers/cpat:3.0.4--py39hcbe4a3b_0" } diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 0bdccca8..62827fd9 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -30,7 +30,7 @@ task Bam2Fasta { String? 
seqIdPrefix - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } @@ -98,7 +98,7 @@ task Bam2Fastq { String? seqIdPrefix - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } diff --git a/bcftools.wdl b/bcftools.wdl index 2bf1c732..726d2e37 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,7 +47,7 @@ task Annotate { File? samplesFile Int threads = 0 - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 60 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -138,7 +138,7 @@ task Filter { String? softFilter String outputPath = "./filtered.vcf.gz" - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 1 + ceil(size(vcf, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -186,7 +186,7 @@ task Sort { String outputPath = "output.vcf.gz" String tmpDir = "./sorting-tmp" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 5 String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -261,7 +261,7 @@ task Stats { String? userTsTv Int threads = 0 - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 1 + 2* ceil(size(select_all([inputVcf, compareVcf]), "G")) # TODO: Estimate, 2 minutes per GB, refine later. String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -350,7 +350,7 @@ task View { String? exclude String? 
include - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } diff --git a/bedtools.wdl b/bedtools.wdl index 80a281d6..fe18ede6 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -26,7 +26,7 @@ task Complement { File inputBed String outputBed = basename(inputBed, "\.bed") + ".complement.bed" - String memory = "~{512 + ceil(size([inputBed, faidx], "M"))}M" + String memory = "~{512 + ceil(size([inputBed, faidx], "MiB"))}MiB" Int timeMinutes = 1 + ceil(size([inputBed, faidx], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -75,7 +75,7 @@ task Coverage { File? bIndex String outputPath = "./coverage.tsv" - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 320 String dockerImage = "quay.io/biocontainers/bedtools:2.30.0--h7d7f7ad_2" } @@ -120,7 +120,7 @@ task Merge { File inputBed String outputBed = "merged.bed" - String memory = "~{512 + ceil(size(inputBed, "M"))}M" + String memory = "~{512 + ceil(size(inputBed, "MiB"))}MiB" Int timeMinutes = 1 + ceil(size(inputBed, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -159,7 +159,7 @@ task MergeBedFiles { Array[File]+ bedFiles String outputBed = "merged.bed" - String memory = "~{512 + ceil(size(bedFiles, "M"))}M" + String memory = "~{512 + ceil(size(bedFiles, "MiB"))}MiB" Int timeMinutes = 1 + ceil(size(bedFiles, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -207,8 +207,8 @@ task Sort { File? genome File? faidx - String memory = "~{512 + ceil(size(inputBed, "M"))}M" - Int timeMinutes = 1 + ceil(size(inputBed, "G")) + String memory = "~{512 + ceil(size(inputBed, "MiB"))}MiB" + Int timeMinutes = 1 + ceil(size(inputBed, "GiB")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -267,8 +267,8 @@ task Intersect { File? 
faidx # Giving a faidx file will set the sorted option. - String memory = "~{512 + ceil(size([regionsA, regionsB], "M"))}M" - Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "G")) + String memory = "~{512 + ceil(size([regionsA, regionsB], "MiB"))}MiB" + Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "GiB")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } diff --git a/biowdl.wdl b/biowdl.wdl index dead8303..f891618e 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -32,7 +32,7 @@ task InputConverter { Boolean checkFileMd5sums=false Boolean old=false - String memory = "128M" + String memory = "128MiB" Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.3.0--pyhdfd78af_0" } diff --git a/bowtie.wdl b/bowtie.wdl index 87210dcd..7e817594 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -37,7 +37,7 @@ task Bowtie { String picardXmx = "4G" Int threads = 1 - String memory = "~{5 + ceil(size(indexFiles, "G"))}G" + String memory = "~{5 + ceil(size(indexFiles, "GiB"))}GiB" Int timeMinutes = 1 + ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads) # Image contains bowtie=1.2.2 and picard=2.9.2 String dockerImage = "quay.io/biocontainers/mulled-v2-bfe71839265127576d3cd749c056e7b168308d56:1d8bec77b352cdcf3e9ff3d20af238b33ed96eae-0" diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index 4566e68c..b3db0ad1 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -36,7 +36,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) + Int timeMinutes = 1 + ceil(size([read1, read2], "GiB") * 220 / threads) # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-6a15c99309c82b345497d24489bee67bbb76c2f6:1c9c3227b9bf825a8dc9726a25701aa23c0b1f12-0" } @@ -84,7 +84,7 @@ task Mem { # One extra thread for bwa-postalt + samtools is not needed. 
# These only use 5-10% of compute power and not always simultaneously. cpu: threads - memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" + memory: "~{select_first([memoryGb, estimatedMemoryGb])}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/bwa.wdl b/bwa.wdl index 373de628..d4f4495a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -37,7 +37,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 300 / threads) + Int timeMinutes = 10 + ceil(size([read1, read2], "GiB") * 300 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "G") * 2) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "GiB") * 2) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. @@ -81,7 +81,7 @@ task Mem { # One extra thread for bwa-postalt + samtools is not needed. # These only use 5-10% of compute power and not always simultaneously. cpu: threads - memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" + memory: "~{select_first([memoryGb, estimatedMemoryGb])}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/ccs.wdl b/ccs.wdl index 29f1a7f9..27db15ab 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -42,7 +42,7 @@ task CCS { String? 
chunkString Int threads = 2 - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/pbccs:6.0.0--h9ee0642_2" } diff --git a/centrifuge.wdl b/centrifuge.wdl index 07dc7f85..757af239 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -36,7 +36,7 @@ task Build { File? sizeTable Int threads = 5 - String memory = "20G" + String memory = "20GiB" Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -109,7 +109,7 @@ task Classify { String? excludeTaxIDs Int threads = 4 - String memory = "16G" + String memory = "16GiB" Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -186,7 +186,7 @@ task Inspect { Int? across - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -245,7 +245,7 @@ task KReport { Int? minimumScore Int? minimumLength - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -303,7 +303,7 @@ task KTimportTaxonomy { File inputFile String outputPrefix - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 1 String dockerImage = "biocontainers/krona:v2.7.1_cv1" } diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 66954c36..af24b139 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -30,7 +30,7 @@ task ChunkedScatter { Int? overlap Int? minimumBasesPerFile - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/chunked-scatter:1.0.0--py_0" } @@ -84,7 +84,7 @@ task ScatterRegions { Int? 
scatterSize - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/chunked-scatter:1.0.0--py_0" } diff --git a/clever.wdl b/clever.wdl index 186be514..791a0ba1 100644 --- a/clever.wdl +++ b/clever.wdl @@ -34,7 +34,7 @@ task Mateclever { Int maxOffset = 150 Int threads = 10 - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 600 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } @@ -94,7 +94,7 @@ task Prediction { String outputPath = "./clever" Int threads = 10 - String memory = "55G" + String memory = "55GiB" Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } diff --git a/collect-columns.wdl b/collect-columns.wdl index 3d65c7e7..03ccb6f7 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -62,7 +62,7 @@ task CollectColumns { } runtime { - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/common.wdl b/common.wdl index 1e4fc8cb..1ce2895f 100644 --- a/common.wdl +++ b/common.wdl @@ -25,7 +25,7 @@ task AppendToStringArray { Array[String] array String string - String memory = "1G" + String memory = "1GiB" } command { @@ -51,7 +51,7 @@ task CheckFileMD5 { # By default cromwell expects /bin/bash to be present in the container. # The 'bash' container does not fill this requirement. (It is in /usr/local/bin/bash) # Use a stable version of debian:stretch-slim for this. (Smaller than ubuntu) - String memory = "1G" + String memory = "1GiB" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -75,7 +75,7 @@ task ConcatenateTextFiles { Boolean unzip = false Boolean zip = false - String memory = "1G" + String memory = "1GiB" } # When input and output is both compressed decompression is not needed. 
@@ -104,7 +104,7 @@ task Copy { Boolean recursive = false # Version not that important as long as it is stable. - String memory = "1G" + String memory = "1GiB" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -132,7 +132,7 @@ task CreateLink { String inputFile String outputPath - String memory = "1G" + String memory = "1GiB" } command { @@ -170,7 +170,7 @@ task GetSamplePositionInArray { runtime { # 4 gigs of memory to be able to build the docker image in singularity. - memory: "4G" + memory: "4GiB" docker: dockerImage timeMinutes: 5 } @@ -190,7 +190,7 @@ task MapMd5 { input { Map[String,String] map - String memory = "1G" + String memory = "1GiB" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -214,7 +214,7 @@ task StringArrayMd5 { input { Array[String] stringArray - String memory = "1G" + String memory = "1GiB" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -238,7 +238,7 @@ task TextToFile { String text String outputFile = "out.txt" - String memory = "1G" + String memory = "1GiB" Int timeMinutes = 1 String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -274,7 +274,7 @@ task YamlToJson { File yaml String outputJson = basename(yaml, "\.ya?ml$") + ".json" - String memory = "128M" + String memory = "128MiB" Int timeMinutes = 1 # biowdl-input-converter has python and pyyaml. String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.3.0--pyhdfd78af_0" diff --git a/cutadapt.wdl b/cutadapt.wdl index b49a95d4..9a67692c 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -83,7 +83,7 @@ task Cutadapt { Boolean? 
noZeroCap Int cores = 4 - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" } diff --git a/deconstructsigs.wdl b/deconstructsigs.wdl index ef47e3e3..c44bf9c0 100644 --- a/deconstructsigs.wdl +++ b/deconstructsigs.wdl @@ -27,7 +27,7 @@ task DeconstructSigs { String outputPath = "./signatures.rds" Int timeMinutes = 15 - String memory = "4G" + String memory = "4GiB" String dockerImage = "quay.io/biocontainers/r-deconstructsigs:1.9.0--r41hdfd78af_1" } diff --git a/deepvariant.wdl b/deepvariant.wdl index 28aee813..25d05bd9 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -37,7 +37,7 @@ task RunDeepVariant { String? sampleName Boolean? VCFStatsReport = true - String memory = "3G" + String memory = "3GiB" Int timeMinutes = 5000 String dockerImage = "google/deepvariant:1.0.0" } diff --git a/delly.wdl b/delly.wdl index bf00ed36..7333c5ff 100644 --- a/delly.wdl +++ b/delly.wdl @@ -28,7 +28,7 @@ task CallSV { File referenceFastaFai String outputPath = "./delly/delly.bcf" - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 300 String dockerImage = "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" } diff --git a/duphold.wdl b/duphold.wdl index 80fe31d2..0426da56 100644 --- a/duphold.wdl +++ b/duphold.wdl @@ -30,7 +30,7 @@ task Duphold { String sample String outputPath = "./duphold.vcf" - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/duphold:0.2.1--h516909a_1" } diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl index 2b5d9781..1520b608 100644 --- a/extractSigPredictHRD.wdl +++ b/extractSigPredictHRD.wdl @@ -30,7 +30,7 @@ task ExtractSigPredictHRD { File svVcfIndex Boolean hg38 = false - String memory = "3G" + String memory = "3GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/chord-mutsigextractor:2.00_1.14" } diff --git 
a/fastqc.wdl b/fastqc.wdl index 3a07db4e..d821e531 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -45,7 +45,7 @@ task Fastqc { # weird edge case fastq's. String javaXmx="1750M" Int threads = 1 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" diff --git a/fastqsplitter.wdl b/fastqsplitter.wdl index 25a50954..4a02697c 100644 --- a/fastqsplitter.wdl +++ b/fastqsplitter.wdl @@ -63,7 +63,7 @@ task Fastqsplitter { runtime { cpu: cores - memory: "~{memory}G" + memory: "~{memory}GiB" docker: dockerImage } } diff --git a/fgbio.wdl b/fgbio.wdl index d50906d3..15fb0ea4 100644 --- a/fgbio.wdl +++ b/fgbio.wdl @@ -26,7 +26,7 @@ task AnnotateBamWithUmis { File inputUmi String outputPath - String memory = "120G" + String memory = "120GiB" Int timeMinutes = 360 String javaXmx="100G" String dockerImage = "quay.io/biocontainers/fgbio:1.4.0--hdfd78af_0" diff --git a/flash.wdl b/flash.wdl index c4554c50..7b50e0d7 100644 --- a/flash.wdl +++ b/flash.wdl @@ -34,7 +34,7 @@ task Flash { Int? maxOverlap Int threads = 2 - String memory = "2G" + String memory = "2GiB" } command { diff --git a/gatk.wdl b/gatk.wdl index 5cf7c673..0b93efe6 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -34,7 +34,7 @@ task AnnotateIntervals { File? segmentalDuplicationTrack String javaXmx = "2G" - String memory = "3G" + String memory = "3GiB" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -129,7 +129,7 @@ task ApplyBQSR { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -197,7 +197,7 @@ task BaseRecalibrator { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -232,7 +232,7 @@ task CalculateContamination { File? 
normalPileups String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -279,7 +279,7 @@ task CallCopyRatioSegments { File copyRatioSegments String javaXmx = "2G" - String memory = "3G" + String memory = "3GiB" Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -332,7 +332,7 @@ task CollectAllelicCounts { File? commonVariantSitesIndex String javaXmx = "10G" - String memory = "11G" + String memory = "11GiB" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -390,7 +390,7 @@ task CollectReadCounts { String intervalMergingRule = "OVERLAPPING_ONLY" String javaXmx = "7G" - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 5) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -449,7 +449,7 @@ task CombineGVCFs { File referenceFastaFai String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 1 + ceil(size(gvcfFiles, "G") * 8) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -509,7 +509,7 @@ task CombineVariants { String outputPath String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 180 String dockerImage = "broadinstitute/gatk3:3.8-1" } @@ -579,7 +579,7 @@ task CreateReadCountPanelOfNormals { File? annotatedIntervals String javaXmx = "7G" - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 5 # The biocontainer causes a spark related error for some reason. String dockerImage = "broadinstitute/gatk:4.1.8.0" @@ -629,7 +629,7 @@ task DenoiseReadCounts { File? 
annotatedIntervals String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -690,7 +690,7 @@ task FilterMutectCalls { File? artifactPriors String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -775,7 +775,7 @@ task GatherBqsrReports { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -805,7 +805,7 @@ task GenomicsDBImport { String? tmpDir String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -866,7 +866,7 @@ task GenotypeGVCFs { File? pedigree String javaXmx = "6G" - String memory = "7G" + String memory = "7GiB" Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -932,7 +932,7 @@ task GetPileupSummaries { String outputPrefix String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1032,7 +1032,7 @@ task HaplotypeCaller { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -1073,7 +1073,7 @@ task LearnReadOrientationModel { Array[File]+ f1r2TarGz String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1114,7 +1114,7 @@ task MergeStats { Array[File]+ stats String javaXmx = "14G" - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1162,7 +1162,7 @@ task ModelSegments { File? normalAllelicCounts String javaXmx = "10G" - String memory = "11G" + String memory = "11GiB" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1250,7 +1250,7 @@ task MuTect2 { File? panelOfNormalsIndex String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1325,7 +1325,7 @@ task PlotDenoisedCopyRatios { Int? minimumContigLength String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.8.0" } @@ -1393,7 +1393,7 @@ task PlotModeledSegments { Int? minimumContigLength String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.8.0" } @@ -1454,7 +1454,7 @@ task PreprocessIntervals { File? 
intervals String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 6) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1516,7 +1516,7 @@ task SelectVariants { String? selectTypeToInclude String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1576,7 +1576,7 @@ task SplitNCigarReads { Array[File] intervals = [] String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1645,7 +1645,7 @@ task VariantEval { File? dbsnpVCFIndex String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" # TODO: Refine estimate. For now 4 minutes per GB of input. Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs, select_all([referenceFasta, dbsnpVCF])]), "G") * 20) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" @@ -1722,7 +1722,7 @@ task VariantFiltration { Array[File] intervals = [] String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } diff --git a/gffcompare.wdl b/gffcompare.wdl index d06602bc..fe1db0a8 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -46,8 +46,8 @@ task GffCompare { Int? maxDistanceGroupingTranscriptStartSites String? namePrefix - String memory = "4G" - Int timeMinutes = 1 + ceil(size(inputGtfFiles, "G") * 30) + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(inputGtfFiles, "GiB") * 30) String dockerImage = "quay.io/biocontainers/gffcompare:0.10.6--h2d50403_0" # This workaround only works in the input section. 
diff --git a/gffread.wdl b/gffread.wdl index a04540f5..26a2773c 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -32,8 +32,8 @@ task GffRead { String? proteinFastaPath String? filteredGffPath - String memory = "4G" - Int timeMinutes = 1 + ceil(size(inputGff, "G") * 10) + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(inputGff, "GiB") * 10) String dockerImage = "quay.io/biocontainers/gffread:0.9.12--0" } diff --git a/gridss.wdl b/gridss.wdl index add3c08f..cfe53751 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -33,7 +33,7 @@ task AnnotateInsertedSequence { Int threads = 8 String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 120 } @@ -85,7 +85,7 @@ task AnnotateSvTypes { File gridssVcfIndex String outputPath = "./gridss.svtyped.vcf.bgz" - String memory = "32G" + String memory = "32GiB" String dockerImage = "quay.io/biocontainers/bioconductor-structuralvariantannotation:1.10.0--r41hdfd78af_0" Int timeMinutes = 240 } @@ -201,7 +201,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb + nonJvmMemoryGb}G" + memory: "~{jvmHeapSizeGb + nonJvmMemoryGb}GiB" time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } @@ -239,7 +239,7 @@ task GridssAnnotateVcfRepeatmasker { File gridssVcfIndex String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" - String memory = "25G" + String memory = "25GiB" Int threads = 8 String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 1440 @@ -289,7 +289,7 @@ task Virusbreakend { File virusbreakendDB String outputPath = "./virusbreakend.vcf" - String memory = "75G" + String memory = "75GiB" Int threads = 12 String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 320 diff --git a/hisat2.wdl b/hisat2.wdl index a2c0777c..50fabc9d 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -82,7 +82,7 @@ task Hisat2 { runtime { cpu: threads - memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" + memory: 
"~{select_first([memoryGb, estimatedMemoryGb])}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/hmftools.wdl b/hmftools.wdl index f878181a..26ab4e4a 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,7 +35,7 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = "70G" + String memory = "70GiB" String javaXmx = "64G" Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" @@ -112,7 +112,7 @@ task Cobalt { File gcProfile Int threads = 1 - String memory = "5G" + String memory = "5GiB" String javaXmx = "4G" Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0" @@ -174,7 +174,7 @@ task CupGenerateReport { File cupData String outputDir = "./cuppa" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/cuppa:1.6" } @@ -242,7 +242,7 @@ task Cuppa { String outputDir = "./cuppa" String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/cuppa:1.6" } @@ -298,7 +298,7 @@ task CuppaChart { File cupData String outputDir = "./cuppa" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 5 String dockerImage = "quay.io/biowdl/cuppa:1.6" } @@ -348,7 +348,7 @@ task Gripss { File vcfIndex String outputDir = "./" - String memory = "17G" + String memory = "17GiB" String javaXmx = "16G" Int timeMinutes = 50 String dockerImage = "quay.io/biocontainers/hmftools-gripss:2.0--hdfd78af_0" @@ -419,7 +419,7 @@ task GripssApplicationKt { File breakendPon File breakpointPon - String memory = "32G" + String memory = "32GiB" String javaXmx = "31G" Int timeMinutes = 45 String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" @@ -478,7 +478,7 @@ task GripssHardFilterApplicationKt { File inputVcf String outputPath = "gripss_hard_filter.vcf.gz" - String memory = "3G" + String memory = "3GiB" String javaXmx = "2G" Int timeMinutes = 15 String 
dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" @@ -527,7 +527,7 @@ task HealthChecker { Array[File]+ purpleOutput String javaXmx = "2G" - String memory = "1G" + String memory = "3GiB" Int timeMinutes = 1 String dockerImage = "quay.io/biowdl/health-checker:3.2" } @@ -604,7 +604,7 @@ task Linx { File transExonDataCsv File transSpliceDataCsv - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" @@ -690,7 +690,7 @@ task LinxVisualisations { Array[File]+ linxOutput Boolean plotReportable = true - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" @@ -778,7 +778,7 @@ task Orange { File cohortMappingTsv File cohortPercentilesTsv - String memory = "17G" + String memory = "17GiB" String javaXmx = "16G" Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/orange:v1.6" @@ -902,7 +902,7 @@ task Pave { Int timeMinutes = 50 String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" String dockerImage = "quay.io/biowdl/pave:v1.0" } @@ -979,7 +979,7 @@ task Protect { File chordPrediction File annotatedVirus - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 String dockerImage = "quay.io/biowdl/protect:v2.0" @@ -1078,7 +1078,7 @@ task Purple { Int threads = 1 Int timeMinutes = 30 - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" # clone of quay.io/biocontainers/hmftools-purple:3.2--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6' String dockerImage = "quay.io/biowdl/hmftools-purple:3.2" @@ -1227,7 +1227,7 @@ task Sage { Int threads = 32 String javaXmx = "16G" - String memory = "20G" + String memory = "20GiB" Int timeMinutes = 720 String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } @@ -1315,7 +1315,7 @@
task VirusInterpreter { File virusReportingDbTsv String outputDir = "." - String memory = "3G" + String memory = "3GiB" String javaXmx = "2G" Int timeMinutes = 15 String dockerImage = "quay.io/biowdl/virus-interpreter:1.2" diff --git a/htseq.wdl b/htseq.wdl index 76d3bb83..92bc4423 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -33,8 +33,8 @@ task HTSeqCount { String? idattr Int nprocesses = 1 - String memory = "8G" - Int timeMinutes = 1440 #10 + ceil(size(inputBams, "G") * 60) FIXME + String memory = "8GiB" + Int timeMinutes = 1440 #10 + ceil(size(inputBams, "GiB") * 60) FIXME String dockerImage = "quay.io/biocontainers/htseq:0.12.4--py37hb3f55d8_0" } diff --git a/isoseq3.wdl b/isoseq3.wdl index aacbfc60..77f19f80 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -32,7 +32,7 @@ task Refine { String outputNamePrefix Int threads = 2 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/isoseq3:3.4.0--0" } diff --git a/lima.wdl b/lima.wdl index 6b87ad4f..eece2b3f 100644 --- a/lima.wdl +++ b/lima.wdl @@ -49,7 +49,7 @@ task Lima { String outputPrefix Int threads = 2 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/lima:2.2.0--h9ee0642_0" } diff --git a/macs2.wdl b/macs2.wdl index 2afe3bbe..e6a011ad 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -31,7 +31,7 @@ task PeakCalling { String format = "AUTO" Boolean nomodel = false Int timeMinutes = 600 # Default to 10 hours - String memory = "8G" + String memory = "8GiB" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } diff --git a/manta.wdl b/manta.wdl index 1c949af2..6804f304 100644 --- a/manta.wdl +++ b/manta.wdl @@ -60,7 +60,7 @@ task Germline { runtime { cpu: cores - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" docker: dockerImage time_minutes: timeMinutes } @@ -138,7 +138,7 @@ task Somatic { runtime { cpu: cores - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" docker: dockerImage 
time_minutes: timeMinutes } diff --git a/minimap2.wdl b/minimap2.wdl index 50ff4db3..96cc7734 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -31,7 +31,7 @@ task Indexing { Int? splitIndex Int cores = 1 - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } @@ -98,7 +98,7 @@ task Mapping { String? howToFindGTAG Int cores = 4 - String memory = "30G" + String memory = "30GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } diff --git a/multiqc.wdl b/multiqc.wdl index a1662937..21fc8a7d 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -57,11 +57,11 @@ task MultiQC { String? clConfig String? memory - Int timeMinutes = 10 + ceil(size(reports, "G") * 8) + Int timeMinutes = 10 + ceil(size(reports, "GiB") * 8) String dockerImage = "quay.io/biocontainers/multiqc:1.9--py_1" } - Int memoryGb = 2 + ceil(size(reports, "G")) + Int memoryGb = 2 + ceil(size(reports, "GiB")) # This is where the reports end up. It does not need to be changed by the # user. It is full of symbolic links, so it is not of any use to the user @@ -139,7 +139,7 @@ task MultiQC { } runtime { - memory: select_first([memory, "~{memoryGb}G"]) + memory: select_first([memory, "~{memoryGb}GiB"]) time_minutes: timeMinutes docker: dockerImage } diff --git a/nanopack.wdl b/nanopack.wdl index e4c94a43..bd3f433e 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -40,7 +40,7 @@ task NanoPlot { String? readType Int threads = 2 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/nanoplot:1.38.0--pyhdfd78af_0" } @@ -130,7 +130,7 @@ task NanoQc { Int? 
minLength - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/nanoqc:0.9.4--py_0" } diff --git a/pacbio.wdl b/pacbio.wdl index b21c69bc..dcf0f69e 100644 --- a/pacbio.wdl +++ b/pacbio.wdl @@ -25,7 +25,7 @@ task mergePacBio { Array[File]+ reports String outputPathMergedReport - String memory = "4G" + String memory = "4GiB" String dockerImage = "quay.io/redmar_van_den_berg/pacbio-merge:0.2" } @@ -62,7 +62,7 @@ task ccsChunks { input { Int chunkCount - String memory = "4G" + String memory = "4GiB" String dockerImage = "python:3.7-slim" } diff --git a/pbbam.wdl b/pbbam.wdl index ae64b87c..d5cafed6 100644 --- a/pbbam.wdl +++ b/pbbam.wdl @@ -26,7 +26,7 @@ task Index { String? outputBamPath - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) String dockerImage = "quay.io/biocontainers/pbbam:1.6.0--h5b7e6e0_0" } diff --git a/pbmm2.wdl b/pbmm2.wdl index 5fda1c87..ea7c05df 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -29,7 +29,7 @@ task Mapping { File queryFile Int cores = 4 - String memory = "30G" + String memory = "30GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 2000 / cores) String dockerImage = "quay.io/biocontainers/pbmm2:1.3.0--h56fc30b_1" } diff --git a/peach.wdl b/peach.wdl index d1bc17f8..7da029d0 100644 --- a/peach.wdl +++ b/peach.wdl @@ -29,7 +29,7 @@ task Peach { String outputDir = "./peach" File panelJson - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biowdl/peach:v1.5" Int timeMinutes = 5 } diff --git a/picard.wdl b/picard.wdl index 3d835829..f762ecdd 100644 --- a/picard.wdl +++ b/picard.wdl @@ -27,7 +27,7 @@ task BedToIntervalList { String outputPath = "regions.interval_list" String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -88,7 +88,7 @@ task CollectHsMetrics { Int memoryMb = javaXmxMb + 512 # 
Additional * 2 because picard multiple metrics reads the # reference fasta twice. - Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(referenceFasta, "GiB") * 3 * 2) + ceil(size(inputBam, "GiB") * 6) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -109,7 +109,7 @@ task CollectHsMetrics { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -157,7 +157,7 @@ task CollectMultipleMetrics { Int javaXmxMb = 3072 Int memoryMb = javaXmxMb + 512 # Additional * 2 because picard multiple metrics reads the reference fasta twice. - Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(referenceFasta, "GiB") * 3 * 2) + ceil(size(inputBam, "GiB") * 6) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -223,7 +223,7 @@ task CollectMultipleMetrics { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -281,9 +281,9 @@ task CollectRnaSeqMetrics { String strandSpecificity = "NONE" String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" # With 6 minutes per G there were several timeouts. 
- Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 12) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -340,8 +340,8 @@ task CollectTargetedPcrMetrics { String basename String javaXmx = "3G" - String memory = "4G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 6) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -402,7 +402,7 @@ task CollectVariantCallingMetrics { String basename String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -459,9 +459,9 @@ task CollectWgsMetrics { Int? minimumBaseQuality Int? coverageCap - String memory = "5G" + String memory = "5GiB" String javaXmx = "4G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 6) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -516,7 +516,7 @@ task CreateSequenceDictionary { String outputDir String javaXmx = "2G" - String memory = "3G" + String memory = "3GiB" String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -568,7 +568,7 @@ task GatherBamFiles { Int javaXmxMb = 1024 Int memoryMb = javaXmxMb + 512 # One minute per input gigabyte. 
- Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) + Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 1) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -593,7 +593,7 @@ task GatherBamFiles { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -630,8 +630,8 @@ task GatherVcfs { Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "4G" - String memory = "5G" - Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2) + String memory = "5GiB" + Int timeMinutes = 1 + ceil(size(inputVcfs, "GiB") * 2) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -700,7 +700,7 @@ task MarkDuplicates { Int javaXmxMb = 6656 # 6.5G String memoryMb = javaXmxMb + 512 - Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) + Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 8) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -736,7 +736,7 @@ task MarkDuplicates { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -782,8 +782,8 @@ task MergeVCFs { Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "4G" - String memory = "5G" - Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 + String memory = "5GiB" + Int timeMinutes = 1 + ceil(size(inputVCFs, "GiB")) * 2 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -838,7 +838,7 @@ task SamToFastq { Boolean paired = true String javaXmx = "16G" # High memory default to avoid crashes. 
- String memory = "17G" + String memory = "17GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" @@ -900,7 +900,7 @@ task ScatterIntervalList { Int scatter_count String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -943,7 +943,7 @@ task SortSam { # 4.000000001 which gets rounded to 5. # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -971,7 +971,7 @@ task SortSam { runtime { cpu: 1 - memory: "~{1 + XmxGb}G" + memory: "~{1 + XmxGb}GiB" time_minutes: timeMinutes docker: dockerImage } @@ -1004,8 +1004,8 @@ task SortVcf { File? 
dict String javaXmx = "8G" - String memory = "9G" - Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) + String memory = "9GiB" + Int timeMinutes = 1 + ceil(size(vcfFiles, "GiB") * 5) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -1054,8 +1054,8 @@ task RenameSample { String newSampleName String javaXmx = "8G" - String memory = "9G" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) + String memory = "9GiB" + Int timeMinutes = 1 + ceil(size(inputVcf, "GiB") * 2) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -1109,7 +1109,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { Boolean useJdkInflater = false Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" Int timeMinutes = 360 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } diff --git a/prepareShiny.wdl b/prepareShiny.wdl index d669e2d1..28910743 100644 --- a/prepareShiny.wdl +++ b/prepareShiny.wdl @@ -25,7 +25,7 @@ task CreateDesignMatrix { File countTable String shinyDir = "." - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" } @@ -67,7 +67,7 @@ task CreateAnnotation { File referenceGtfFile String shinyDir = "." 
- String memory = "5G" + String memory = "5GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" } diff --git a/rtg.wdl b/rtg.wdl index 0e86ce3f..3e9dab9b 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -27,8 +27,8 @@ task Format { String outputPath = "seq_data.sdf" String rtgMem = "8G" - String memory = "9G" - Int timeMinutes = 1 + ceil(size(inputFiles) * 2) + String memory = "9GiB" + Int timeMinutes = 1 + ceil(size(inputFiles, "GiB") * 2) String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" } @@ -85,8 +85,8 @@ task VcfEval { String rtgMem = "8G" Int threads = 1 # Tool default is number of cores in the system 😱. - String memory = "9G" - Int timeMinutes = 1 + ceil(size([baseline, calls], "G") * 5) + String memory = "9GiB" + Int timeMinutes = 1 + ceil(size([baseline, calls], "GiB") * 5) String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" } diff --git a/sambamba.wdl b/sambamba.wdl index 6696668a..be347f94 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -27,7 +27,7 @@ task Flagstat { String outputPath = "./flagstat.txt" Int threads = 2 - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 320 String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } @@ -84,7 +84,7 @@ task Markdup { # Added 8192 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 8192 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
- Int timeMinutes = 1 + ceil(size(inputBams, "G") * 25) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 25) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } @@ -113,7 +113,7 @@ task Markdup { runtime { cpu: threads - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -149,7 +149,7 @@ task Sort { Int memoryPerThreadGb = 4 Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } @@ -177,7 +177,7 @@ task Sort { runtime { cpu: threads - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" docker: dockerImage time_minutes: timeMinutes } diff --git a/samtools.wdl b/samtools.wdl index 81b6c17d..e1b08173 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -26,8 +26,8 @@ task BgzipAndIndex { String outputDir String type = "vcf" - String memory = "2G" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String memory = "2GiB" + Int timeMinutes = 1 + ceil(size(inputFile, "GiB")) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -71,7 +71,7 @@ task Faidx { File inputFile String outputDir - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -119,7 +119,7 @@ task Fastq { Int? 
compressionLevel Int threads = 1 - String memory = "1G" + String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam) * 2) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -183,8 +183,8 @@ task FilterShortReadsBam { File bamFile String outputPathBam - String memory = "1G" - Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) + String memory = "1GiB" + Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 8) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -229,7 +229,7 @@ task Flagstat { File inputBam String outputPath - String memory = "256M" # Only 40.5 MiB used for 150G bam file. + String memory = "256MiB" # Only 40.5 MiB used for 150G bam file. Int timeMinutes = 1 + ceil(size(inputBam, "G")) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -269,8 +269,8 @@ task Index { String? outputBamPath - String memory = "2G" - Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) + String memory = "2GiB" + Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -321,7 +321,7 @@ task Markdup { File inputBam String outputBamPath - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -359,8 +359,8 @@ task Merge { Boolean force = true Int threads = 1 - String memory = "4G" - Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -415,7 +415,7 @@ task Sort { Int memoryPerThreadGb = 4 Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -444,7 +444,7 @@ task 
Sort { runtime { cpu: threads - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" time_minutes: timeMinutes docker: dockerImage } @@ -473,7 +473,7 @@ task Tabix { String outputFilePath = "indexed.vcf.gz" String type = "vcf" - Int timeMinutes = 1 + ceil(size(inputFile, "G") * 2) + Int timeMinutes = 1 + ceil(size(inputFile, "GiB") * 2) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -526,8 +526,8 @@ task View { Int? MAPQthreshold Int threads = 1 - String memory = "1G" - Int timeMinutes = 1 + ceil(size(inFile, "G") * 5) + String memory = "1GiB" + Int timeMinutes = 1 + ceil(size(inFile, "GiB") * 5) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } diff --git a/scripts b/scripts index 84690a30..98cc3e10 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 +Subproject commit 98cc3e10125c853a70f41ceccf8f9d5428d4c1a3 diff --git a/smoove.wdl b/smoove.wdl index d1011f6c..7a1ac38b 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -29,7 +29,7 @@ task Call { String sample String outputDir = "./smoove" - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" } diff --git a/snpeff.wdl b/snpeff.wdl index 4a3640c7..0f14e5b5 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -36,7 +36,7 @@ task SnpEff { Boolean noShiftHgvs = false Int? upDownStreamLen - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/snpeff:5.0--0" diff --git a/somaticseq.wdl b/somaticseq.wdl index 63f8362e..7656d086 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -47,7 +47,7 @@ task ParallelPaired { File? strelkaSNV File? strelkaIndel - String memory = "2G" + String memory = "2GiB" Int threads = 1 Int timeMinutes = 60 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -162,7 +162,7 @@ task ParallelPairedTrain { File? strelkaSNV File? 
strelkaIndel - String memory = "2G" + String memory = "2GiB" Int threads = 1 Int timeMinutes = 240 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -270,7 +270,7 @@ task ParallelSingle { File? scalpelVCF File? strelkaVCF - String memory = "2G" + String memory = "2GiB" Int threads = 1 Int timeMinutes = 60 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -362,7 +362,7 @@ task ParallelSingleTrain { File? scalpelVCF File? strelkaVCF - String memory = "2G" + String memory = "2GiB" Int threads = 1 Int timeMinutes = 240 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -441,7 +441,7 @@ task ModifyStrelka { File strelkaVCF String outputVCFName = basename(strelkaVCF, ".gz") - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 20 String dockerImage = "lethalfang/somaticseq:3.1.0" } diff --git a/spades.wdl b/spades.wdl index 3975dd32..d717ab28 100644 --- a/spades.wdl +++ b/spades.wdl @@ -100,6 +100,6 @@ task Spades { runtime { cpu: threads - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" } } diff --git a/star.wdl b/star.wdl index 6a123c86..88d3c838 100644 --- a/star.wdl +++ b/star.wdl @@ -29,8 +29,8 @@ task GenomeGenerate { Int? sjdbOverhang Int threads = 4 - String memory = "32G" - Int timeMinutes = ceil(size(referenceFasta, "G") * 240 / threads) + String memory = "32GiB" + Int timeMinutes = ceil(size(referenceFasta, "GiB") * 240 / threads) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } @@ -130,12 +130,12 @@ task Star { Int runThreadN = 4 String? memory # 1 minute initialization + time reading in index (1 minute per G) + time aligning data. - Int timeMinutes = 1 + ceil(size(indexFiles, "G")) + ceil(size(flatten([inputR1, inputR2]), "G") * 300 / runThreadN) + Int timeMinutes = 1 + ceil(size(indexFiles, "GiB")) + ceil(size(flatten([inputR1, inputR2]), "GiB") * 300 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } # Use a margin of 30% index size. Real memory usage is ~30 GiB for a 27 GiB index. 
- Int memoryGb = 1 + ceil(size(indexFiles, "G") * 1.3) + Int memoryGb = 1 + ceil(size(indexFiles, "GiB") * 1.3) # For some reason doing above calculation inside a string does not work. # So we solve it with an optional memory string and using select_first # in the runtime section. @@ -172,7 +172,7 @@ task Star { runtime { cpu: runThreadN - memory: select_first([memory, "~{memoryGb}G"]) + memory: select_first([memory, "~{memoryGb}GiB"]) time_minutes: timeMinutes docker: dockerImage } diff --git a/strelka.wdl b/strelka.wdl index be08e386..39afe172 100644 --- a/strelka.wdl +++ b/strelka.wdl @@ -63,7 +63,7 @@ task Germline { runtime { cpu: cores - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" time_minutes: timeMinutes docker: dockerImage } @@ -139,7 +139,7 @@ task Somatic { runtime { cpu: cores - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/stringtie.wdl b/stringtie.wdl index 9c2f3cfc..fbe7e442 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -34,7 +34,7 @@ task Stringtie { Float? minimumCoverage Int threads = 1 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bam, "G") * 60 / threads) String dockerImage = "quay.io/biocontainers/stringtie:1.3.6--h92e31bf_0" } @@ -102,7 +102,7 @@ task Merge { Float? minimumIsoformFraction String? 
label - String memory = "10G" + String memory = "10GiB" Int timeMinutes = 1 + ceil(size(gtfFiles, "G") * 20) String dockerImage = "quay.io/biocontainers/stringtie:2.1.4--h7e0af3c_0" } diff --git a/survivor.wdl b/survivor.wdl index de232405..b233fb52 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -31,7 +31,7 @@ task Merge { Int minSize = 30 String outputPath = "./survivor/merged.vcf" - String memory = "24G" + String memory = "24GiB" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" } diff --git a/talon.wdl b/talon.wdl index 61f5eb4a..2f93e36b 100644 --- a/talon.wdl +++ b/talon.wdl @@ -30,7 +30,7 @@ task CreateAbundanceFileFromDatabase { File? whitelistFile File? datasetsFile - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -86,7 +86,7 @@ task CreateGtfFromDatabase { File? whitelistFile File? datasetFile - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -144,7 +144,7 @@ task FilterTalonTranscripts { File? datasetsFile Int? minDatasets - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -200,7 +200,7 @@ task GetReadAnnotations { File? 
datasetFile - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -248,7 +248,7 @@ task GetSpliceJunctions { String runMode = "intron" String outputPrefix - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -302,7 +302,7 @@ task InitializeTalonDatabase { Int cutOff3p = 300 String outputPrefix - String memory = "10G" + String memory = "10GiB" Int timeMinutes = 60 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -360,7 +360,7 @@ task LabelReads { String outputPrefix Int threads = 4 - String memory = "25G" + String memory = "25GiB" Int timeMinutes = 2880 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -413,7 +413,7 @@ task ReformatGtf { input { File gtfFile - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -454,7 +454,7 @@ task SummarizeDatasets { File? datasetGroupsCsv - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 50 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -506,7 +506,7 @@ task Talon { String outputPrefix Int threads = 4 - String memory = "25G" + String memory = "25GiB" Int timeMinutes = 2880 String dockerImage = "biocontainers/talon:v5.0_cv1" } diff --git a/transcriptclean.wdl b/transcriptclean.wdl index efdd95f4..8607a7a3 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -27,7 +27,7 @@ task GetSJsFromGtf { String outputPrefix Int minIntronSize = 21 - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } @@ -72,7 +72,7 @@ task GetTranscriptCleanStats { File inputSam String outputPrefix - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } @@ -128,7 +128,7 @@ task TranscriptClean { File? 
variantFile Int cores = 1 - String memory = "25G" + String memory = "25GiB" Int timeMinutes = 2880 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } diff --git a/umi-tools.wdl b/umi-tools.wdl index b79817c2..d8d17c48 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -30,7 +30,7 @@ task Extract { String? read2Output = "umi_extracted_R2.fastq.gz" Boolean threePrime = false - String memory = "20G" + String memory = "20GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:3067b520386698317fd507c413baf7f901666fd4-0" } @@ -87,8 +87,8 @@ task Dedup { String? umiSeparator String? statsPrefix - String memory = "25G" - Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) + String memory = "25GiB" + Int timeMinutes = 30 + ceil(size(inputBam, "GiB") * 30) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:3067b520386698317fd507c413baf7f901666fd4-0" } diff --git a/umi.wdl b/umi.wdl index 0dc5c55e..e7f01fc2 100644 --- a/umi.wdl +++ b/umi.wdl @@ -30,8 +30,8 @@ task BamReadNameToUmiTag { String outputPath = "output.bam" String umiTag = "RX" - String memory = "2G" - Int timeMinutes = 1 + ceil(size([inputBam], "G") * 10) + String memory = "2GiB" + Int timeMinutes = 1 + ceil(size([inputBam], "GiB") * 10) String dockerImage = "quay.io/biocontainers/pysam:0.17.0--py39h051187c_0" } diff --git a/unicycler.wdl b/unicycler.wdl index 938d0c7e..d83db3ca 100644 --- a/unicycler.wdl +++ b/unicycler.wdl @@ -66,7 +66,7 @@ task Unicycler { String? 
lowScore Int threads = 1 - String memory = "4G" + String memory = "4GiB" } command { diff --git a/vardict.wdl b/vardict.wdl index 1c20e51c..187b4567 100644 --- a/vardict.wdl +++ b/vardict.wdl @@ -48,7 +48,7 @@ task VarDict { String javaXmx = "16G" Int threads = 1 - String memory = "18G" + String memory = "18GiB" Int timeMinutes = 300 String dockerImage = "quay.io/biocontainers/vardict-java:1.5.8--1" } diff --git a/vt.wdl b/vt.wdl index 85077dae..4da2d8cd 100644 --- a/vt.wdl +++ b/vt.wdl @@ -29,7 +29,7 @@ task Normalize { Boolean ignoreMaskedRef = false String outputPath = "./vt/normalized_decomposed.vcf" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" } diff --git a/whatshap.wdl b/whatshap.wdl index 7307ce7c..da86ad82 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -38,7 +38,7 @@ task Phase { String? threshold String? ped - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" @@ -109,7 +109,7 @@ task Stats { String? blockList String? chromosome - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" @@ -169,7 +169,7 @@ task Haplotag { String? regions String? sample - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" diff --git a/wisestork.wdl b/wisestork.wdl index 8fb4b76b..bef54e27 100644 --- a/wisestork.wdl +++ b/wisestork.wdl @@ -31,7 +31,7 @@ task Count { Int? binSize File? 
binFile - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -69,7 +69,7 @@ task GcCorrect { Int? iter Float? fracLowess - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -129,7 +129,7 @@ task Newref { } runtime { - memory: "~{memory}G" + memory: "~{memory}GiB" docker: dockerImage } } @@ -147,7 +147,7 @@ task Zscore { Int? binSize File? binFile - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } From 5523913a18f121dcc524cac346dd82cf1162e804 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 22 Aug 2022 14:37:42 +0200 Subject: [PATCH 674/902] Update changelog with memory change --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index be0e5a7c..5f4fed5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,11 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` + previously. The WDL spec clearly distuingishes between SI and binary + notations. Since Java always takes `K`, `M` and `G` to mean `KiB`, `MiB` and + `GiB` this means java tasks such as GATK, FastQC and Picard will always + receive enough memory now. + Purple's `somaticRainfallPlot` output is now optional and included in the `plots` output as well. + Bedtools coverage's timeMinutes now defaults to `320`. 
From 75bb0cbcf2d2ccc57e8c5857f140cffe2a310c67 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 25 Aug 2022 13:57:10 +0200 Subject: [PATCH 675/902] update survivor version --- CHANGELOG.md | 1 + survivor.wdl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f4fed5b..b0b7c3e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Updated SURVIVOR version to 1.0.7 + Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` previously. The WDL spec clearly distuingishes between SI and binary notations. Since Java always takes `K`, `M` and `G` to mean `KiB`, `MiB` and diff --git a/survivor.wdl b/survivor.wdl index b233fb52..ae246f60 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -33,7 +33,7 @@ task Merge { String memory = "24GiB" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" + String dockerImage = "quay.io/biocontainers/survivor:1.0.7--hd03093a_2" } command { From bf7aba3c332a8dcabc87d22e1740049ed4bf7db4 Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 7 Oct 2022 17:59:35 +0200 Subject: [PATCH 676/902] add fastp --- fastp.wdl | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 fastp.wdl diff --git a/fastp.wdl b/fastp.wdl new file mode 100644 index 00000000..8cf99d99 --- /dev/null +++ b/fastp.wdl @@ -0,0 +1,101 @@ +verison 1.0 + +# MIT License +# +# Copyright (c) 2022 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom 
the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Fastp { + input { + File r1 + File r2 + String outputPathR1 + String outputPathR2 + String htmlPath + String jsonPath + + Int compressionLevel = 1 + Boolean correction = false + Int lengthRequired = 15 + Int? split + + Int threads = 4 + String memory = "5GiB" + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / cores) + String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" + } + + String outputDirR1 = sub(outputPathR1, basename(outputPathR1), "") + + command { + set -e + mkdir -p $(dirname ~{outputPathR1} ~{outputPathR2} ~{htmlPath} ~{jsonPath}) + # predict output paths + seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths + seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths + fastp \ + -i ~{r1} \ + ~{"-I " + r2} \ + -o ~{outputPathR1} \ + ~{"-O " + outputPathR2} \ + -h ~{htmlPath} \ + -j ~{jsonPath} \ + -z ~{compressionLevel} \ + ~{if correction then "--correction" else ""} \ + --length_required ~{lengthRequired} \ + --threads ~{threads} \ + ~{"--split " + split} \ + ~{if defined(split) then "-d 0" else ""} + } + + Array[String] r1Paths = read_lines("r1_paths") + Array[String] r2Paths = read_lines("r2_paths") 
+ + output { + File htmlReport = htmlPath + File jsonReport = jsonPath + Array[File] clippedR1 = if defined(split) then r1Paths else [outputPathR1] + Array[File] clippedR2 = if defined(split) then r2Paths else [outputPathR2] + } + + runtime { + cpu: cores + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + r1: {description: "The R1 fastq file.", category: "required"} + r2: {description: "The R2 fastq file.", category: "required"} + outputPathR1: {description: "The output path for the R1 file.", category: "required"} + outputPathR2: {description: "The output path for the R2 file.", category: "required"} + htmlPath: {description: "The path to write the html report to.", category: "required"} + jsonPath: {description: "The path to write the json report to.", category: "required"} + compressionLevel: {description: "The compression level to use for the output.", category: "advanced"} + correction: {description: "Whether or not to apply overlap based correction.", category: "advanced"} + lengthRequired: {description: "The minimum read length.", category: "advanced"} + split: {description: "The number of chunks to split the files into.", category: "common"} + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file From 64427306fbbf58eb3ca9b3850a223d06894c9391 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 11 Oct 2022 12:13:08 +0200 Subject: [PATCH 677/902] fix some issues in fastp, add picard CollectInsertSizeMetrics --- fastp.wdl | 28 ++++++++++++++++------------ picard.wdl | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 12 deletions(-) diff --git a/fastp.wdl b/fastp.wdl index 8cf99d99..3063d012 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -1,4 +1,4 @@ -verison 1.0 +version 1.0 # MIT License # @@ -24,8 +24,8 @@ verison 1.0 task Fastp { input { - File r1 - File r2 + File read1 + File read2 String outputPathR1 String outputPathR2 String htmlPath @@ -35,24 +35,26 @@ task Fastp { Boolean correction = false Int lengthRequired = 15 Int? split + Boolean performAdapterTrimming = true Int threads = 4 String memory = "5GiB" - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / cores) + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" } String outputDirR1 = sub(outputPathR1, basename(outputPathR1), "") + String outputDirR2 = sub(outputPathR2, basename(outputPathR2), "") - command { + command <<< set -e mkdir -p $(dirname ~{outputPathR1} ~{outputPathR2} ~{htmlPath} ~{jsonPath}) # predict output paths seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths fastp \ - -i ~{r1} \ - ~{"-I " + r2} \ + -i ~{read1} \ + ~{"-I " + read2} \ -o ~{outputPathR1} \ ~{"-O " + outputPathR2} \ -h ~{htmlPath} \ @@ -62,8 +64,9 @@ task Fastp { --length_required ~{lengthRequired} \ --threads ~{threads} \ ~{"--split " + split} \ - ~{if
defined(split) then "-d 0" else ""} - } + ~{if defined(split) then "-d 0" else ""} \ + ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} + >>> Array[String] r1Paths = read_lines("r1_paths") Array[String] r2Paths = read_lines("r2_paths") @@ -76,15 +79,15 @@ task Fastp { } runtime { - cpu: cores + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage } parameter_meta { - r1: {description: "The R1 fastq file.", category: "required"} - r2: {description: "The R2 fastq file.", category: "required"} + read1: {description: "The R1 fastq file.", category: "required"} + read2: {description: "The R2 fastq file.", category: "required"} outputPathR1: {description: "The output path for the R1 file.", category: "required"} outputPathR2: {description: "The output path for the R2 file.", category: "required"} htmlPath: {description: "The path to write the html report to.", category: "required"} @@ -93,6 +96,7 @@ task Fastp { correction: {description: "Whether or not to apply overlap based correction.", category: "advanced"} lengthRequired: {description: "The minimum read length.", category: "advanced"} split: {description: "The number of chunks to split the files into.", category: "common"} + performAdapterTrimming: {description: "Whether adapter trimming should be performed or not.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} diff --git a/picard.wdl b/picard.wdl index f762ecdd..6628cf0e 100644 --- a/picard.wdl +++ b/picard.wdl @@ -136,6 +136,58 @@ task CollectHsMetrics { } } +task CollectInsertSizeMetrics { + input { + File inputBam + File inputBamIndex + + Float? 
minimumPercentage + String basename = "./insertSize_metrics" + + String memory = "5GiB" + String javaXmx = "4G" + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 6) + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + } + + command { + set -e + mkdir -p "$(dirname ~{basename})" + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + CollectInsertSizeMetrics \ + I=~{inputBam} \ + O=~{basename}.txt \ + H=~{basename}.pdf \ + ~{"M=" + minimumPercentage} + } + + output { + File metricsTxt = "~{basename}.txt" + File metricsPdf = "~{basename}.pdf" + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} + inputBamIndex: {description: "The index of the input BAM file.", category: "required"} + minimumPercentage: {description: "Equivalent to picard CollectInsertSizeMetrics' `M` option.", category: "advanced"} + basename: {description: "The basename for the output files.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task CollectMultipleMetrics { input { File inputBam From 346c0044a15279e1e3c5cd7140e24d9321255be8 Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 14 Oct 2022 15:21:07 +0200 Subject: [PATCH 678/902] fix fastp task --- fastp.wdl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fastp.wdl b/fastp.wdl index 3063d012..c7a4d19f 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -48,7 +48,11 @@ task Fastp { command <<< set -e - mkdir -p $(dirname ~{outputPathR1} ~{outputPathR2} ~{htmlPath} ~{jsonPath}) + mkdir -p $(dirname ~{outputPathR1}) + mkdir -p $(dirname ~{outputPathR2}) + mkdir -p $(dirname ~{htmlPath}) + mkdir -p $(dirname ~{jsonPath}) + # predict output paths seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths @@ -68,14 +72,11 @@ task Fastp { ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} >>> - Array[String] r1Paths = read_lines("r1_paths") - Array[String] r2Paths = read_lines("r2_paths") - output { File htmlReport = htmlPath File jsonReport = jsonPath - Array[File] clippedR1 = if defined(split) then r1Paths else [outputPathR1] - Array[File] clippedR2 = if defined(split) then r2Paths else [outputPathR2] + Array[File] clippedR1 = if defined(split) then read_lines("r1_paths") else [outputPathR1] + Array[File] clippedR2 = if defined(split) then read_lines("r2_paths") else [outputPathR2] } runtime { From 5b55e1b657b4d6d9ee189317d7cc5054493ef863 Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 14 Oct 2022 15:26:38 +0200 Subject: [PATCH 679/902] typo --- fastp.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index c7a4d19f..572de7dc 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ 
-66,7 +66,7 @@ task Fastp { -z ~{compressionLevel} \ ~{if correction then "--correction" else ""} \ --length_required ~{lengthRequired} \ - --threads ~{threads} \ + --thread ~{threads} \ ~{"--split " + split} \ ~{if defined(split) then "-d 0" else ""} \ ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} From 9dda4c842ac98d083bd9c9fdeec1e97437040e65 Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 14 Oct 2022 16:02:40 +0200 Subject: [PATCH 680/902] increase memory for fastp --- fastp.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index 572de7dc..becbaf4b 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -38,7 +38,7 @@ task Fastp { Boolean performAdapterTrimming = true Int threads = 4 - String memory = "5GiB" + String memory = "10GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" } From f8aa7e37593df2282161bc37c49a1d0b5039185b Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 14 Oct 2022 18:06:18 +0200 Subject: [PATCH 681/902] increase memory for fastp --- fastp.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index becbaf4b..25f09e39 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -38,7 +38,7 @@ task Fastp { Boolean performAdapterTrimming = true Int threads = 4 - String memory = "10GiB" + String memory = "20GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" } From e9215442ac12ff2f9ea4833b69daf809d8957cc6 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Fri, 21 Oct 2022 15:14:04 +0200 Subject: [PATCH 682/902] fastp: use number of splits as number of threads if set --- fastp.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fastp.wdl b/fastp.wdl index 25f09e39..7f269d81 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -38,8 +38,8 @@ task Fastp { Boolean 
performAdapterTrimming = true Int threads = 4 - String memory = "20GiB" - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / threads) + String memory = "50GiB" + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 6.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" } @@ -66,7 +66,7 @@ task Fastp { -z ~{compressionLevel} \ ~{if correction then "--correction" else ""} \ --length_required ~{lengthRequired} \ - --thread ~{threads} \ + --thread ~{select_first([split, threads])} \ ~{"--split " + split} \ ~{if defined(split) then "-d 0" else ""} \ ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} @@ -80,7 +80,7 @@ task Fastp { } runtime { - cpu: threads + cpu: select_first([split, threads]) memory: memory time_minutes: timeMinutes docker: dockerImage @@ -96,9 +96,9 @@ task Fastp { compressionLevel: {description: "The compression level to use for the output.", category: "advanced"} correction: {description: "Whether or not to apply overlap based correction.", category: "advanced"} lengthRequired: {description: "The minimum read length.", category: "advanced"} - split: {description: "The number of chunks to split the files into.", category: "common"} + split: {description: "The number of chunks to split the files into. Number of threads will be set equal to the amount of splits.", category: "common"} performAdapterTrimming: {description: "Whether adapter trimming should be performed or not.", category: "advanced"} - threads: {description: "The number of threads to use.", category: "advanced"} + threads: {description: "The number of threads to use. Only used if the split input is not set.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From c7754754273f3ae4ce4bb34a9211cafec7880306 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 21 Oct 2022 16:48:34 +0200 Subject: [PATCH 683/902] Add a task to produce fasta indices --- biowdl.wdl | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/biowdl.wdl b/biowdl.wdl index f891618e..7392983a 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -73,3 +73,49 @@ task InputConverter { json: {description: "JSON file version of the input sample sheet."} } } + +task IndexFastaFile { + input { + File inputFile + String outputDir = "." + String javaXmx = "2G" + String memory = "3GiB" + } + String outputFile = outputDir + "/" + basename(inputFile) + # This executes both picard and samtools, so indexes are co-located in the same folder. + command <<< + set -e + mkdir -p ~{outputDir} + ln -s ~{inputFile} ~{outputFile} + picard -Xmx~{javaXmx} \ + -XX:ParallelGCThreads=1 \ + CreateSequenceDictionary \ + REFERENCE=~{inputFile} \ + OUTPUT="~{outputFile}.dict" + samtools faidx ~{outputFile} --fai-idx ~{outputFile}.fai + >>> + + output { + File outputFasta = outputFile + File outputFastaDict = outputFile + ".dict" + File outputFastaFai = outputFile + ".fai" + } + + runtime { + memory: memory + # Contains picard 2.27.4, samtools 1.15.1 + docker: "quay.io/biocontainers/mulled-v2-b0664646864bfdb46c5343b1b2b93fc05adb4b77:39a005770a3e30fb6aa3bf424b57ddf52bae7ece-0" + } + + parameter_meta { + # inputs + inputFile: {description: "The input fasta file.", category: "required"} + outputDir: {description: "Output directory path.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + # outputs + outputFasta: {description: "Fasta file that is co-located with the indexes"} + outputFastaFai: {description: "Fasta index file for the outputFasta file."} + outputFastaDict: {description: "Sequence dictionary for the outputFasta file."} + } +} \ No newline at end of file From 8d5a451e1d3938f62d14add4167fcf83dd9a0e70 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 24 Oct 2022 09:45:54 +0200 Subject: [PATCH 684/902] typo --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 26ab4e4a..5776dfed 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -604,7 +604,7 @@ task Linx { File transExonDataCsv File transSpliceDataCsv - String memory = "9iB" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" From f05d968d69d6c3a41b03a761a4a4838e5889df6c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 24 Oct 2022 14:45:15 +0200 Subject: [PATCH 685/902] Add a Bwa index task --- bwa.wdl | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/bwa.wdl b/bwa.wdl index d4f4495a..f79a219a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -114,3 +114,29 @@ struct BwaIndex { File fastaFile Array[File] indexFiles } + +task Index { + input { + File fasta + } + File indexedFile = "reference.fasta" + + command { + set -e + cp ~{fasta} ~{indexedFile} + bwa index ~{indexedFile} + } + + output { + BwaIndex index = { + "fastaFile": indexedFile, + "indexFiles": [ + indexedFile + ".amb", + indexedFile + ".ann", + indexedFile + ".bwt", + indexedFile + ".pac", + indexedFile + ".sa" + ] + } + } +} \ No newline at end of file From 23b324ea33f63cb4901fd66528f4ecead4cab0d5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 24 Oct 2022 15:02:55 
+0200 Subject: [PATCH 686/902] Copy reference to prevent problems --- biowdl.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/biowdl.wdl b/biowdl.wdl index 7392983a..fe49a6cf 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -86,7 +86,7 @@ task IndexFastaFile { command <<< set -e mkdir -p ~{outputDir} - ln -s ~{inputFile} ~{outputFile} + cp ~{inputFile} ~{outputFile} picard -Xmx~{javaXmx} \ -XX:ParallelGCThreads=1 \ CreateSequenceDictionary \ From 4431b259d68024b057fe5cfd5dc4de2424450d4b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 24 Oct 2022 15:46:09 +0200 Subject: [PATCH 687/902] Make sure index task works --- bwa.wdl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index f79a219a..a129ebb4 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -119,7 +119,7 @@ task Index { input { File fasta } - File indexedFile = "reference.fasta" + String indexedFile = "reference.fasta" command { set -e @@ -128,9 +128,9 @@ task Index { } output { - BwaIndex index = { - "fastaFile": indexedFile, - "indexFiles": [ + BwaIndex index = object { + fastaFile: indexedFile, + indexFiles: [ indexedFile + ".amb", indexedFile + ".ann", indexedFile + ".bwt", @@ -139,4 +139,10 @@ task Index { ] } } + + runtime { + docker: "quay.io/biocontainers/bwa:0.7.17--hed695b0_7" + cpu: 1 + memory: "~{size(fasta, 'G') + 1}GiB" + } } \ No newline at end of file From af929db9c2392cdc24a3ef2e7c644ca4d055cc3d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 25 Oct 2022 14:17:40 +0200 Subject: [PATCH 688/902] Use the basename of the input file for index names --- biowdl.wdl | 11 +++++------ bwa.wdl | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/biowdl.wdl b/biowdl.wdl index fe49a6cf..58e94df8 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -77,27 +77,27 @@ task InputConverter { task IndexFastaFile { input { File inputFile - String outputDir = "." 
String javaXmx = "2G" String memory = "3GiB" } - String outputFile = outputDir + "/" + basename(inputFile) + String outputFile = basename(inputFile) + # Capture .fa¸ .fna and .fasta + String outputDict = sub(outputFile, "\.fn?as?t?a?$", "") + ".dict" # This executes both picard and samtools, so indexes are co-located in the same folder. command <<< set -e - mkdir -p ~{outputDir} cp ~{inputFile} ~{outputFile} picard -Xmx~{javaXmx} \ -XX:ParallelGCThreads=1 \ CreateSequenceDictionary \ REFERENCE=~{inputFile} \ - OUTPUT="~{outputFile}.dict" + OUTPUT="~{outputDict}" samtools faidx ~{outputFile} --fai-idx ~{outputFile}.fai >>> output { File outputFasta = outputFile - File outputFastaDict = outputFile + ".dict" + File outputFastaDict = outputDict File outputFastaFai = outputFile + ".fai" } @@ -110,7 +110,6 @@ task IndexFastaFile { parameter_meta { # inputs inputFile: {description: "The input fasta file.", category: "required"} - outputDir: {description: "Output directory path.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} # outputs diff --git a/bwa.wdl b/bwa.wdl index a129ebb4..8f694b45 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -119,7 +119,7 @@ task Index { input { File fasta } - String indexedFile = "reference.fasta" + String indexedFile = basename(fasta) command { set -e From 2dc14b39d06dcc1c8161a9bf5840ebe5d88ccb25 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 25 Oct 2022 14:33:20 +0200 Subject: [PATCH 689/902] Make index use the basename of the file --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index e1b08173..bee38d11 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -470,7 +470,7 @@ task Sort { task Tabix { input { File inputFile - String outputFilePath = "indexed.vcf.gz" + String outputFilePath = basename(inputFile) String type = "vcf" Int timeMinutes = 1 + ceil(size(inputFile, "GiB") * 2) From 46bf6537c1787f47b7758d350b6605dae6da00cb Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 26 Oct 2022 14:38:17 +0200 Subject: [PATCH 690/902] Add indexing tasks to the changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f4fed5b..d94c2b56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add a combined Picard CreateSequenceDictionary and samtools faidx task. ++ Add a BWA index task. + Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` previously. The WDL spec clearly distuingishes between SI and binary notations. 
Since Java always takes `K`, `M` and `G` to mean `KiB`, `MiB` and From c6fe0300c5d2e5275739148c051f931e717cd6f1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Oct 2022 11:38:30 +0200 Subject: [PATCH 691/902] Use samtools dict instead of Picard CreateSequenceDictionary --- CHANGELOG.md | 2 +- biowdl.wdl | 45 --------------------------------------------- samtools.wdl | 43 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 46 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d94c2b56..b9df32a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- -+ Add a combined Picard CreateSequenceDictionary and samtools faidx task. ++ Add a combined samtools dict and samtools faidx task. + Add a BWA index task. + Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` previously. The WDL spec clearly distuingishes between SI and binary diff --git a/biowdl.wdl b/biowdl.wdl index 58e94df8..463dab75 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -72,49 +72,4 @@ task InputConverter { # outputs json: {description: "JSON file version of the input sample sheet."} } -} - -task IndexFastaFile { - input { - File inputFile - String javaXmx = "2G" - String memory = "3GiB" - } - String outputFile = basename(inputFile) - # Capture .fa¸ .fna and .fasta - String outputDict = sub(outputFile, "\.fn?as?t?a?$", "") + ".dict" - # This executes both picard and samtools, so indexes are co-located in the same folder. 
- command <<< - set -e - cp ~{inputFile} ~{outputFile} - picard -Xmx~{javaXmx} \ - -XX:ParallelGCThreads=1 \ - CreateSequenceDictionary \ - REFERENCE=~{inputFile} \ - OUTPUT="~{outputDict}" - samtools faidx ~{outputFile} --fai-idx ~{outputFile}.fai - >>> - - output { - File outputFasta = outputFile - File outputFastaDict = outputDict - File outputFastaFai = outputFile + ".fai" - } - - runtime { - memory: memory - # Contains picard 2.27.4, samtools 1.15.1 - docker: "quay.io/biocontainers/mulled-v2-b0664646864bfdb46c5343b1b2b93fc05adb4b77:39a005770a3e30fb6aa3bf424b57ddf52bae7ece-0" - } - - parameter_meta { - # inputs - inputFile: {description: "The input fasta file.", category: "required"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} - # outputs - outputFasta: {description: "Fasta file that is co-located with the indexes"} - outputFastaFai: {description: "Fasta index file for the outputFasta file."} - outputFastaDict: {description: "Sequence dictionary for the outputFasta file."} - } } \ No newline at end of file diff --git a/samtools.wdl b/samtools.wdl index bee38d11..d5e3ce0e 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -66,6 +66,49 @@ task BgzipAndIndex { } } +task DictAndFaidx { + input { + File inputFile + String javaXmx = "2G" + String memory = "3GiB" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + } + + String outputFile = basename(inputFile) + # Capture .fa¸ .fna and .fasta + String outputDict = sub(outputFile, "\.fn?as?t?a?$", "") + ".dict" + # This executes both dict and faidx, so indexes are co-located in the same folder. 
+ command <<< + set -e + cp ~{inputFile} ~{outputFile} + samtools dict -o ~{outputDict} ~{outputFile} + samtools faidx ~{outputFile} --fai-idx ~{outputFile}.fai + >>> + + output { + File outputFasta = outputFile + File outputFastaDict = outputDict + File outputFastaFai = outputFile + ".fai" + } + + runtime { + memory: memory + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input fasta file.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + # outputs + outputFasta: {description: "Fasta file that is co-located with the indexes"} + outputFastaFai: {description: "Fasta index file for the outputFasta file."} + outputFastaDict: {description: "Sequence dictionary for the outputFasta file."} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + task Faidx { input { File inputFile From 61161df05a65d5a3f3427d381254988208266c98 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Oct 2022 11:45:26 +0200 Subject: [PATCH 692/902] Add time_minutes dockerimage and update parameter_meta --- biowdl.wdl | 2 +- bwa.wdl | 16 ++++++++++++++-- samtools.wdl | 4 +++- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/biowdl.wdl b/biowdl.wdl index 463dab75..f891618e 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -72,4 +72,4 @@ task InputConverter { # outputs json: {description: "JSON file version of the input sample sheet."} } -} \ No newline at end of file +} diff --git a/bwa.wdl b/bwa.wdl index 8f694b45..e1e61bbe 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -118,6 +118,8 @@ struct BwaIndex { task Index { input { File fasta + String dockerImage = "quay.io/biocontainers/bwa:0.7.17--hed695b0_7" + Int? timeMinutes = 5 + ceil(size(fasta, "G") * 5) } String indexedFile = basename(fasta) @@ -141,8 +143,18 @@ task Index { } runtime { - docker: "quay.io/biocontainers/bwa:0.7.17--hed695b0_7" + docker: dockerImage cpu: 1 memory: "~{size(fasta, 'G') + 1}GiB" + time_minutes: timeMinutes + } + parameter_meta { + # inputs + fasta: {description: "Reference fasta file.", category: "required"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + index: {description: "The produced BWA index."} } -} \ No newline at end of file +} diff --git a/samtools.wdl b/samtools.wdl index d5e3ce0e..76a07ef5 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -71,6 +71,7 @@ task DictAndFaidx { File inputFile String javaXmx = "2G" String memory = "3GiB" + Int timeMinutes = 5 + ceil(size(inputFile, "GiB") * 5) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -101,11 +102,12 @@ task DictAndFaidx { inputFile: {description: "The input fasta file.", category: "required"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs outputFasta: {description: "Fasta file that is co-located with the indexes"} outputFastaFai: {description: "Fasta index file for the outputFasta file."} outputFastaDict: {description: "Sequence dictionary for the outputFasta file."} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From 3c53b47f4ba4e2c75fc104dabe972a50332552e6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Oct 2022 12:25:53 +0200 Subject: [PATCH 693/902] Add @DavyCats' suggestions --- bwa.wdl | 1 + samtools.wdl | 2 ++ 2 files changed, 3 insertions(+) diff --git a/bwa.wdl b/bwa.wdl index e1e61bbe..66b8e8cc 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -148,6 +148,7 @@ task Index { memory: "~{size(fasta, 'G') + 1}GiB" time_minutes: timeMinutes } + parameter_meta { # inputs fasta: {description: "Reference fasta file.", category: "required"} diff --git a/samtools.wdl b/samtools.wdl index 76a07ef5..df712e51 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -95,6 +95,8 @@ task DictAndFaidx { runtime { memory: memory docker: dockerImage + time_minutes: timeMinutes + cpu: 1 } parameter_meta { From 0632414b9ae0663431e8a25b35463c9aa83badbe Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 4 Nov 2022 11:03:34 +0100 Subject: [PATCH 694/902] typo --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b522c02c..daf79c8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,6 @@ version 5.1.0-dev + Updated SURVIVOR version to 1.0.7 + Add a combined samtools dict and samtools faidx task. + Add a BWA index task. - + Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` previously. The WDL spec clearly distuingishes between SI and binary notations. 
Since Java always takes `K`, `M` and `G` to mean `KiB`, `MiB` and From 4a42403fb4bf27ba21f63b99c7cb75f9d13adfeb Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 8 Nov 2022 16:33:30 +0100 Subject: [PATCH 695/902] Fallback to copying when hardlinking does not work --- samtools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index df712e51..587a53fb 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -332,7 +332,7 @@ task Index { if [ ! -f ~{outputPath} ] then mkdir -p "$(dirname ~{outputPath})" - ln ~{bamFile} ~{outputPath} + ln ~{bamFile} ~{outputPath} || cp ~{bamFile} ~{outputPath} fi samtools index ~{outputPath} ~{bamIndexPath} ' @@ -531,7 +531,7 @@ task Tabix { mkdir -p "$(dirname ~{outputFilePath})" if [ ! -f ~{outputFilePath} ] then - ln ~{inputFile} ~{outputFilePath} + ln ~{inputFile} ~{outputFilePath} || cp ~{inputFile} ~{outputFilePath} fi tabix ~{outputFilePath} -p ~{type} } From daf19317d6f5aafc4e156910393f8bf02c012199 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 9 Nov 2022 15:27:37 +0100 Subject: [PATCH 696/902] remove second breakends in gridss AnnotateSvTypes script --- gridss.wdl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index cfe53751..9a09bdde 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -119,9 +119,14 @@ task AnnotateSvTypes { gr <- breakpointRanges(vcf) svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype - # GRIDSS doesn't supply a GT, so we estimate GT based on AF (assuming CN of 2, might be inaccurate) - geno(vcf)$GT <- ifelse(geno(vcf)$AF > 0.75, "1/1", ifelse(geno(vcf)$AF < 0.25, "0/0", "0/1")) - writeVcf(vcf, out_path, index=~{index}) + # GRIDSS doesn't supply a GT, simply set it to 0/1 + geno(vcf)$GT <- "0/1" + # Select only one breakend per event (also removes single breakends): + # sourceId ends with o or h for paired breakends, the first in the pair + # end with o the second with h. 
Single breakend end with b, these will + # also be removed since we can't determine the SVTYPE. + gr2 <- gr[grepl(".*o$", gr$sourceId)] + writeVcf(vcf[gr2$sourceId], out_path, index=~{index}) EOF >>> From 9cf522d5cf766ef7943226e8d4807643ee93721d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Nov 2022 16:00:25 +0100 Subject: [PATCH 697/902] fix typing issue in AnnotateSvTypes R code --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 9a09bdde..8e1474c1 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -120,7 +120,7 @@ task AnnotateSvTypes { svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype # GRIDSS doesn't supply a GT, simply set it to 0/1 - geno(vcf)$GT <- "0/1" + geno(vcf)$GT <- as.matrix(sapply(row.names(vcf), function(x) {"0/1"})) # Select only one breakend per event (also removes single breakends): # sourceId ends with o or h for paired breakends, the first in the pair # end with o the second with h. Single breakend end with b, these will From 2e1c9972b01922cd915b7041b230e6287dda778b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 14 Nov 2022 13:40:06 +0100 Subject: [PATCH 698/902] fix issue where fastp errors if split is set to 1 --- fastp.wdl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/fastp.wdl b/fastp.wdl index 7f269d81..db4a2d40 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -41,11 +41,15 @@ task Fastp { String memory = "50GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 6.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" + + Int? noneInt } String outputDirR1 = sub(outputPathR1, basename(outputPathR1), "") String outputDirR2 = sub(outputPathR2, basename(outputPathR2), "") + Int? 
effectiveSplit = if select_first([split, 1]) > 1 then split else noneInt + command <<< set -e mkdir -p $(dirname ~{outputPathR1}) @@ -54,8 +58,8 @@ task Fastp { mkdir -p $(dirname ~{jsonPath}) # predict output paths - seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths - seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths + seq 1 ~{if defined(effectiveSplit) then effectiveSplit else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths + seq 1 ~{if defined(effectiveSplit) then effectiveSplit else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths fastp \ -i ~{read1} \ ~{"-I " + read2} \ @@ -66,21 +70,21 @@ task Fastp { -z ~{compressionLevel} \ ~{if correction then "--correction" else ""} \ --length_required ~{lengthRequired} \ - --thread ~{select_first([split, threads])} \ - ~{"--split " + split} \ - ~{if defined(split) then "-d 0" else ""} \ + --thread ~{select_first([effectiveSplit, threads])} \ + ~{"--split " + effectiveSplit} \ + ~{if defined(effectiveSplit) then "-d 0" else ""} \ ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} >>> output { File htmlReport = htmlPath File jsonReport = jsonPath - Array[File] clippedR1 = if defined(split) then read_lines("r1_paths") else [outputPathR1] - Array[File] clippedR2 = if defined(split) then read_lines("r2_paths") else [outputPathR2] + Array[File] clippedR1 = if defined(effectiveSplit) then read_lines("r1_paths") else [outputPathR1] + Array[File] clippedR2 = if defined(effectiveSplit) then read_lines("r2_paths") else [outputPathR2] } runtime { - cpu: select_first([split, threads]) + cpu: select_first([effectiveSplit, threads]) memory: memory time_minutes: timeMinutes docker: dockerImage From 636b1f0ea31168d9001ea7b45efe6d3333d944a3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Nov 2022 14:12:45 
+0100 Subject: [PATCH 699/902] update changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index daf79c8f..2c4cff52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ The GRIDSS AnnotateSvTypes task now also removes the second breakend of + the breakpoints and single breakends. This will prepare the output better + to be passed into survivor. + Updated SURVIVOR version to 1.0.7 + Add a combined samtools dict and samtools faidx task. + Add a BWA index task. From b382cf745b6d7ed389bbca4efdfa70e37070d835 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Nov 2022 14:23:00 +0100 Subject: [PATCH 700/902] adjusted runtime attributes for clever tasks --- CHANGELOG.md | 6 ++++++ clever.wdl | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c4cff52..d2e95f60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,12 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Mateclever's runtime attribute defaults were changed to: + + memory: `"250GiB"` + + timeMinutes: `2880` ++ Clever's Prediction task's runtime attribute defaults were changed to: + + memory: `"80GiB"` + + timeMinutes: `2200` + The GRIDSS AnnotateSvTypes task now also removes the second breakend of the breakpoints and single breakends. This will prepare the output better to be passed into survivor. 
diff --git a/clever.wdl b/clever.wdl index 791a0ba1..3b819ed2 100644 --- a/clever.wdl +++ b/clever.wdl @@ -34,8 +34,8 @@ task Mateclever { Int maxOffset = 150 Int threads = 10 - String memory = "15GiB" - Int timeMinutes = 600 + String memory = "250GiB" + Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } @@ -94,8 +94,8 @@ task Prediction { String outputPath = "./clever" Int threads = 10 - String memory = "55GiB" - Int timeMinutes = 480 + String memory = "80GiB" + Int timeMinutes = 2200 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } From 522f2046d07479d1964de103f8d75a190a4a5292 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Nov 2022 11:48:50 +0100 Subject: [PATCH 701/902] increase time for Amber --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 5776dfed..3b09beb9 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -37,7 +37,7 @@ task Amber { Int threads = 2 String memory = "70GiB" String javaXmx = "64G" - Int timeMinutes = 240 + Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } From 78e02137e639dc35e24c6c9ac08a1efedfda7ebd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Nov 2022 17:12:07 +0100 Subject: [PATCH 702/902] increase memory for amber --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 3b09beb9..e051dc99 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,8 +35,8 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = "70GiB" - String javaXmx = "64G" + String memory = "85GiB" + String javaXmx = "80G" Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } From 1a80829e5bc6b9f607d3cb748f7af6c47e90f8bf Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 13 Jan 2023 16:37:32 +0100 Subject: [PATCH 703/902] Add targets file input to 
samtools view --- samtools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 587a53fb..8503777c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -571,6 +571,7 @@ task View { Int? excludeFilter Int? excludeSpecificFilter Int? MAPQthreshold + File? targetFile Int threads = 1 String memory = "1GiB" @@ -593,6 +594,7 @@ task View { ~{"-G " + excludeSpecificFilter} \ ~{"-q " + MAPQthreshold} \ ~{"--threads " + (threads - 1)} \ + ~{"--target-file " + targetFile} \ ~{inFile} samtools index ~{outputFileName} ~{outputIndexPath} } From 1ad000b1370898459d2ef3d6e2b3939699874c4f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 13 Jan 2023 17:07:32 +0100 Subject: [PATCH 704/902] update samtools containers --- samtools.wdl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 8503777c..303f9821 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -72,7 +72,7 @@ task DictAndFaidx { String javaXmx = "2G" String memory = "3GiB" Int timeMinutes = 5 + ceil(size(inputFile, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } String outputFile = basename(inputFile) @@ -119,7 +119,7 @@ task Faidx { String outputDir String memory = "2GiB" - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } command { @@ -168,7 +168,7 @@ task Fastq { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam) * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } command { @@ -232,7 +232,7 @@ task FilterShortReadsBam { String memory = "1GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String 
dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") @@ -278,7 +278,7 @@ task Flagstat { String memory = "256MiB" # Only 40.5 MiB used for 150G bam file. Int timeMinutes = 1 + ceil(size(inputBam, "G")) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } command { @@ -318,7 +318,7 @@ task Index { String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 4) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } # Select_first is needed, otherwise womtool validate fails. @@ -369,7 +369,7 @@ task Markdup { String outputBamPath Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } command { @@ -408,7 +408,7 @@ task Merge { Int threads = 1 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } String indexPath = sub(outputBamPath, "\.bam$",".bai") @@ -463,7 +463,7 @@ task Sort { Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } # Select first needed as outputPath is optional input (bug in cromwell). 
@@ -576,7 +576,7 @@ task View { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inFile, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } String outputIndexPath = basename(outputFileName) + ".bai" From d686e0870442c002b7902e9a8f33467dc404fa14 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 16 Jan 2023 14:15:45 +0100 Subject: [PATCH 705/902] Add parameter_meta for targetFile --- samtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index 303f9821..771a9969 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -621,6 +621,7 @@ task View { excludeFilter: {description: "Equivalent to samtools view's `-F` option.", category: "advanced"} excludeSpecificFilter: {description: "Equivalent to samtools view's `-G` option.", category: "advanced"} MAPQthreshold: {description: "Equivalent to samtools view's `-q` option.", category: "advanced"} + targetFile: {description: "A BED file with regions to include", caegory: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From b52e3250eb5823b0ddbe4363eb3a77ab798d6fd0 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 16 Jan 2023 14:17:38 +0100 Subject: [PATCH 706/902] Update changelog with samtools change --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d2e95f60..c6b5e609 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Update samtools image to version 1.16. ++ Add targetsFile input for samtools View. 
+ Mateclever's runtime attribute defaults were changed to: + memory: `"250GiB"` + timeMinutes: `2880` From e1abb7dc92090bb836b6468be9ae33dc1696a44d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 16 Jan 2023 14:21:22 +0100 Subject: [PATCH 707/902] Use latest version of scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 98cc3e10..84690a30 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 98cc3e10125c853a70f41ceccf8f9d5428d4c1a3 +Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 From 9fce64caa41bf1cd0ec5e43337a31f3c8a8466cf Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 23 Jan 2023 12:07:38 +0100 Subject: [PATCH 708/902] add memory runtime attribute to tabix task --- samtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index 771a9969..fbb445e7 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -542,6 +542,7 @@ task Tabix { } runtime { + memory: "2GiB" time_minutes: timeMinutes docker: dockerImage } From 5f5d51a3515b78c0d290e23a022255207c95bb7c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 24 Jan 2023 16:37:48 +0100 Subject: [PATCH 709/902] add various tasks for somatic SV calling --- delly.wdl | 65 ++++++++++++++++++--- gridss.wdl | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 214 insertions(+), 12 deletions(-) diff --git a/delly.wdl b/delly.wdl index 7333c5ff..fab32784 100644 --- a/delly.wdl +++ b/delly.wdl @@ -22,15 +22,17 @@ version 1.0 task CallSV { input { - File bamFile - File bamIndex + Array[File]+ bamFile + Array[File]+ bamIndex File referenceFasta File referenceFastaFai String outputPath = "./delly/delly.bcf" + File? 
genotypeBcf + String memory = "15GiB" Int timeMinutes = 300 - String dockerImage = "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" + String dockerImage = "quay.io/biocontainers/delly:1.1.6--ha41ced6_0" } command { @@ -39,7 +41,8 @@ task CallSV { delly call \ -o ~{outputPath} \ -g ~{referenceFasta} \ - ~{bamFile} + ~{"-v " + genotypeBcf} \ + ~{sep=" " bamFile} } output { @@ -54,11 +57,12 @@ task CallSV { parameter_meta { # inputs - bamFile: {description: "The bam file to process.", category: "required"} - bamIndex: {description: "The index bam file.", category: "required"} + bamFile: {description: "The bam files to process.", category: "required"} + bamIndex: {description: "The indexes for the bam files.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } - outputPath: {description: "The location the output VCF file should be written.", category: "common"} + outputPath: {description: "The location the output BCF file should be written.", category: "common"} + genotypeBcf: {description: "A BCF with SVs to get genotyped in the samples."} memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -67,3 +71,50 @@ task CallSV { dellyBcf: {description: "File containing structural variants."} } } + + +task SomaticFilter { + input { + File dellyBcf + Array[String]+ normalSamples + Array[String]+ tumorSamples + String outputPath = "./delly/delly_filter.bcf" + + String memory = "15GiB" + Int timeMinutes = 300 + String dockerImage = "quay.io/biocontainers/delly:1.1.6--ha41ced6_0" + } + + command <<< + set -e + mkdir -p "$(dirname ~{outputPath})" + for SAMPLE in ~{sep=" " normalSamples}; do echo -e "${SAMPLE}\tcontrol" >> samples.tsv; done + for SAMPLE in ~{sep=" " tumorSamples}; do echo -e "${SAMPLE}\ttumor" >> samples.tsv; done + + delly filter \ + -f somatic \ + -o ~{outputPath} \ + -s samples.tsv \ + ~{dellyBcf} + >>> + + output { + File filterBcf = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + dellyBcf: {description: "The BCF file produced by delly.", category: "required"} + normalSamples: {description: "The names for the normal samples as used in the delly BCF file.", category: "required"} + tumorSamples: {description: "The names for the tumor samples as used in the delly BCF file.", category: "required"} + outputPath: {description: "The location the output BCF file should be written.", category: "common"} + memory: {description: "The memory required to run the programs.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file diff --git a/gridss.wdl b/gridss.wdl index 8e1474c1..647f2d67 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -152,11 +152,108 @@ task AnnotateSvTypes { } } +task FilterPon { + input { + File ponBed + File ponBedpe + Int minimumScore = 3 + String outputDir = "." + + String memory = "1GiB" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" + Int timeMinutes = 20 + } + + command { + set -e + mkdir -p ~{outputDir} + + cat ~{ponBed} | awk '{if ($5 >= ~{minimumScore}) print $0}' > ~{outputDir}/gridss_pon_single_breakend.bed + cat ~{ponBedpe} | awk '{if ($8 >= ~{minimumScore}) print $0}' > ~{outputDir}/gridss_pon_breakpoint.bedpe + } + + output { + File bedpe = "~{outputDir}/gridss_pon_breakpoint.bedpe" + File bed = "~{outputDir}/gridss_pon_single_breakend.bed" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + ponBed: {description: "The PON BED file.", category: "required"} + ponBedpe: {description: "The PON BEDPE file.", category: "required"} + minimumScore: {description: "The minimum number normal samples an SV must have been found in to be kept.", category: "advanced"} + outputDir: {description: "The directory the output will be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task GeneratePonBedpe { + input { + Array[File]+ vcfFiles + Array[File]+ vcfIndexes + File referenceFasta + String outputDir = "." + + Int threads = 8 + String javaXmx = "8G" + String memory = "9GiB" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" + Int timeMinutes = 120 + } + + command { + set -e + mkdir -p ~{outputDir} + java -Xmx~{javaXmx} \ + -cp /usr/local/share/gridss-2.12.2-0/gridss.jar \ + gridss.GeneratePonBedpe \ + INPUT=~{sep=" INPUT=" vcfFiles} \ + O=~{outputDir}/gridss_pon_breakpoint.bedpe \ + SBO=~{outputDir}/gridss_pon_single_breakend.bed \ + REFERENCE_SEQUENCE=~{referenceFasta} \ + THREADS=~{threads} + } + + output { + File bedpe = "~{outputDir}/gridss_pon_breakpoint.bedpe" + File bed = "~{outputDir}/gridss_pon_single_breakend.bed" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + vcfFiles: {description: "The vcf files with the normals as the first sample.", category: "required"} + referenceFasta: {description: "The fasta of the reference genome.", category: "required"} + outputDir: {description: "The directory the output will be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GRIDSS { input { - File tumorBam - File tumorBai - String tumorLabel + Array[File]+ tumorBam + Array[File]+ tumorBai + Array[String]+ tumorLabel BwaIndex reference String outputPrefix = "gridss" @@ -184,10 +281,10 @@ task GRIDSS { ~{"-c " + gridssProperties} \ ~{"-t " + threads} \ ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ - --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ + --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{sep="," tumorLabel} \ ~{"--blacklist " + blacklistBed} \ ~{normalBam} \ - ~{tumorBam} + ~{sep=" " tumorBam} samtools index ~{outputPrefix}_assembly.bam ~{outputPrefix}_assembly.bai # For some reason the VCF index is sometimes missing @@ -283,6 +380,60 @@ task GridssAnnotateVcfRepeatmasker { } } +task SomaticFilter { + input { + File vcfFile + File vcfIndex + File ponBed + File ponBedpe + String outputPath = "./high_confidence_somatic.vcf.gz" + String fullOutputPath = "./high_and_low_confidence_somatic.vcf.gz" + + String memory = "16GiB" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" + Int timeMinutes = 60 + } + + command { + set -e + mkdir -p $(dirname ~{outputPath}) + mkdir -p $(dirname ~{fullOutputPath}) + + gridss_somatic_filter \ + --pondir ~{dirname(ponBed)} \ + --input ~{vcfFile} \ + --output ~{outputPath} \ + --fulloutput ~{fullOutputPath} + } + + output { + File fullVcf = fullOutputPath + File fullVcfIndex = "~{fullOutputPath}.tbi" + File highConfidenceVcf = outputPath + File highConfidenceVcfIndex = "~{outputPath}.tbi" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + vcfFile: {description: "The GRIDSS VCF file.", category: "required"} + vcfIndex: {description: "The index for the GRIDSS VCF file.", category: "required"} + ponBed: {description: "The PON BED file.", 
category: "required"} + ponBedpe: {description: "The PON BEDPE file.", category: "required"} + outputPath: {description: "The path the high confidence output should be written to.", category: "common"} + fullOutputPath: {description: "The path the full output should be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Virusbreakend { input { File bam From 90bcc945807e9ef2c13fbd542d69f3b912995a0c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 25 Jan 2023 14:06:10 +0100 Subject: [PATCH 710/902] fix lint issues --- gridss.wdl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 647f2d67..82ac7fbd 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -164,13 +164,13 @@ task FilterPon { Int timeMinutes = 20 } - command { + command <<< set -e mkdir -p ~{outputDir} cat ~{ponBed} | awk '{if ($5 >= ~{minimumScore}) print $0}' > ~{outputDir}/gridss_pon_single_breakend.bed cat ~{ponBedpe} | awk '{if ($8 >= ~{minimumScore}) print $0}' > ~{outputDir}/gridss_pon_breakpoint.bedpe - } + >>> output { File bedpe = "~{outputDir}/gridss_pon_breakpoint.bedpe" @@ -189,8 +189,6 @@ task FilterPon { minimumScore: {description: "The minimum number normal samples an SV must have been found in to be kept.", category: "advanced"} outputDir: {description: "The directory the output will be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -394,13 +392,15 @@ task SomaticFilter { Int timeMinutes = 60 } + String ponDir = sub(ponBed, basename(ponBed), "") + command { set -e mkdir -p $(dirname ~{outputPath}) mkdir -p $(dirname ~{fullOutputPath}) gridss_somatic_filter \ - --pondir ~{dirname(ponBed)} \ + --pondir ~{ponDir} \ --input ~{vcfFile} \ --output ~{outputPath} \ --fulloutput ~{fullOutputPath} @@ -414,7 +414,6 @@ task SomaticFilter { } runtime { - cpu: threads memory: memory time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage From 34b3732319f7d74c72f93ff1bcb05ccc675585f8 Mon Sep 17 00:00:00 2001 From: imoustakas Date: Wed, 25 Jan 2023 16:47:10 +0100 Subject: [PATCH 711/902] Add a number of macs2 flags so we can adhere to Encode --- macs2.wdl | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index e6a011ad..53be0abd 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -30,8 +30,15 @@ task PeakCalling { String sampleName String format = "AUTO" Boolean nomodel = false + String gensz = "hs" + Int extsize + Int shiftsize = -1*round(extsize/2) + Float pval_thres = 0.01 + Boolean bdg = true + String keepdup = "auto" + String callsummits = true Int timeMinutes = 600 # Default to 10 hours - String memory = "8GiB" + String memory = "8G" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } @@ -43,7 +50,14 @@ task PeakCalling { --outdir ~{outDir} \ --name ~{sampleName} \ -f ~{format} \ - ~{true='--nomodel' false='' nomodel} + -g ~{gensz} \ + -p ~{pval_thres} \ + --shift ~{shiftsize} \ + --extsize ~{extsize} \ + ~{true='--nomodel' false='' nomodel} \ + ~{true='-B' 
false='' bdg} \ + --keep-dup ~{keepdup} \ + ~{true='--call-summits' false='' callsummits} } output { @@ -64,6 +78,13 @@ task PeakCalling { sampleName: {description: "Name of the sample to be analysed", category: "required"} outDir: {description: "All output files will be written in this directory.", category: "advanced"} nomodel: {description: "Whether or not to build the shifting model.", category: "advanced"} + gensz: {description: "macs2 argument for setting the mappable genome size or effective genome size which is defined as the genome size which can be sequenced."} + pval_thres: {description: "macs2 argument for setting the p-value cutoff. If -p is specified, MACS2 will use p-value instead of q-value."} + shiftsize: {description: "macs2 argument to set an arbitrary shift in bp. Can be negative to indicate direction"} + extsize: {description: "macs2 argument to extend reads in 5'->3' direction to fix-sized fragments."} + bdg: {description: "macs2 argument that ebanbles the storage of the fragment pileup, control lambda in bedGraph files."} + keepdup: {description: "macs2 argument that controls the behavior towards duplicate tags at the exact same location."} + callsummits: {description: "macs2 argument to reanalyze the shape of signal profile to deconvolve subpeaks within each peak called from the general procedure."} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From ae937f28ab0147b572916c97448f6c788fa58e19 Mon Sep 17 00:00:00 2001 From: imoustakas Date: Wed, 25 Jan 2023 16:55:23 +0100 Subject: [PATCH 712/902] Fix data type error --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 53be0abd..854db814 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -36,7 +36,7 @@ task PeakCalling { Float pval_thres = 0.01 Boolean bdg = true String keepdup = "auto" - String callsummits = true + Boolean callsummits = true Int timeMinutes = 600 # Default to 10 hours String memory = "8G" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" From 2dca5f3611fd3aef0ee501cbe05467b590c93280 Mon Sep 17 00:00:00 2001 From: imoustakas Date: Wed, 25 Jan 2023 17:29:04 +0100 Subject: [PATCH 713/902] Address comments from Ruben --- macs2.wdl | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index 854db814..7b11c99f 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -29,16 +29,16 @@ task PeakCalling { String outDir = "macs2" String sampleName String format = "AUTO" - Boolean nomodel = false - String gensz = "hs" - Int extsize - Int shiftsize = -1*round(extsize/2) - Float pval_thres = 0.01 - Boolean bdg = true - String keepdup = "auto" - Boolean callsummits = true + Boolean? nomodel + String? gensz + Int? extsize + Int? shiftsize = -1*round(extsize/2) + Float? pval_thres + Boolean? bdg + String? keepdup + Boolean? 
callsummits Int timeMinutes = 600 # Default to 10 hours - String memory = "8G" + String memory = "8GiB" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } @@ -49,14 +49,14 @@ task PeakCalling { ~{true="--control" false="" length(controlBams) > 0} ~{sep = ' ' controlBams} \ --outdir ~{outDir} \ --name ~{sampleName} \ - -f ~{format} \ - -g ~{gensz} \ - -p ~{pval_thres} \ - --shift ~{shiftsize} \ - --extsize ~{extsize} \ + ~{"-f" + format} \ + ~{"-g" + gensz} \ + ~{"-p" + pval_thres} \ + ~{"--shift" + shiftsize} \ + ~{"--extsize" + extsize} \ ~{true='--nomodel' false='' nomodel} \ ~{true='-B' false='' bdg} \ - --keep-dup ~{keepdup} \ + ~{"--keep-dup" + keepdup} \ ~{true='--call-summits' false='' callsummits} } From e89b1d7d13fef289ba17ee0f6acc8e8b5415a217 Mon Sep 17 00:00:00 2001 From: imoustakas Date: Thu, 26 Jan 2023 10:31:15 +0100 Subject: [PATCH 714/902] Delete calculation for shiftsize --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 7b11c99f..8d89f3af 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -32,7 +32,7 @@ task PeakCalling { Boolean? nomodel String? gensz Int? extsize - Int? shiftsize = -1*round(extsize/2) + Int? shiftsize Float? pval_thres Boolean? bdg String? keepdup From e996878ae65113bc66add0caaf7b5d9efc75ad73 Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 10:59:13 +0100 Subject: [PATCH 715/902] Update macs2.wdl Co-authored-by: Davy Cats --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 8d89f3af..70fea707 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -29,7 +29,7 @@ task PeakCalling { String outDir = "macs2" String sampleName String format = "AUTO" - Boolean? nomodel + Boolean nomodel = false String? gensz Int? extsize Int? 
shiftsize From 055246a9082ec004ab335c7525685c888fd6e27f Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 10:59:24 +0100 Subject: [PATCH 716/902] Update macs2.wdl Co-authored-by: Davy Cats --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 70fea707..2c3bf57c 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -34,7 +34,7 @@ task PeakCalling { Int? extsize Int? shiftsize Float? pval_thres - Boolean? bdg + Boolean bdg = false String? keepdup Boolean? callsummits Int timeMinutes = 600 # Default to 10 hours From 72bbcce9084408ee7ba68a04dd8f121a8a793390 Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 10:59:35 +0100 Subject: [PATCH 717/902] Update macs2.wdl Co-authored-by: Davy Cats --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 2c3bf57c..c4c08ed5 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -36,7 +36,7 @@ task PeakCalling { Float? pval_thres Boolean bdg = false String? keepdup - Boolean? 
callsummits + Boolean callsummits = false Int timeMinutes = 600 # Default to 10 hours String memory = "8GiB" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" From 4b9754f548b8558e7de2652e257edd807d0d4ffa Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 10:59:43 +0100 Subject: [PATCH 718/902] Update macs2.wdl Co-authored-by: Davy Cats --- macs2.wdl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index c4c08ed5..9d5344ae 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -78,13 +78,13 @@ task PeakCalling { sampleName: {description: "Name of the sample to be analysed", category: "required"} outDir: {description: "All output files will be written in this directory.", category: "advanced"} nomodel: {description: "Whether or not to build the shifting model.", category: "advanced"} - gensz: {description: "macs2 argument for setting the mappable genome size or effective genome size which is defined as the genome size which can be sequenced."} - pval_thres: {description: "macs2 argument for setting the p-value cutoff. If -p is specified, MACS2 will use p-value instead of q-value."} - shiftsize: {description: "macs2 argument to set an arbitrary shift in bp. 
Can be negative to indicate direction"} - extsize: {description: "macs2 argument to extend reads in 5'->3' direction to fix-sized fragments."} - bdg: {description: "macs2 argument that ebanbles the storage of the fragment pileup, control lambda in bedGraph files."} - keepdup: {description: "macs2 argument that controls the behavior towards duplicate tags at the exact same location."} - callsummits: {description: "macs2 argument to reanalyze the shape of signal profile to deconvolve subpeaks within each peak called from the general procedure."} + gensz: {description: "macs2 argument for setting the mappable genome size or effective genome size which is defined as the genome size which can be sequenced.", category: "advanced"} + pval_thres: {description: "macs2 argument for setting the p-value cutoff. If -p is specified, MACS2 will use p-value instead of q-value.", category: "advanced"} + shiftsize: {description: "macs2 argument to set an arbitrary shift in bp. Can be negative to indicate direction.", category: "advanced"} + extsize: {description: "macs2 argument to extend reads in 5'->3' direction to fix-sized fragments.", category: "advanced"} + bdg: {description: "macs2 argument that enables the storage of the fragment pileup, control lambda in bedGraph files.", category: "advanced"} + keepdup: {description: "macs2 argument that controls the behavior towards duplicate tags at the exact same location.", category: "advanced"} + callsummits: {description: "macs2 argument to reanalyze the shape of signal profile to deconvolve subpeaks within each peak called from the general procedure.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From ece0782a37451b82677eedd1ed771d823b56e891 Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 11:26:19 +0100 Subject: [PATCH 719/902] Update CHANGELOG.md --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6b5e609..4962c687 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,15 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. Inputs added: + + nomodel + + gensz + + extsize + + shiftsize + + pval_thres + + bdg + + keepdup + + callsummits + Update samtools image to version 1.16. + Add targetsFile input for samtools View. + Mateclever's runtime attribute defaults were changed to: From 2b4fb7ea3fc9270af1caaea897f35d2b319c35fd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 26 Jan 2023 14:32:37 +0100 Subject: [PATCH 720/902] add missing paramter_meta --- gridss.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/gridss.wdl b/gridss.wdl index 82ac7fbd..8b27df77 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -238,6 +238,7 @@ task GeneratePonBedpe { vcfFiles: {description: "The vcf files with the normals as the first sample.", category: "required"} referenceFasta: {description: "The fasta of the reference genome.", category: "required"} outputDir: {description: "The directory the output will be written to.", category: "common"} + threads: {description: "The number of the threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} From 6d0329539033821b68ef31234ae7d6f920505aed Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Fri, 27 Jan 2023 09:41:54 +0100 Subject: [PATCH 721/902] Add space between flag and the value following --- macs2.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index 9d5344ae..5ccc5a5f 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -49,14 +49,14 @@ task PeakCalling { ~{true="--control" false="" length(controlBams) > 0} ~{sep = ' ' controlBams} \ --outdir ~{outDir} \ --name ~{sampleName} \ - ~{"-f" + format} \ - ~{"-g" + gensz} \ - ~{"-p" + pval_thres} \ - ~{"--shift" + shiftsize} \ - ~{"--extsize" + extsize} \ + ~{"-f " + format} \ + ~{"-g " + gensz} \ + ~{"-p " + pval_thres} \ + ~{"--shift " + shiftsize} \ + ~{"--extsize " + extsize} \ ~{true='--nomodel' false='' nomodel} \ ~{true='-B' false='' bdg} \ - ~{"--keep-dup" + keepdup} \ + ~{"--keep-dup " + keepdup} \ ~{true='--call-summits' false='' callsummits} } From b79e59b1f3279bfcb26446ee5c95f1c6bfb4b16e Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Fri, 27 Jan 2023 09:44:00 +0100 Subject: [PATCH 722/902] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4962c687..bd66a6ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Bug fix: Add space between flag and the value provided for macs2 + Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. 
Inputs added: + nomodel + gensz From ee0b137664a20f94997e9daad8b25cc2729dc88a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 27 Jan 2023 13:25:12 +0100 Subject: [PATCH 723/902] increase time for manta, add index to delly outputs --- delly.wdl | 2 ++ manta.wdl | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/delly.wdl b/delly.wdl index fab32784..43af3ca0 100644 --- a/delly.wdl +++ b/delly.wdl @@ -47,6 +47,7 @@ task CallSV { output { File dellyBcf = outputPath + File dellyBcfIndex = outputPath + ".csi" } runtime { @@ -100,6 +101,7 @@ task SomaticFilter { output { File filterBcf = outputPath + File filterBcfIndex = outputPath + ".csi" } runtime { diff --git a/manta.wdl b/manta.wdl index 6804f304..fde8c208 100644 --- a/manta.wdl +++ b/manta.wdl @@ -34,7 +34,7 @@ task Germline { Int cores = 1 Int memoryGb = 4 - Int timeMinutes = 60 + Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/manta:1.4.0--py27_1" } @@ -102,7 +102,7 @@ task Somatic { Int cores = 1 Int memoryGb = 4 - Int timeMinutes = 60 + Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/manta:1.4.0--py27_1" } From 1bf7725df8ff78628b3444d8ab6b6daa044836fd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 27 Jan 2023 13:28:02 +0100 Subject: [PATCH 724/902] add bcf index input for delly somatic filter --- delly.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/delly.wdl b/delly.wdl index 43af3ca0..ab72f060 100644 --- a/delly.wdl +++ b/delly.wdl @@ -77,6 +77,7 @@ task CallSV { task SomaticFilter { input { File dellyBcf + File dellyBcfIndex Array[String]+ normalSamples Array[String]+ tumorSamples String outputPath = "./delly/delly_filter.bcf" From 9af2205811e0708be46be8e88bc1c7e1387fdfda Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 27 Jan 2023 13:33:30 +0100 Subject: [PATCH 725/902] add index to delly call inputs as well --- delly.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/delly.wdl b/delly.wdl index ab72f060..12e68187 100644 --- 
a/delly.wdl +++ b/delly.wdl @@ -29,6 +29,7 @@ task CallSV { String outputPath = "./delly/delly.bcf" File? genotypeBcf + File? genotypeBcfIndex String memory = "15GiB" Int timeMinutes = 300 From 71193e8da89c9275c7f6d878e349f1bdc19543ba Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 27 Jan 2023 13:35:07 +0100 Subject: [PATCH 726/902] update parameter_meta --- delly.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/delly.wdl b/delly.wdl index 12e68187..2dc847b9 100644 --- a/delly.wdl +++ b/delly.wdl @@ -64,7 +64,8 @@ task CallSV { referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } outputPath: {description: "The location the output BCF file should be written.", category: "common"} - genotypeBcf: {description: "A BCF with SVs to get genotyped in the samples."} + genotypeBcf: {description: "A BCF with SVs to get genotyped in the samples.", category: "advanced"} + genotypeBcfIndex: {description: "The index for the genotype BCF file.", category: "advanced"} memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -114,6 +115,7 @@ task SomaticFilter { parameter_meta { dellyBcf: {description: "The BCF file produced by delly.", category: "required"} + dellyBcfIndex: {description: "The index for the delly BCF file.", category: "required"} normalSamples: {description: "The names for the normal samples as used in the delly BCF file.", category: "required"} tumorSamples: {description: "The names for the tumor samples as used in the delly BCF file.", category: "required"} outputPath: {description: "The location the output BCF file should be written.", category: "common"} From dd9ea3db69c56bef6c1d5ed63c08e10e691c6d5b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 31 Jan 2023 11:13:40 +0100 Subject: [PATCH 727/902] give delly more time, specify normal ordinal in gridss GeneratePonBedpe command --- delly.wdl | 2 +- gridss.wdl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/delly.wdl b/delly.wdl index 2dc847b9..b952da7e 100644 --- a/delly.wdl +++ b/delly.wdl @@ -32,7 +32,7 @@ task CallSV { File? 
genotypeBcfIndex String memory = "15GiB" - Int timeMinutes = 300 + Int timeMinutes = 600 String dockerImage = "quay.io/biocontainers/delly:1.1.6--ha41ced6_0" } diff --git a/gridss.wdl b/gridss.wdl index 8b27df77..5c203a16 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -216,6 +216,7 @@ task GeneratePonBedpe { -cp /usr/local/share/gridss-2.12.2-0/gridss.jar \ gridss.GeneratePonBedpe \ INPUT=~{sep=" INPUT=" vcfFiles} \ + NO=0 \ O=~{outputDir}/gridss_pon_breakpoint.bedpe \ SBO=~{outputDir}/gridss_pon_single_breakend.bed \ REFERENCE_SEQUENCE=~{referenceFasta} \ From 48340415ab9c852ceefaf35e2b4e2ae8b47d3f66 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 31 Jan 2023 14:34:06 +0100 Subject: [PATCH 728/902] add missing fasta index input to gridss GeneratePonBedpe --- gridss.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 5c203a16..03fdc6ab 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -200,6 +200,7 @@ task GeneratePonBedpe { Array[File]+ vcfFiles Array[File]+ vcfIndexes File referenceFasta + File referenceFastaFai String outputDir = "." 
Int threads = 8 @@ -238,6 +239,7 @@ task GeneratePonBedpe { parameter_meta { vcfFiles: {description: "The vcf files with the normals as the first sample.", category: "required"} referenceFasta: {description: "The fasta of the reference genome.", category: "required"} + referenceFastaFai: {description: "The index for the reference genome fasta.", category: "required"} outputDir: {description: "The directory the output will be written to.", category: "common"} threads: {description: "The number of the threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 42796e37927b50b2dc25249a5ff92348ebf54ce0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 1 Feb 2023 16:03:47 +0100 Subject: [PATCH 729/902] fix output paths gridss somatic filter --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 03fdc6ab..b67f4c91 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -388,8 +388,8 @@ task SomaticFilter { File vcfIndex File ponBed File ponBedpe - String outputPath = "./high_confidence_somatic.vcf.gz" - String fullOutputPath = "./high_and_low_confidence_somatic.vcf.gz" + String outputPath = "./high_confidence_somatic.vcf.bgz" + String fullOutputPath = "./high_and_low_confidence_somatic.vcf.bgz" String memory = "16GiB" String dockerImage = "quay.io/biowdl/gridss:2.12.2" From b17076a642b17212499b6478e948661b0e9433c3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 1 Feb 2023 16:23:37 +0100 Subject: [PATCH 730/902] fix gridss somatic filter output paths? 
--- gridss.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index b67f4c91..5aca3825 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -388,8 +388,8 @@ task SomaticFilter { File vcfIndex File ponBed File ponBedpe - String outputPath = "./high_confidence_somatic.vcf.bgz" - String fullOutputPath = "./high_and_low_confidence_somatic.vcf.bgz" + String outputPath = "./high_confidence_somatic.vcf" + String fullOutputPath = "./high_and_low_confidence_somatic.vcf" String memory = "16GiB" String dockerImage = "quay.io/biowdl/gridss:2.12.2" @@ -411,10 +411,10 @@ task SomaticFilter { } output { - File fullVcf = fullOutputPath - File fullVcfIndex = "~{fullOutputPath}.tbi" - File highConfidenceVcf = outputPath - File highConfidenceVcfIndex = "~{outputPath}.tbi" + File fullVcf = "~{fullOutputPath}.bgz" + File fullVcfIndex = "~{fullOutputPath}.bgz.tbi" + File highConfidenceVcf = "~{outputPath}.bgz" + File highConfidenceVcfIndex = "~{outputPath}.bgz.tbi" } runtime { From d320b3c79bfc321fff1178ff571af520b7969043 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 6 Feb 2023 14:11:59 +0100 Subject: [PATCH 731/902] add samples option to bcftools view --- bcftools.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 726d2e37..7df8911d 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -349,6 +349,7 @@ task View { String? exclude String? 
include + Array[String] samples = [] String memory = "256MiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) @@ -364,6 +365,7 @@ task View { ~{"--exclude " + exclude} \ ~{"--include " + include} \ ~{true="--exclude-uncalled" false="" excludeUncalled} \ + ~{if length(samples) > 0 then "-s" else ""} ~{sep="," samples} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ ~{inputFile} @@ -389,6 +391,7 @@ task View { include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} excludeUncalled: {description: "Exclude sites without a called genotype (see man page for details).", category: "advanced"} + samples: {description: "A list of sample names to include.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 3961ab4e858d31163987bb267cbad30ea085b205 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Feb 2023 14:51:45 +0100 Subject: [PATCH 732/902] Allow a custom separator char --- umi.wdl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/umi.wdl b/umi.wdl index e7f01fc2..e4270ed6 100644 --- a/umi.wdl +++ b/umi.wdl @@ -29,6 +29,7 @@ task BamReadNameToUmiTag { File inputBam String outputPath = "output.bam" String umiTag = "RX" + String separatorChar = "_" String memory = "2GiB" Int timeMinutes = 1 + ceil(size([inputBam], "GiB") * 10) @@ -45,26 +46,26 @@ task BamReadNameToUmiTag { from typing import Tuple - def split_umi_from_name(name) -> Tuple[str, str]: + def split_umi_from_name(name, separator_char = "_") -> Tuple[str, str]: id_and_rest = name.split(maxsplit=1) id = id_and_rest[0] # If there was no whitespace id_and_rest will have length 1 other_parts = id_and_rest[1] if len(id_and_rest) == 2 else "" - underscore_index = id.rfind("_") + underscore_index = id.rfind(separator_char) umi = id[underscore_index + 1:] new_id = id[:underscore_index] if other_parts: return " ".join([new_id, other_parts]), umi return new_id, umi - def annotate_umis(in_file, out_file, bam_tag="RX"): + def annotate_umis(in_file, out_file, bam_tag="RX", separator_char = "_"): in_bam = pysam.AlignmentFile(in_file, "rb") os.makedirs(os.path.dirname(out_file), exist_ok=True) out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) # Encode bam_tag as bytes. Otherwise pysam converts it to bytes anyway. encoded_bam_tag = bam_tag.encode('ascii') for segment in in_bam: # type: pysam.AlignedSegment - new_name, umi = split_umi_from_name(segment.query_name) + new_name, umi = split_umi_from_name(segment.query_name, separator_char) segment.query_name = new_name # Encode umi as ascii. Otherwise pysam encodes it to bytes anyway. 
# Value type has to be a string though, otherwise pysam crashes. @@ -72,7 +73,7 @@ task BamReadNameToUmiTag { out_bam.write(segment) if __name__ == "__main__": - annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}") + annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}", "~{separatorChar}") pysam.index("~{outputPath}", "~{bamIndexPath}", b=True) CODE >>> @@ -93,6 +94,7 @@ task BamReadNameToUmiTag { inputBam: {description: "The input SAM file.", category: "required"} outputPath: {description: "Output directory path + output file.", category: "common"} umiTag: {description: "The tag used for UMIs in the output BAM file.", category: "common"} + separatorChar: {description: "Character used to separate the UMIs from the read name", category: "common"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 84a8781c4c94be08ba0f404902378d05db18fef9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Feb 2023 14:56:03 +0100 Subject: [PATCH 733/902] Update changelog with separatorChar --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bd66a6ba..4bab712a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add a separatorChar input to the tagUmi task. + Bug fix: Add space between flag and the value provided for macs2 + Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. 
Inputs added: + nomodel From b3c9204b77851836042190486f8031dbe79a9e2a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Feb 2023 14:57:25 +0100 Subject: [PATCH 734/902] Add missing interpunction --- umi.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/umi.wdl b/umi.wdl index e4270ed6..0628783a 100644 --- a/umi.wdl +++ b/umi.wdl @@ -94,7 +94,7 @@ task BamReadNameToUmiTag { inputBam: {description: "The input SAM file.", category: "required"} outputPath: {description: "Output directory path + output file.", category: "common"} umiTag: {description: "The tag used for UMIs in the output BAM file.", category: "common"} - separatorChar: {description: "Character used to separate the UMIs from the read name", category: "common"} + separatorChar: {description: "Character used to separate the UMIs from the read name.", category: "common"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From c3f246f24d05bda4ebfa781cff41dfe61bbf85b3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Feb 2023 13:05:31 +0100 Subject: [PATCH 735/902] update changelog --- CHANGELOG.md | 2 ++ scripts | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bab712a..3021817d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add a task for fastp. ++ Add a task for picard CollectInsertSizeMetrics. + Add a separatorChar input to the tagUmi task. + Bug fix: Add space between flag and the value provided for macs2 + Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. 
Inputs added: diff --git a/scripts b/scripts index 84690a30..98cc3e10 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 +Subproject commit 98cc3e10125c853a70f41ceccf8f9d5428d4c1a3 From ad97efa05229f147435ee0800b0a742a2c360435 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Feb 2023 13:06:00 +0100 Subject: [PATCH 736/902] update scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 98cc3e10..4142daab 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 98cc3e10125c853a70f41ceccf8f9d5428d4c1a3 +Subproject commit 4142daab81a7d9f28686b6a3299536757d381c81 From 669428627e26aaaafdba3ab680a37236eaa736da Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Feb 2023 13:20:13 +0100 Subject: [PATCH 737/902] update changelog --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bab712a..6e1daf97 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,14 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Increased the timeMinutes runtime attribute for manta (somatic and germline) to `2880`. ++ Add a task for GRIDSS somatic filtering. ++ Add a task to generate a panel of normals BED and BEDPE file for GRIDSS. ++ Add a task to filter a GRIDSS PON. ++ Add a task for delly somatic filtering. ++ Delly CallSV's `bamFile` and `bamIndex` inputs are now arrays of files, allowing + for multiple samples to be included. ++ Add `samples` input to bcftools view to select samples included in the output vcf. + Add a separatorChar input to the tagUmi task. + Bug fix: Add space between flag and the value provided for macs2 + Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq.
Inputs added: From 7b9e07652461788748ed4907dd8264cbbb27ce80 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Feb 2023 13:20:49 +0100 Subject: [PATCH 738/902] update scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 84690a30..4142daab 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 +Subproject commit 4142daab81a7d9f28686b6a3299536757d381c81 From eba9ad4c057cf7468bd7982930af484765d1a257 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 23 Feb 2023 15:55:01 +0100 Subject: [PATCH 739/902] add some options to disable filters in fastp --- fastp.wdl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index db4a2d40..68c0e5cd 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -36,6 +36,8 @@ task Fastp { Int lengthRequired = 15 Int? split Boolean performAdapterTrimming = true + Boolean performQualityFiltering = true + Boolean performLengthFiltering = true Int threads = 4 String memory = "50GiB" @@ -73,7 +75,9 @@ task Fastp { --thread ~{select_first([effectiveSplit, threads])} \ ~{"--split " + effectiveSplit} \ ~{if defined(effectiveSplit) then "-d 0" else ""} \ - ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} + ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} \ + ~{if performQualityFiltering then "" else "--disable_quality_filtering"} \ + ~{if performLengthFiltering then "" else "--disable_length_filtering"} >>> output { @@ -102,6 +106,8 @@ task Fastp { lengthRequired: {description: "The minimum read length.", category: "advanced"} split: {description: "The number of chunks to split the files into. 
Number of threads will be set equal to the amount of splits.", category: "common"} performAdapterTrimming: {description: "Whether adapter trimming should be performed or not.", category: "advanced"} + performQualityFiltering: {description: "Whether reads should be filtered based on quality scores.", category: "advanced"} + performLengthFiltering: {description: "Whether reads shoulde be filtered based on lengths.", catgegory: "advanced"} threads: {description: "The number of threads to use. Only used if the split input is not set.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From ab54bb588cd66f009df79bbf00b2238f0436fad6 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 3 Mar 2023 14:11:06 +0100 Subject: [PATCH 740/902] add option to enable/disable ploy-g trimming to fastp task --- fastp.wdl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index 68c0e5cd..9849738b 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -38,6 +38,7 @@ task Fastp { Boolean performAdapterTrimming = true Boolean performQualityFiltering = true Boolean performLengthFiltering = true + Boolean? performPolyGTrimming Int threads = 4 String memory = "50GiB" @@ -50,6 +51,11 @@ task Fastp { String outputDirR1 = sub(outputPathR1, basename(outputPathR1), "") String outputDirR2 = sub(outputPathR2, basename(outputPathR2), "") + String polyGTrimmingFlag = if defined(performPolyGTrimming) + then + if select_first([performPolyGTrimming]) then "--trim_poly_g" else "--disable_trim_poly_g" + else "" + Int? 
effectiveSplit = if select_first([split, 1]) > 1 then split else noneInt command <<< @@ -77,7 +83,8 @@ task Fastp { ~{if defined(effectiveSplit) then "-d 0" else ""} \ ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} \ ~{if performQualityFiltering then "" else "--disable_quality_filtering"} \ - ~{if performLengthFiltering then "" else "--disable_length_filtering"} + ~{if performLengthFiltering then "" else "--disable_length_filtering"} \ + ~{polyGTrimmingFlag} >>> output { @@ -108,6 +115,7 @@ task Fastp { performAdapterTrimming: {description: "Whether adapter trimming should be performed or not.", category: "advanced"} performQualityFiltering: {description: "Whether reads should be filtered based on quality scores.", category: "advanced"} performLengthFiltering: {description: "Whether reads shoulde be filtered based on lengths.", catgegory: "advanced"} + performPolyGTrimming: {description: "Whether or not poly-G-tail trimming should be performed. If undefined fastp's default behaviour will be used, ie. enabled for NextSeq/NovaSeq data as detected from read headers.", category: "advanced"} threads: {description: "The number of threads to use. Only used if the split input is not set.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 5d35105b452167ab9e09a9b0d9c041d2af84f253 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 3 Mar 2023 16:30:34 +0100 Subject: [PATCH 741/902] add purple options needed for shallow mode --- hmftools.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index e051dc99..78156f67 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1070,6 +1070,8 @@ task Purple { File driverGenePanel File somaticHotspots File germlineHotspots + Float? highlyDiploidPercentage + Float? 
somaticMinPuritySpread #The following should be in the same directory. File geneDataCsv File proteinFeaturesCsv @@ -1103,6 +1105,8 @@ task Purple { -run_drivers \ -somatic_hotspots ~{somaticHotspots} \ -driver_gene_panel ~{driverGenePanel} \ + ~{"-highly_diploid_percentage " + highlyDiploidPercentage} \ + ~{"-somatic_min_purity_spread " + somaticMinPuritySpread} \ -threads ~{threads} } From 36a4575e20c54b062995b96c24f68733affce707 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 10 Mar 2023 11:14:24 +0100 Subject: [PATCH 742/902] update parameter_meta and changelog --- CHANGELOG.md | 2 ++ hmftools.wdl | 2 ++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ce03ffc..753daf30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add the `highlyDiploidPercentage` and `somaticMinPuritySpread` inputs to the + hmtools PURPLE task. + Add a task for fastp. + Add a task for picard CollectInsertSizeMetrics. + Increased the timeMinutes runtime attribute for manta (somatic and germline) to `2880`. 
diff --git a/hmftools.wdl b/hmftools.wdl index 78156f67..c27630a1 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1186,6 +1186,8 @@ task Purple { driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} somaticHotspots: {description: "A vcf file with hotspot somatic variant sites.", category: "required"} germlineHotspots: {description: "A vcf file with hotspot germline variant sites.", category: "required"} + highlyDiploidPercentage: {description: "Equivalent to PURPLE's `-highly_diploid_percentage` option.", category: "advanced"} + somaticMinPuritySpread: {description: "Equivalent to PURPLE's `-somatic_min_purity_spread` option.", category: "advanced"} geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} From 1a57c2ed292504f138d8bb15ae145b7145ba6c1c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Apr 2023 15:08:41 +0200 Subject: [PATCH 743/902] Set stable version --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 753daf30..7e62171b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> -version 5.1.0-dev +version 5.1.0 --------------------------- + Add the `highlyDiploidPercentage` and `somaticMinPuritySpread` inputs to the hmtools PURPLE task. From 9394a3e29a0227e3dc1dc30700ad1d7e65b7e448 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Apr 2023 15:10:35 +0200 Subject: [PATCH 744/902] setting next version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 09b254e9..91ff5727 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -6.0.0 +5.2.0 From 64aa91e7db5e96625122b4484fb7d857a9ef2c13 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Apr 2023 16:32:09 +0200 Subject: [PATCH 745/902] Update cutadapt and FastQC --- CHANGELOG.md | 6 ++++++ cutadapt.wdl | 2 +- fastqc.wdl | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e62171b..a13b2f6c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> + +version 5.2.0-dev +--------------------------- ++ Update cutadapt version to 4.4 ++ Update FastQC version to 0.12.1 + version 5.1.0 --------------------------- + Add the `highlyDiploidPercentage` and `somaticMinPuritySpread` inputs to the diff --git a/cutadapt.wdl b/cutadapt.wdl index 9a67692c..191e6f0a 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -85,7 +85,7 @@ task Cutadapt { Int cores = 4 String memory = "5GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) - String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" + String dockerImage = "quay.io/biocontainers/cutadapt:4.4--py310h1425a21_0" } String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) diff --git a/fastqc.wdl b/fastqc.wdl index d821e531..59592d4e 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -47,7 +47,7 @@ task Fastqc { Int threads = 1 String memory = "2GiB" Int timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 - String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" + String dockerImage = "quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0" Array[File]? noneArray File? 
noneFile From 5cf560b5a9e69ba683c431193c330fdb7a41c028 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Apr 2023 16:55:44 +0200 Subject: [PATCH 746/902] Update classpath --- fastqc.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastqc.wdl b/fastqc.wdl index 59592d4e..da31882c 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -67,7 +67,7 @@ task Fastqc { command <<< set -e mkdir -p "~{outdirPath}" - FASTQC_DIR="/usr/local/opt/fastqc-0.11.9" + FASTQC_DIR="/usr/local/opt/fastqc-0.12.1" export CLASSPATH="$FASTQC_DIR:$FASTQC_DIR/sam-1.103.jar:$FASTQC_DIR/jbzip2-0.9.jar:$FASTQC_DIR/cisd-jhdf5.jar" java -Djava.awt.headless=true -XX:ParallelGCThreads=1 \ -Xms200M -Xmx~{javaXmx} \ From 0ed76c14ffe5ab4779ed42f924fbcab1acdda266 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 1 May 2023 15:46:55 +0200 Subject: [PATCH 747/902] Stable version in changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a13b2f6c..1551d13d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> -version 5.2.0-dev +version 5.2.0 --------------------------- + Update cutadapt version to 4.4 + Update FastQC version to 0.12.1 From 73f769bb966f67b9bf3fd72b9f5c4d6f923ccafa Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 1 May 2023 15:52:47 +0200 Subject: [PATCH 748/902] setting next version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 91ff5727..03f488b0 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -5.2.0 +5.3.0 From 0062b727197ae2601b234d7a69ae0f64bd7b59d1 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Thu, 8 Jun 2023 13:01:16 +0200 Subject: [PATCH 749/902] Add revcomp flag to cutadapt --- CHANGELOG.md | 5 +++++ cutadapt.wdl | 3 +++ 2 files changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1551d13d..5eb2ef17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> +version 5.3.0-dev +--------------------------- ++ Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. + + version 5.2.0 --------------------------- + Update cutadapt version to 4.4 diff --git a/cutadapt.wdl b/cutadapt.wdl index 191e6f0a..a164e360 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -81,6 +81,7 @@ task Cutadapt { Boolean? bwa Boolean? zeroCap Boolean? 
noZeroCap + Boolean revcomp = false Int cores = 4 String memory = "5GiB" @@ -149,6 +150,7 @@ task Cutadapt { ~{true="--bwa" false="" bwa} \ ~{true="--zero-cap" false="" zeroCap} \ ~{true="--no-zero-cap" false="" noZeroCap} \ + ~{if revcomp then "--revcomp" else ""} ~{read1} \ ~{read2} \ ~{"> " + reportPath} @@ -231,6 +233,7 @@ task Cutadapt { bwa: {description: "Equivalent to cutadapt's --bwa flag.", category: "advanced"} zeroCap: {description: "Equivalent to cutadapt's --zero-cap flag.", category: "advanced"} noZeroCap: {description: "Equivalent to cutadapt's --no-zero-cap flag.", category: "advanced"} + revcomp: {description: "Equivalent to cutadapt's --revcomp flag.", category: "advanced"} cores: {description: "The number of cores to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 502d73003072327d9756b4b2ce0c2f768ff1192a Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Thu, 8 Jun 2023 13:02:14 +0200 Subject: [PATCH 750/902] add missing backslash --- cutadapt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index a164e360..c695c08e 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -150,7 +150,7 @@ task Cutadapt { ~{true="--bwa" false="" bwa} \ ~{true="--zero-cap" false="" zeroCap} \ ~{true="--no-zero-cap" false="" noZeroCap} \ - ~{if revcomp then "--revcomp" else ""} + ~{if revcomp then "--revcomp" else ""} \ ~{read1} \ ~{read2} \ ~{"> " + reportPath} From cebb1b535be90193ed27c57f3ea2c659f20bfe39 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Mon, 12 Jun 2023 15:20:13 +0200 Subject: [PATCH 751/902] add a task for fastqFilter --- fastqFilter.wdl | 66 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 fastqFilter.wdl diff --git a/fastqFilter.wdl b/fastqFilter.wdl new file mode 100644 index 
00000000..d436b1ab --- /dev/null +++ b/fastqFilter.wdl @@ -0,0 +1,66 @@ +version 1.0 + +# MIT License +# +# Copyright (c) 2023 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task FastqFilter { + input { + Array[File]+ fastq + Array[String]+ outputPaths + Int? minLength + Int? 
maxLength + + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(seqFile, "G")) + String dockerImage = "quay.io/biocontainers/fastq-filter:0.3.0--py39hf95cd2a_1" + } + + command { + set -e + mkdir -p $(dirname ~{sep=" " outputPaths}) + fastq-filter \ + -o ~{sep=" -o " outputPaths} \ + ~{"-l " + minLength} \ + ~{"-L " + maxLength} \ + ~{sep=" " fastq} + } + + output { + Array[File] filtered = outputPaths + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + fastq: {description: "A list of fastq files to filter.", category: "required"} + outputPaths: {description: "A list containing the output paths for each input fastq file.", category: "required"} + minLength: {description: "Equivalent to fastq-filter's `--min-length` option.", category: "common"} + maxLength: {description: "Equivalent to fastq-filter's `--max-length` option.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file From 93e491d37de5780bea73010323dcef939814cdbc Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Mon, 12 Jun 2023 15:21:47 +0200 Subject: [PATCH 752/902] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5eb2ef17..34bf0600 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.3.0-dev --------------------------- ++ Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. 
From 3fc46b91cc63c31b1477692638492fdda9bbc084 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Mon, 12 Jun 2023 15:32:34 +0200 Subject: [PATCH 753/902] fix copy-paste error --- fastqFilter.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastqFilter.wdl b/fastqFilter.wdl index d436b1ab..2b2fcc45 100644 --- a/fastqFilter.wdl +++ b/fastqFilter.wdl @@ -30,7 +30,7 @@ task FastqFilter { Int? maxLength String memory = "4GiB" - Int timeMinutes = 1 + ceil(size(seqFile, "G")) + Int timeMinutes = 1 + ceil(size(fastq, "G")) String dockerImage = "quay.io/biocontainers/fastq-filter:0.3.0--py39hf95cd2a_1" } From 2bc4c06dd89444b6ccb42244a566873ba7fad5a2 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Tue, 13 Jun 2023 09:37:54 +0200 Subject: [PATCH 754/902] use 1GiB for fastqFilter --- fastqFilter.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastqFilter.wdl b/fastqFilter.wdl index 2b2fcc45..3701b8aa 100644 --- a/fastqFilter.wdl +++ b/fastqFilter.wdl @@ -29,7 +29,7 @@ task FastqFilter { Int? minLength Int? maxLength - String memory = "4GiB" + String memory = "1GiB" Int timeMinutes = 1 + ceil(size(fastq, "G")) String dockerImage = "quay.io/biocontainers/fastq-filter:0.3.0--py39hf95cd2a_1" } From 3fb2c1de2e19f68f7a3ab3e205864bff21bb3ba1 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg <15814544+Redmar-van-den-Berg@users.noreply.github.com> Date: Thu, 7 Sep 2023 08:48:09 +0200 Subject: [PATCH 755/902] Use softlink instead of hardlinks If the database files are on a different filesystem then the analysis folder, hardlinks are not allowed, leading to crashes. 
--- centrifuge.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index 757af239..41a907ae 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -122,7 +122,7 @@ task Classify { indexBasename="$(basename ~{sub(indexFiles[0], "\.[0-9]\.cf", "")})" for file in ~{sep=" " indexFiles} do - ln ${file} $PWD/"$(basename ${file})" + ln -s ${file} $PWD/"$(basename ${file})" done centrifuge \ ~{inputFormatOptions[inputFormat]} \ @@ -199,7 +199,7 @@ task Inspect { indexBasename="$(basename ~{sub(indexFiles[0], "\.[0-9]\.cf", "")})" for file in ~{sep=" " indexFiles} do - ln ${file} $PWD/"$(basename ${file})" + ln -s ${file} $PWD/"$(basename ${file})" done centrifuge-inspect \ ~{outputOptions[printOption]} \ @@ -256,7 +256,7 @@ task KReport { indexBasename="$(basename ~{sub(indexFiles[0], "\.[0-9]\.cf", "")})" for file in ~{sep=" " indexFiles} do - ln ${file} $PWD/"$(basename ${file})" + ln -s ${file} $PWD/"$(basename ${file})" done centrifuge-kreport \ -x $PWD/${indexBasename} \ From 44cdc1862bf20b1cf77f0fedfb0ba25b3e5efa43 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 7 Sep 2023 08:52:12 +0200 Subject: [PATCH 756/902] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 34bf0600..6acbbc85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.3.0-dev --------------------------- ++ Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. 
From 7404b0e6f7470c4d04d80f7037f1068ad091d9ba Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 26 Aug 2024 17:07:03 +0200 Subject: [PATCH 757/902] Add a selectGenotype switch --- gatk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index 0b93efe6..a2aff322 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1514,6 +1514,7 @@ task SelectVariants { Array[File] intervals = [] String? selectTypeToInclude + String? selectGenotype String javaXmx = "4G" String memory = "5GiB" @@ -1529,6 +1530,7 @@ task SelectVariants { -R ~{referenceFasta} \ -V ~{inputVcf} \ ~{"--select-type-to-include " + selectTypeToInclude} \ + ~{"-select-genotype " + selectGenotype} \ ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} \ -O ~{outputPath} } From d86d9cb89a8f8b74ad2b714a23e1686fd4f26e3d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 27 Aug 2024 10:19:18 +0200 Subject: [PATCH 758/902] Quote select genotype value --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index a2aff322..f272a2f9 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1530,7 +1530,7 @@ task SelectVariants { -R ~{referenceFasta} \ -V ~{inputVcf} \ ~{"--select-type-to-include " + selectTypeToInclude} \ - ~{"-select-genotype " + selectGenotype} \ + ~{"-select-genotype \"" + selectGenotype}~{true="\"" false="" defined(selectGenotype)} \ ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} \ -O ~{outputPath} } From 558c9b7d7370b0f46346c16beaa4d4cb3f48b09e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 30 Aug 2024 15:23:55 +0200 Subject: [PATCH 759/902] Add exclude filtered expression --- gatk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index f272a2f9..230674a5 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1513,6 +1513,7 @@ task SelectVariants { String outputPath = "output.vcf.gz" Array[File] intervals = [] + Boolean excludeFiltered = false String? selectTypeToInclude String? 
selectGenotype @@ -1531,6 +1532,7 @@ task SelectVariants { -V ~{inputVcf} \ ~{"--select-type-to-include " + selectTypeToInclude} \ ~{"-select-genotype \"" + selectGenotype}~{true="\"" false="" defined(selectGenotype)} \ + ~{true="--exclude-filtered" false="" excludeFiltered} \ ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} \ -O ~{outputPath} } From 75f36133cb52ce6f02701ff11612f6884a8d1726 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 18 Oct 2024 14:52:33 +0200 Subject: [PATCH 760/902] Use reference files in rtg-tools tasks to make tasks cacheable --- rtg.wdl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/rtg.wdl b/rtg.wdl index 3e9dab9b..62e1e77f 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -24,8 +24,7 @@ task Format { input { Array[File]+ inputFiles String format = "fasta" - String outputPath = "seq_data.sdf" - + String outputPath = "reference_data" String rtgMem = "8G" String memory = "9GiB" Int timeMinutes = 1 + ceil(size(inputFiles, "GiB") * 2) @@ -41,7 +40,7 @@ task Format { } output { - File sdf = outputPath + Array[File] referenceFiles = glob("~{outputPath}/*") } runtime { @@ -61,7 +60,7 @@ task Format { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - sdf: {description: "RTGSequence Data File (SDF) format version of the input file(s)."} + referenceFiles: {description: "An array with all the generated reference files"} } } @@ -74,7 +73,7 @@ task VcfEval { Boolean squashPloidy = false String outputMode = "split" String outputDir = "output/" - File template + Array[File] referenceFiles Boolean allRecords = false Boolean decompose = false Boolean refOverlap = false @@ -99,7 +98,7 @@ task VcfEval { ~{"--evaluation-regions " + evaluationRegions} \ ~{"--bed-regions " + bedRegions} \ --output ~{outputDir} \ - --template ~{template} \ + --template $(dirname ~{referenceFiles[0]}) \ ~{true="--all-records" false="" allRecords} \ ~{true="--decompose" false="" decompose} \ ~{true="--ref-overlap" false="" refOverlap} \ @@ -152,7 +151,7 @@ task VcfEval { squashPloidy: {description: "treat heterozygous genotypes as homozygous ALT in both baseline and calls, to allow matches that ignore zygosity differences.", category: "common"} outputMode: {description: "output reporting mode. 
Allowed values are [split, annotate, combine, ga4gh, roc-only] (Default is split).", category: "advanced"} outputDir: {description: "Directory for output.", category: "advanced"} - template: {description: "SDF of the reference genome the variants are called against.", category: "required"} + referenceFiles: {description: "An array of reference Files generated by the Format task.", category: "required"} allRecords: {description: "use all records regardless of FILTER status (Default is to only process records where FILTER is \".\" or \"PASS\").", category: "common"} decompose: {description: "decompose complex variants into smaller constituents to allow partial credit.", category: "common"} refOverlap: {description: "allow alleles to overlap where bases of either allele are same-as-ref (Default is to only allow VCF anchor base overlap).", category: "common"} From 53d5083e5ca9de973eba1916dc273e0ff3dd9e04 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 11:04:31 +0100 Subject: [PATCH 761/902] Update minimap2 task to output sorted BAM --- minimap2.wdl | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/minimap2.wdl b/minimap2.wdl index 96cc7734..47464585 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -81,15 +81,19 @@ task Indexing { task Mapping { input { String presetOption - Int kmerSize = 15 - Boolean skipSelfAndDualMappings = false - Boolean outputSam = false String outputPrefix - Boolean addMDTagToSam = false - Boolean secondaryAlignment = false File referenceFile File queryFile + + Int compressionLevel = 1 + Int additionalSortThreads = 1 + Int sortMemoryGb = 1 + Boolean skipSelfAndDualMappings = false + Boolean addMDTagToSam = false + Boolean secondaryAlignment = true + + Int? kmerSize Int? maxIntronLength Int? maxFragmentLength Int? retainMaxSecondaryAlignments @@ -97,8 +101,8 @@ task Mapping { Int? mismatchPenalty String? 
howToFindGTAG - Int cores = 4 - String memory = "30GiB" + Int cores = 8 + String memory = "24GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } @@ -108,13 +112,11 @@ task Mapping { mkdir -p "$(dirname ~{outputPrefix})" minimap2 \ -x ~{presetOption} \ - -k ~{kmerSize} \ ~{true="-X" false="" skipSelfAndDualMappings} \ - ~{true="-a" false="" outputSam} \ - -o ~{outputPrefix} \ ~{true="--MD" false="" addMDTagToSam} \ --secondary=~{true="yes" false="no" secondaryAlignment} \ -t ~{cores} \ + ~{"-k " + kmerSize} \ ~{"-G " + maxIntronLength} \ ~{"-F " + maxFragmentLength} \ ~{"-N " + retainMaxSecondaryAlignments} \ @@ -122,11 +124,18 @@ task Mapping { ~{"-B " + mismatchPenalty} \ ~{"-u " + howToFindGTAG} \ ~{referenceFile} \ - ~{queryFile} + ~{queryFile} \ + | samtools sort \ + -@ ~{additionalSortThreads} \ + -l ~{compressionLevel} \ + -m ~{sortMemoryGb}G \ + -o ~{outputPrefix}.bam + samtools index -o ~{outputPrefix}.bam } output { - File alignmentFile = outputPrefix + File bam = ~{outputPrefix}.bam + File bamIndex = ~{outputPrefix}.bam.bai } runtime { From 77506d8d208b524cfb2427314d4568aac75e4b87 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 11:05:39 +0100 Subject: [PATCH 762/902] Add a flag for namesorting --- minimap2.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/minimap2.wdl b/minimap2.wdl index 47464585..64313ef4 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -88,6 +88,7 @@ task Mapping { Int compressionLevel = 1 Int additionalSortThreads = 1 Int sortMemoryGb = 1 + Boolean nameSorted = false Boolean skipSelfAndDualMappings = false Boolean addMDTagToSam = false @@ -126,6 +127,7 @@ task Mapping { ~{referenceFile} \ ~{queryFile} \ | samtools sort \ + ~{true="-N" false="" nameSorted} \ -@ ~{additionalSortThreads} \ -l ~{compressionLevel} \ -m ~{sortMemoryGb}G \ From e78cfa0c198a65d60f6b1adb3e33878c02e5c90f Mon Sep 17 00:00:00 2001 From: Ruben 
Vorderman Date: Fri, 8 Nov 2024 11:18:46 +0100 Subject: [PATCH 763/902] Add clair3 task --- clair3.wdl | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 clair3.wdl diff --git a/clair3.wdl b/clair3.wdl new file mode 100644 index 00000000..eb18d208 --- /dev/null +++ b/clair3.wdl @@ -0,0 +1,61 @@ +version 1.0 + +# Copyright (c) 2024 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Clair3 { + input { + File bam + File bamIndex + File referenceFasta + File referenceFastaFai + String outputPrefix + File? model + String? 
builtinModel + String platform + Int threads = 8 + Boolean includeAllCtgs = false + String memory = "20GiB" + Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / cores) + String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" + } + + # A default set for testing + String modelArg = "~{true=model false=builtinModel, defined(model)}" + + command <<< + run_clair3.sh \ + --model=~{modelArg} \ + --ref_fn=~{reference_fasta} \ + --bam_fn=~{bam} \ + --output=out \ + --threads=~{threads} \ + --platform=~{platform} \ + ~{true="--include_all_ctgs" false =""} + mv out/merge_output.vcf.gz ~{prefix}.vcf.gz + mv out/merge_output.vcf.gz.tbi ~{prefix}.vcf.gz.tbi + >>> + output { + File vcf = "~{outputPrefix}.vcf.gz" + File vcfIndex = "~{outputPrefix}.vcf.gz.tbi" + } + + +} \ No newline at end of file From 0d84d673368819a78296f97f0f5b6c3225439ded Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 12:20:35 +0100 Subject: [PATCH 764/902] Add sequali and update multiqc to a version that supports it --- multiqc.wdl | 2 +- sequali.wdl | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 sequali.wdl diff --git a/multiqc.wdl b/multiqc.wdl index 21fc8a7d..f04a1021 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -58,7 +58,7 @@ task MultiQC { String? 
memory Int timeMinutes = 10 + ceil(size(reports, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/multiqc:1.9--py_1" + String dockerImage = "quay.io/biocontainers/multiqc:1.25.1--pyhdfd78af_0 " } Int memoryGb = 2 + ceil(size(reports, "GiB")) diff --git a/sequali.wdl b/sequali.wdl new file mode 100644 index 00000000..98700fb7 --- /dev/null +++ b/sequali.wdl @@ -0,0 +1,46 @@ +version 1.0 + +# Copyright (c) 2024 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Sequali { + input { + File reads + File? mate_reads + Int threads = 2 + String outDir = "." 
+ dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" + } + + command <<< + set -e + mkdir -p $(dirname outputDir) + sequali \ + --outdir ~{outDir} \ + --threads ~{threads} \ + ~{reads} \ + ~{mate_reads} + >>> + + output { + File html = basename(reads) + ".html" + File json = basename(reads) + ".json" + } +} \ No newline at end of file From 272842244d79797615aa430bb6836a8cb78ba8fd Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 12:45:59 +0100 Subject: [PATCH 765/902] Fix womtool validation errors --- clair3.wdl | 16 ++++++++-------- minimap2.wdl | 8 ++++---- sequali.wdl | 9 ++++++++- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index eb18d208..6c0c1d38 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -33,25 +33,25 @@ task Clair3 { Int threads = 8 Boolean includeAllCtgs = false String memory = "20GiB" - Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / cores) - String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" + Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) + String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } - # A default set for testing - String modelArg = "~{true=model false=builtinModel, defined(model)}" + String modelArg = "~{true=model false=builtinModel defined(model)}" command <<< run_clair3.sh \ --model=~{modelArg} \ - --ref_fn=~{reference_fasta} \ + --ref_fn=~{referenceFasta} \ --bam_fn=~{bam} \ --output=out \ --threads=~{threads} \ --platform=~{platform} \ - ~{true="--include_all_ctgs" false =""} - mv out/merge_output.vcf.gz ~{prefix}.vcf.gz - mv out/merge_output.vcf.gz.tbi ~{prefix}.vcf.gz.tbi + ~{true="--include_all_ctgs" false ="" includeAllCtgs} + mv out/merge_output.vcf.gz ~{outputPrefix}.vcf.gz + mv out/merge_output.vcf.gz.tbi ~{outputPrefix}.vcf.gz.tbi >>> + output { File vcf = "~{outputPrefix}.vcf.gz" File vcfIndex = "~{outputPrefix}.vcf.gz.tbi" diff --git a/minimap2.wdl b/minimap2.wdl index 64313ef4..fff5b4ec 
100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -136,8 +136,8 @@ task Mapping { } output { - File bam = ~{outputPrefix}.bam - File bamIndex = ~{outputPrefix}.bam.bai + File bam = "~{outputPrefix}.bam " + File bamIndex = "~{outputPrefix}.bam.bai" } runtime { @@ -152,7 +152,6 @@ task Mapping { presetOption: {description: "This option applies multiple options at the same time.", category: "common"} kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"} skipSelfAndDualMappings: {description: "Skip self and dual mappings (for the all-vs-all mode).", category: "advanced"} - outputSam: {description: "Output in the sam format.", category: "common"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} addMDTagToSam: {description: "Adds a MD tag to the sam output file.", category: "common"} secondaryAlignment: {description: "Whether to output secondary alignments.", category: "advanced"} @@ -170,6 +169,7 @@ task Mapping { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - alignmentFile: {description: "Mapping and alignment between collections of dna sequences file."} + bam: {description: "Mapping and alignment between collections of dna sequences file in BAM format."} + bamIndex: {description: "Accompanying index file for the BAM file."} } } diff --git a/sequali.wdl b/sequali.wdl index 98700fb7..c2eff2c9 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -26,7 +26,7 @@ task Sequali { File? mate_reads Int threads = 2 String outDir = "." 
- dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" + String dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" } command <<< @@ -43,4 +43,11 @@ task Sequali { File html = basename(reads) + ".html" File json = basename(reads) + ".json" } + + runtime { + cpu: threads + memory: "2GiB" + docker: dockerImage + time_minutes: 59 + } } \ No newline at end of file From 01ff19c51bf4b8ff28cf16b067bbb128d2d435b4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 13:12:56 +0100 Subject: [PATCH 766/902] Fix runtime issues --- clair3.wdl | 12 ++++++++++-- minimap2.wdl | 12 +++++++----- multiqc.wdl | 2 +- sequali.wdl | 4 ++-- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index 6c0c1d38..2d111a5d 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -34,12 +34,14 @@ task Clair3 { Boolean includeAllCtgs = false String memory = "20GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) - String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" + String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" } - String modelArg = "~{true=model false=builtinModel defined(model)}" + String modelArg = "~{if defined(model) then model else builtinModel}" command <<< + set -e + mkdir -p $(dirname ~{outputPrefix}) run_clair3.sh \ --model=~{modelArg} \ --ref_fn=~{referenceFasta} \ @@ -57,5 +59,11 @@ task Clair3 { File vcfIndex = "~{outputPrefix}.vcf.gz.tbi" } + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } } \ No newline at end of file diff --git a/minimap2.wdl b/minimap2.wdl index fff5b4ec..5709c998 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -105,13 +105,15 @@ task Mapping { Int cores = 8 String memory = "24GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) - String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" + # Minimap 2.28 samtools 1.20 + String dockerImage = 
"quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0" } command { set -e mkdir -p "$(dirname ~{outputPrefix})" minimap2 \ + -a \ -x ~{presetOption} \ ~{true="-X" false="" skipSelfAndDualMappings} \ ~{true="--MD" false="" addMDTagToSam} \ @@ -125,19 +127,19 @@ task Mapping { ~{"-B " + mismatchPenalty} \ ~{"-u " + howToFindGTAG} \ ~{referenceFile} \ - ~{queryFile} \ + ~{queryFile} \ | samtools sort \ ~{true="-N" false="" nameSorted} \ -@ ~{additionalSortThreads} \ -l ~{compressionLevel} \ -m ~{sortMemoryGb}G \ -o ~{outputPrefix}.bam - samtools index -o ~{outputPrefix}.bam + samtools index ~{outputPrefix}.bam } output { - File bam = "~{outputPrefix}.bam " - File bamIndex = "~{outputPrefix}.bam.bai" + File bam = "~{outputPrefix}.bam" + File bamIndex = "~{outputPrefix}.bam.bai" } runtime { diff --git a/multiqc.wdl b/multiqc.wdl index f04a1021..a2e32cdb 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -58,7 +58,7 @@ task MultiQC { String? 
memory Int timeMinutes = 10 + ceil(size(reports, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/multiqc:1.25.1--pyhdfd78af_0 " + String dockerImage = "quay.io/biocontainers/multiqc:1.25.1--pyhdfd78af_0" } Int memoryGb = 2 + ceil(size(reports, "GiB")) diff --git a/sequali.wdl b/sequali.wdl index c2eff2c9..ed6e5d40 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -40,8 +40,8 @@ task Sequali { >>> output { - File html = basename(reads) + ".html" - File json = basename(reads) + ".json" + File html = outDir + "/" + basename(reads) + ".html" + File json = outDir + "/" + basename(reads) + ".json" } runtime { From a488618740428dcc7e940a6b27750ff62b87428e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 13:28:56 +0100 Subject: [PATCH 767/902] Include all contigs by default for clair3 --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index 2d111a5d..d824ec13 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -31,7 +31,7 @@ task Clair3 { String? builtinModel String platform Int threads = 8 - Boolean includeAllCtgs = false + Boolean includeAllCtgs = true # Not the clair3 default, but generally what you want. String memory = "20GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" From 1bc3416c90953ba05d3e00370c74355ad0fa7c9b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 15:17:14 +0100 Subject: [PATCH 768/902] Work from a model tar file --- clair3.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index d824ec13..7b2d98fe 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -27,7 +27,7 @@ task Clair3 { File referenceFasta File referenceFastaFai String outputPrefix - File? model + File? modelTar String? 
builtinModel String platform Int threads = 8 @@ -37,10 +37,11 @@ task Clair3 { String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" } - String modelArg = "~{if defined(model) then model else builtinModel}" + String modelArg = "~{if defined(modelTar) then basename(select_first([modelTar]), '.tar.gz') else builtinModel}" command <<< set -e + ~{if defined(modelTar) then "tar -xvf " + modelTar else "" } mkdir -p $(dirname ~{outputPrefix}) run_clair3.sh \ --model=~{modelArg} \ From 8fa481125d3038034a2ae28fedf88809b10e0c98 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 11 Nov 2024 14:30:25 +0100 Subject: [PATCH 769/902] Set includeAlCtgs to false --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index 7b2d98fe..bc25394b 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -31,7 +31,7 @@ task Clair3 { String? builtinModel String platform Int threads = 8 - Boolean includeAllCtgs = true # Not the clair3 default, but generally what you want. 
+ Boolean includeAllCtgs = false String memory = "20GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" From 98d9e2c92b0655eb022bd9793b3449ba3eb52b9f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 12 Nov 2024 08:38:00 +0100 Subject: [PATCH 770/902] Increase memory --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index bc25394b..4184f49e 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -32,7 +32,7 @@ task Clair3 { String platform Int threads = 8 Boolean includeAllCtgs = false - String memory = "20GiB" + String memory = "~{threads + 16}GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" } From f13a7e2dbe793b2742080b91d90e42b29f6c0e6c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 13 Nov 2024 16:47:03 +0100 Subject: [PATCH 771/902] Update parameter_meta --- gatk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index 230674a5..655a0b66 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1558,6 +1558,8 @@ task SelectVariants { outputPath: {description: "The location the output VCF file should be written.", category: "advanced"} intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"} selectTypeToInclude: {description: "Select only a certain type of variants from the input file.", category: "common"} + excludeFiltered: {description: "Remove all variants that do not have a PASS filter", category: "advanced"} + selectGenotype: {description: "The genotype to be selected", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 3c8d2e73d12d9cd3101752dff2976f86d61b4c23 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 13 Nov 2024 16:48:14 +0100 Subject: [PATCH 772/902] Update changelog --- CHANGELOG.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6acbbc85..6db06e23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,13 +8,14 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 5.3.0-dev +version 6.0.0-dev --------------------------- ++ rtg Format and VcfEval tasks now handle reference as an array of files to enable caching. ++ Added --select-genotype and --exclude-filtered flags to GATK SelectVariants + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. - version 5.2.0 --------------------------- + Update cutadapt version to 4.4 From a6eec0e6af6554ba1c85a24e3a63b0bcd01cfe76 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 13 Dec 2024 15:51:29 +0100 Subject: [PATCH 773/902] Add a readgroup flag to minimap2 --- minimap2.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/minimap2.wdl b/minimap2.wdl index 5709c998..e785ffd7 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -101,6 +101,7 @@ task Mapping { Int? matchingScore Int? mismatchPenalty String? howToFindGTAG + String? 
readgroup Int cores = 8 String memory = "24GiB" @@ -126,6 +127,7 @@ task Mapping { ~{"-A " + matchingScore} \ ~{"-B " + mismatchPenalty} \ ~{"-u " + howToFindGTAG} \ + ~{"-R '" + readgroup}~{false="" true="'" defined(readgroup)} \ ~{referenceFile} \ ~{queryFile} \ | samtools sort \ From b717f3fa8d82d3bb040d3df134533839f5adec9d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 17 Dec 2024 17:33:59 +0100 Subject: [PATCH 774/902] Add -o pipefail --- minimap2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/minimap2.wdl b/minimap2.wdl index e785ffd7..95b84bc4 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -111,7 +111,7 @@ task Mapping { } command { - set -e + set -e -o pipefail mkdir -p "$(dirname ~{outputPrefix})" minimap2 \ -a \ From 42ca869223960072ca0f9fc1e87aae7f469a4d34 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 8 Jan 2025 17:35:29 +0100 Subject: [PATCH 775/902] Allow copying of comments from fastq --- minimap2.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/minimap2.wdl b/minimap2.wdl index 95b84bc4..daf47a9a 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -93,6 +93,7 @@ task Mapping { Boolean skipSelfAndDualMappings = false Boolean addMDTagToSam = false Boolean secondaryAlignment = true + Boolean copyCommentsFromFastq = true Int? kmerSize Int? 
maxIntronLength @@ -119,6 +120,7 @@ task Mapping { ~{true="-X" false="" skipSelfAndDualMappings} \ ~{true="--MD" false="" addMDTagToSam} \ --secondary=~{true="yes" false="no" secondaryAlignment} \ + ~{true="-y" false="" copyCommentsFromFastq} \ -t ~{cores} \ ~{"-k " + kmerSize} \ ~{"-G " + maxIntronLength} \ From 7240b178ef378d39b5cb0983cf3a681b0bf52488 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Jan 2025 15:53:23 +0100 Subject: [PATCH 776/902] Allow minimap2 to process uBAM --- minimap2.wdl | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/minimap2.wdl b/minimap2.wdl index daf47a9a..18127cb1 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -89,11 +89,19 @@ task Mapping { Int additionalSortThreads = 1 Int sortMemoryGb = 1 Boolean nameSorted = false + # MM, ML, MN -> Methylation flags + # Also keep the following flags for Sequali to be able to run on the mapped bam file and get ONT information. + # ch -> channel + # st -> start time + # du -> duration + # dx -> Whether read was duplex + # pi -> Parent ID for split read + + String tagsToKeep = "MM,ML,MN,ch,st,du,dx,pi" Boolean skipSelfAndDualMappings = false Boolean addMDTagToSam = false Boolean secondaryAlignment = true - Boolean copyCommentsFromFastq = true Int? kmerSize Int? maxIntronLength @@ -111,16 +119,21 @@ task Mapping { String dockerImage = "quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0" } - command { + # Always run data through samtools fastq. This supports both FASTQ and uBAM + # files. It does remove any existing FASTQ comments, but this should not be + # problematic for most files. 
+ + command <<< set -e -o pipefail mkdir -p "$(dirname ~{outputPrefix})" + samtools fastq -T "~{tagsToKeep}" ~{queryFile} | \ minimap2 \ -a \ -x ~{presetOption} \ ~{true="-X" false="" skipSelfAndDualMappings} \ ~{true="--MD" false="" addMDTagToSam} \ --secondary=~{true="yes" false="no" secondaryAlignment} \ - ~{true="-y" false="" copyCommentsFromFastq} \ + -y \ -t ~{cores} \ ~{"-k " + kmerSize} \ ~{"-G " + maxIntronLength} \ @@ -131,7 +144,7 @@ task Mapping { ~{"-u " + howToFindGTAG} \ ~{"-R '" + readgroup}~{false="" true="'" defined(readgroup)} \ ~{referenceFile} \ - ~{queryFile} \ + - \ | samtools sort \ ~{true="-N" false="" nameSorted} \ -@ ~{additionalSortThreads} \ @@ -139,7 +152,7 @@ task Mapping { -m ~{sortMemoryGb}G \ -o ~{outputPrefix}.bam samtools index ~{outputPrefix}.bam - } + >>> output { File bam = "~{outputPrefix}.bam" @@ -168,6 +181,7 @@ task Mapping { retainMaxSecondaryAlignments: {description: "Retain at most N secondary alignments.", category: "advanced"} matchingScore: {description: "Matching score.", category: "advanced"} mismatchPenalty: {description: "Mismatch penalty.", category: "advanced"} + tagsToKeep: {description: "Tags to keep from the input unaligned BAM file.", category: "Advanced"} howToFindGTAG: {description: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} From c7c1b5bb932de4ea6d1ca3069007d4e1ad5c168d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Jan 2025 16:04:08 +0100 Subject: [PATCH 777/902] Allow sample name to set --- clair3.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clair3.wdl b/clair3.wdl index 4184f49e..db2c2fb5 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -27,6 +27,7 @@ task Clair3 { File referenceFasta File referenceFastaFai String outputPrefix + String? sampleName File? 
modelTar String? builtinModel String platform @@ -50,6 +51,7 @@ task Clair3 { --output=out \ --threads=~{threads} \ --platform=~{platform} \ + ~{"--sample_name=" + sampleName} \ ~{true="--include_all_ctgs" false ="" includeAllCtgs} mv out/merge_output.vcf.gz ~{outputPrefix}.vcf.gz mv out/merge_output.vcf.gz.tbi ~{outputPrefix}.vcf.gz.tbi From e3ceb602b5baf955f850f30301a68bc1a1a1c970 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Jan 2025 16:47:20 +0100 Subject: [PATCH 778/902] Proper numshards to deepvariant and update it to latest version --- deepvariant.wdl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index 25d05bd9..2d212000 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -27,19 +27,19 @@ task RunDeepVariant { File inputBam File inputBamIndex String modelType - String outputVcf + String outputVcf = "sample.vcf.gz" String? postprocessVariantsExtraArgs File? customizedModel - Int? numShards + Int numShards = 4 String? outputGVcf String? outputGVcfIndex File? regions String? sampleName Boolean? 
VCFStatsReport = true - String memory = "3GiB" + String memory = "48GiB" Int timeMinutes = 5000 - String dockerImage = "google/deepvariant:1.0.0" + String dockerImage = "google/deepvariant:1.6.1" } command { @@ -62,6 +62,7 @@ task RunDeepVariant { memory: memory time_minutes: timeMinutes docker: dockerImage + cpu: numShards } output { From a5dca2e7596f50436beb6c69b597722dc4aaa764 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 11:28:46 +0100 Subject: [PATCH 779/902] Add modkit pileup --- modkit.wdl | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 modkit.wdl diff --git a/modkit.wdl b/modkit.wdl new file mode 100644 index 00000000..4ac6bfa6 --- /dev/null +++ b/modkit.wdl @@ -0,0 +1,64 @@ +version 1.0 + +# Copyright (c) 2025 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task Pileup { + input { + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" + File bam + File bamIndex + String outputBed = "output.bed" + File referenceFasta + File referenceFastaFai + + Int? intervalSize + File? includeBed + + Boolean cpg = false + Boolean combineMods = false + String logFilePath = "modkit.log" + + Int threads = 4 + + } + + command <<< + set -e + mkdir -p $(dirname ~{outputBed}) + mkdir -p $(dirname ~{logFilePath}) + modkit pileup \ + --threads ~{threads} \ + ~{"--interval-size " + intervalSize} \ + ~{"--include-bed " + includeBed} + --ref ~{referenceFasta} \ + ~{true="--cpg" false="" cpg} \ + ~{true="--combine-mods" false="" combineMods} \ + --log-filepath ~{logFilePath} \ + ~{bam} \ + ~{outputBed} + >>> + + runtime { + docker: dockerImage + cpu: threads + + } +} \ No newline at end of file From 085fc5dd691444c9bcdb6c0483413ce5c1cf8d5f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 14:15:28 +0100 Subject: [PATCH 780/902] Update modkit --- modkit.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modkit.wdl b/modkit.wdl index 4ac6bfa6..9f311121 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -56,6 +56,11 @@ task Pileup { ~{outputBed} >>> + output { + File out = outputBed + File logFile = logFilePath + } + runtime { docker: dockerImage cpu: threads From 3540b4a12a2b7d56249f2d20941a6526af9c8f6e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 14:19:14 +0100 Subject: [PATCH 781/902] Add memory to modkit --- modkit.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modkit.wdl b/modkit.wdl index 9f311121..96f92c41 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -37,6 +37,7 @@ task Pileup { String logFilePath = "modkit.log" Int threads = 4 + String memory = "16GiB" } @@ -64,6 +65,7 @@ task Pileup { runtime { docker: dockerImage cpu: threads + memory: memory } } \ No newline at end of file From bc179875e1cf04fcd4efc63338b73d1230e3ef96 Mon Sep 17 00:00:00 2001 From: Ruben 
Vorderman Date: Wed, 29 Jan 2025 14:23:21 +0100 Subject: [PATCH 782/902] Add missing backslash --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 96f92c41..4f8bceb4 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -48,7 +48,7 @@ task Pileup { modkit pileup \ --threads ~{threads} \ ~{"--interval-size " + intervalSize} \ - ~{"--include-bed " + includeBed} + ~{"--include-bed " + includeBed} \ --ref ~{referenceFasta} \ ~{true="--cpg" false="" cpg} \ ~{true="--combine-mods" false="" combineMods} \ From c69c5cb2031913669dba5bf2cfe1acc4b00fed95 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 15:44:00 +0100 Subject: [PATCH 783/902] Set rather high defaults for time and memory for modkit --- modkit.wdl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 4f8bceb4..d827d896 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -36,8 +36,9 @@ task Pileup { Boolean combineMods = false String logFilePath = "modkit.log" - Int threads = 4 - String memory = "16GiB" + Int threads = 8 + String memory = "48GiB" + Int timeMinutes = 4320 # 3 Days } @@ -66,6 +67,6 @@ task Pileup { docker: dockerImage cpu: threads memory: memory - + time_minutes: timeMinutes } } \ No newline at end of file From beec409c6e2ce345d6976f159d7da73b79110fe4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 16:35:59 +0100 Subject: [PATCH 784/902] Upgrade sequali memory --- sequali.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sequali.wdl b/sequali.wdl index ed6e5d40..664fc082 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -46,7 +46,7 @@ task Sequali { runtime { cpu: threads - memory: "2GiB" + memory: "4GiB" docker: dockerImage time_minutes: 59 } From a87956ed26298c48b29f23782dc268f8d8bf29ff Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 3 Feb 2025 15:10:51 +0100 Subject: [PATCH 785/902] Add modkit flags --- modkit.wdl | 6 ++++++ 1 file 
changed, 6 insertions(+) diff --git a/modkit.wdl b/modkit.wdl index d827d896..35d3c7fc 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -34,6 +34,9 @@ task Pileup { Boolean cpg = false Boolean combineMods = false + Boolean combineStrands = false + Boolean bedgraph = false + String? ignore String logFilePath = "modkit.log" Int threads = 8 @@ -50,9 +53,12 @@ task Pileup { --threads ~{threads} \ ~{"--interval-size " + intervalSize} \ ~{"--include-bed " + includeBed} \ + ~{"--ignore " + ignore} \ --ref ~{referenceFasta} \ ~{true="--cpg" false="" cpg} \ ~{true="--combine-mods" false="" combineMods} \ + ~{true="--combine-strands" false="" combineStrands} \ + ~{true="--bedgraph" false="" bedgraph} \ --log-filepath ~{logFilePath} \ ~{bam} \ ~{outputBed} From 730a8a7672b491ccac1dbfdab497a9420ac40f71 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 3 Feb 2025 16:12:37 +0100 Subject: [PATCH 786/902] Capture multiple output files --- modkit.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 35d3c7fc..1cac1bd1 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -65,7 +65,8 @@ task Pileup { >>> output { - File out = outputBed + File? 
out = outputBed # Normal mode + Array[File] outFiles = glob(outputBed + "/*") # Bedgraph mode File logFile = logFilePath } From ed50e2dfb30a8f354f4e0dd2a4f7ae5aeec952fe Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 4 Feb 2025 17:01:46 +0100 Subject: [PATCH 787/902] Update documentation for new tasks --- clair3.wdl | 22 ++++++++++++++++++++++ modkit.wdl | 30 +++++++++++++++++++++++++++++- sequali.wdl | 25 ++++++++++++++++++++++--- 3 files changed, 73 insertions(+), 4 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index db2c2fb5..709d59b5 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -69,4 +69,26 @@ task Clair3 { docker: dockerImage } + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + outputPrefix: {description: "The output prefix where the data should be placed.", category: "common"} + modelTar: {description: "The TAR file with the model", category: "common"} + builtinModel: {description: "The builtin model name (in case a tar file is not used)", category: "common"} + sampleName: {description: "The name of the sample in the VCF", category: "common"} + platform: {description: "platform setting for clair3.", category: "required"} + includeAllCtgs: {description: "whether or not to call all contigs in the reference", category: "advanced"} + threads: {description: "The number of threads to use for variant calling.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + vcf: {description: "Output VCF file."} + vcfIndex: {description: "Output VCF index."} + + } } \ No newline at end of file diff --git a/modkit.wdl b/modkit.wdl index 1cac1bd1..382bfc09 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -22,7 +22,6 @@ version 1.0 task Pileup { input { - String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" File bam File bamIndex String outputBed = "output.bed" @@ -42,6 +41,7 @@ task Pileup { Int threads = 8 String memory = "48GiB" Int timeMinutes = 4320 # 3 Days + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } @@ -76,4 +76,32 @@ task Pileup { memory: memory time_minutes: timeMinutes } + + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + outputBed: {description: "The output name where the data should be placed.", category: "common"} + + intervalSize: {description: "Sets the interval size", category: "advanced"} + includeBed: {description: "Bed file with regions to include", category: "advanced"} + cpg: {description: "Whether to call only at cpg sites", category: "advanced"} + combineMods: {description: "Whether to combine modifications in the output", category: "advanced"} + combineStrands: {description: "Whether to combine strands in the output", category: "advanced"} + bedgraph: {description: "Whether to create a folder instead with a bedgraph file", category: "advanced"} + ignore: {description: "Modification type to ignore. 
For example 'h'.", category: "advanced"} + logFilePath: {description: "Path where the log file should be written.", category: "advanced"} + + threads: {description: "The number of threads to use for variant calling.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + out: {description: "The output bed files. Not available when bedgraph = true."} + outFiles: {description: "Output files when bedgraph = true."} + logFile: {description: "The generated log file."} + } } \ No newline at end of file diff --git a/sequali.wdl b/sequali.wdl index 664fc082..cbd3d869 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -24,9 +24,12 @@ task Sequali { input { File reads File? mate_reads - Int threads = 2 String outDir = "." + + Int threads = 2 + String memory = "4GiB" String dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" + Int timeMinutes = 59 } command <<< @@ -46,8 +49,24 @@ task Sequali { runtime { cpu: threads - memory: "4GiB" + memory: memory docker: dockerImage - time_minutes: 59 + time_minutes: timeMinutes + } + parameter_meta { + # inputs + reads: {description: "A FASTQ or BAM file.", category: "required"} + mate_reads: {description: "FASTQ mate file"} + threads: {description: "The number of cores to use.", category: "advanced"} + + outDir: {description: "The path to write the output to.", catgory: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + html: {description: "HTML report file."} + json: {description: "JSON report file for use with MultiQC."} } } \ No newline at end of file From 113d4c58930aa2fcde99eed5b018bb8061e612cb Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 09:13:34 +0100 Subject: [PATCH 788/902] Update changelog --- CHANGELOG.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6acbbc85..97a1d016 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,14 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 5.3.0-dev +version 6.0.0-dev --------------------------- ++ Add Sequali task. ++ Add Clair3 task. ++ Add Modkit task. ++ Modify minimap2 task to accept ubam input, including transfer of methylation + tags. Also sort the BAM output file by coordinate. ++ Update DeepVariant container and update resource requirements. + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. 
From a01b54a0b79a135b3ddf319f71e51d1ef06f0f56 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:00:01 +0100 Subject: [PATCH 789/902] Indent clair3 command --- clair3.wdl | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index 709d59b5..4d9092f2 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -41,20 +41,20 @@ task Clair3 { String modelArg = "~{if defined(modelTar) then basename(select_first([modelTar]), '.tar.gz') else builtinModel}" command <<< - set -e - ~{if defined(modelTar) then "tar -xvf " + modelTar else "" } - mkdir -p $(dirname ~{outputPrefix}) - run_clair3.sh \ - --model=~{modelArg} \ - --ref_fn=~{referenceFasta} \ - --bam_fn=~{bam} \ - --output=out \ - --threads=~{threads} \ - --platform=~{platform} \ - ~{"--sample_name=" + sampleName} \ - ~{true="--include_all_ctgs" false ="" includeAllCtgs} - mv out/merge_output.vcf.gz ~{outputPrefix}.vcf.gz - mv out/merge_output.vcf.gz.tbi ~{outputPrefix}.vcf.gz.tbi + set -e + ~{if defined(modelTar) then "tar -xvf " + modelTar else "" } + mkdir -p $(dirname ~{outputPrefix}) + run_clair3.sh \ + --model=~{modelArg} \ + --ref_fn=~{referenceFasta} \ + --bam_fn=~{bam} \ + --output=out \ + --threads=~{threads} \ + --platform=~{platform} \ + ~{"--sample_name=" + sampleName} \ + ~{true="--include_all_ctgs" false ="" includeAllCtgs} + mv out/merge_output.vcf.gz ~{outputPrefix}.vcf.gz + mv out/merge_output.vcf.gz.tbi ~{outputPrefix}.vcf.gz.tbi >>> output { From b409ca9ed22505252a4ddf8f451eb9b55be530f1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:12:07 +0100 Subject: [PATCH 790/902] More realistic resource requirements for modkit --- modkit.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 382bfc09..92905f06 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -39,8 +39,8 @@ task Pileup { String logFilePath = "modkit.log" Int threads = 8 - String memory = "48GiB" - Int 
timeMinutes = 4320 # 3 Days + String memory = "4GiB" + Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } From bdab5a4c0d0e8474bea79435cc128e50fe5109d2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:12:27 +0100 Subject: [PATCH 791/902] More specific bed file naming --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 92905f06..23269bf3 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -24,7 +24,7 @@ task Pileup { input { File bam File bamIndex - String outputBed = "output.bed" + String outputBed = "output.methyl.bed" File referenceFasta File referenceFastaFai From c79ebd4affcc6524e671da9d6d63f98c9d3674c8 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:13:54 +0100 Subject: [PATCH 792/902] Correct file extension for modkit --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 23269bf3..930b6de9 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -24,7 +24,7 @@ task Pileup { input { File bam File bamIndex - String outputBed = "output.methyl.bed" + String outputBed = "output.bedMethyl" File referenceFasta File referenceFastaFai From 1580aae26fbec6b819d0a905959dbad7acf6fd63 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:14:19 +0100 Subject: [PATCH 793/902] Correct whitespacing Co-authored-by: Davy Cats --- sequali.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/sequali.wdl b/sequali.wdl index cbd3d869..b43cf281 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -53,6 +53,7 @@ task Sequali { docker: dockerImage time_minutes: timeMinutes } + parameter_meta { # inputs reads: {description: "A FASTQ or BAM file.", category: "required"} From 63dceb22e11e16a45f8ac04f1c466100e8a263f6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Feb 2025 16:24:21 +0100 Subject: [PATCH 794/902] Start on a VEP task --- vep.wdl | 74 
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 vep.wdl diff --git a/vep.wdl b/vep.wdl new file mode 100644 index 00000000..83eeac4e --- /dev/null +++ b/vep.wdl @@ -0,0 +1,74 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Vep { + input { + File inputFile + String outputPath = "vep.annotated.vcf.gz" + File cacheTar + File? pluginsTar + String? 
species + Array[String] plugins = [] + Boolean refseq = false + Boolean merged = false + + Boolean everything = false + Boolean symbol = false + + } + + command <<< + set -e + mkdir vep_cache + tar -x --directory vep_cache -f ~{cacheTar} + ~{"tar -x --directory vep_cache -f " + pluginsTar} + + # Output all stats files by default for MultiQC integration + vep \ + --input_file ~{inputFile} \ + ~{"--species " + species} \ + --stats_html --stats_text \ + --dir vep_cache \ # Output all stats files by default for MultiQC integration + + --offline \ + ~{true="--plugin" false="" length(plugins) > 0} {sep=" --plugin " plugins} \ + --vcf \ + --compress-output bgzip \ + ~{true="--refseq" false="" refseq} \ + ~{true="--merged" false="" merged} \ + \ + ~{true="--everything" false="" everything} \ + ~{true="--symbol" false="" symbol} \ + + + # Cleanup the tar extract to save filesystem space + rm -rf vep_cache + + + >>> + + output { + File outputFile = outputPath + File statsHtml = outputPath + "_summary.html" + } + +} \ No newline at end of file From 405395d512611775ed38021d79b3f4f570d0f23e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 14:31:23 +0100 Subject: [PATCH 795/902] Add runtime requirements --- vep.wdl | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/vep.wdl b/vep.wdl index 83eeac4e..496a6b8f 100644 --- a/vep.wdl +++ b/vep.wdl @@ -34,7 +34,10 @@ task Vep { Boolean everything = false Boolean symbol = false - } + String memory = "8GiB" + Int timeMinutes = 5 + ceil(size(inputFile, "MiB") * 3) + String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0" + } command <<< set -e @@ -71,4 +74,15 @@ task Vep { File statsHtml = outputPath + "_summary.html" } -} \ No newline at end of file + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: 
{description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} From b6107be5cdfaf396e53f25f2d93b6220d1f14eb7 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:06:54 +0100 Subject: [PATCH 796/902] Take into account cache tar size for runtime --- vep.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index 496a6b8f..4cec3fa3 100644 --- a/vep.wdl +++ b/vep.wdl @@ -35,7 +35,8 @@ task Vep { Boolean symbol = false String memory = "8GiB" - Int timeMinutes = 5 + ceil(size(inputFile, "MiB") * 3) + # Account time for unpacking the cache. + Int timeMinutes = 1 + ceil(size(cacheTar, GiB)) + ceil(size(inputFile, "MiB") * 3) String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0" } From 5401a6050c9c288f20569b1ffb943f1a05b19d19 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:20:41 +0100 Subject: [PATCH 797/902] Cleanup command --- vep.wdl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/vep.wdl b/vep.wdl index 4cec3fa3..f9e7a4a0 100644 --- a/vep.wdl +++ b/vep.wdl @@ -36,7 +36,7 @@ task Vep { String memory = "8GiB" # Account time for unpacking the cache. 
- Int timeMinutes = 1 + ceil(size(cacheTar, GiB)) + ceil(size(inputFile, "MiB") * 3) + Int timeMinutes = 1 + ceil(size(cacheTar, "GiB")) + ceil(size(inputFile, "MiB") * 3) String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0" } @@ -51,8 +51,7 @@ task Vep { --input_file ~{inputFile} \ ~{"--species " + species} \ --stats_html --stats_text \ - --dir vep_cache \ # Output all stats files by default for MultiQC integration - + --dir vep_cache \ --offline \ ~{true="--plugin" false="" length(plugins) > 0} {sep=" --plugin " plugins} \ --vcf \ From 701b819d7bebab81385dbd3c159f31ab37e5961b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:41:20 +0100 Subject: [PATCH 798/902] Add missing ~ --- vep.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index f9e7a4a0..636a8ce0 100644 --- a/vep.wdl +++ b/vep.wdl @@ -53,7 +53,7 @@ task Vep { --stats_html --stats_text \ --dir vep_cache \ --offline \ - ~{true="--plugin" false="" length(plugins) > 0} {sep=" --plugin " plugins} \ + ~{true="--plugin" false="" length(plugins) > 0} ~{sep=" --plugin " plugins} \ --vcf \ --compress-output bgzip \ ~{true="--refseq" false="" refseq} \ From e4654bc7be895cdf5fc80c02fdbfb84b8941d2aa Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:51:53 +0100 Subject: [PATCH 799/902] properly format commandline option --- vep.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index 636a8ce0..626257a3 100644 --- a/vep.wdl +++ b/vep.wdl @@ -55,7 +55,7 @@ task Vep { --offline \ ~{true="--plugin" false="" length(plugins) > 0} ~{sep=" --plugin " plugins} \ --vcf \ - --compress-output bgzip \ + --compress_output bgzip \ ~{true="--refseq" false="" refseq} \ ~{true="--merged" false="" merged} \ \ From bda5ff43ad460a51adcfa9daeb3432ec2156c80d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 16:21:23 +0100 Subject: [PATCH 800/902] Fix trailing whitespace --- 
vep.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vep.wdl b/vep.wdl index 626257a3..f2ca4a6e 100644 --- a/vep.wdl +++ b/vep.wdl @@ -49,7 +49,7 @@ task Vep { # Output all stats files by default for MultiQC integration vep \ --input_file ~{inputFile} \ - ~{"--species " + species} \ + ~{"--species " + species} \ --stats_html --stats_text \ --dir vep_cache \ --offline \ @@ -58,7 +58,6 @@ task Vep { --compress_output bgzip \ ~{true="--refseq" false="" refseq} \ ~{true="--merged" false="" merged} \ - \ ~{true="--everything" false="" everything} \ ~{true="--symbol" false="" symbol} \ From 967934c2fd0a4a4f29e4ad87475cd9c68a22298a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 16:39:43 +0100 Subject: [PATCH 801/902] Add missing output file param --- vep.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vep.wdl b/vep.wdl index f2ca4a6e..064cf41a 100644 --- a/vep.wdl +++ b/vep.wdl @@ -49,6 +49,7 @@ task Vep { # Output all stats files by default for MultiQC integration vep \ --input_file ~{inputFile} \ + --output_file ~{outputPath} \ ~{"--species " + species} \ --stats_html --stats_text \ --dir vep_cache \ @@ -71,6 +72,7 @@ task Vep { output { File outputFile = outputPath File statsHtml = outputPath + "_summary.html" + File statsTxt = outputPath + "_summary.txt" } runtime { From 115f3cfc0da031309a42a5a02d0825a06e1d3e85 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 17:03:00 +0100 Subject: [PATCH 802/902] Make sure output directory is made --- vep.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/vep.wdl b/vep.wdl index 064cf41a..7fb6a660 100644 --- a/vep.wdl +++ b/vep.wdl @@ -43,6 +43,7 @@ task Vep { command <<< set -e mkdir vep_cache + mkdir -p "$(dirname ~{outputPath})" tar -x --directory vep_cache -f ~{cacheTar} ~{"tar -x --directory vep_cache -f " + pluginsTar} From f29492641550c6d2247a40d216d53c5030d7983d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 14:22:49 +0100 
Subject: [PATCH 803/902] Complete VEP task --- vep.wdl | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/vep.wdl b/vep.wdl index 7fb6a660..8a5a443b 100644 --- a/vep.wdl +++ b/vep.wdl @@ -41,12 +41,14 @@ task Vep { } command <<< - set -e + set -eu mkdir vep_cache mkdir -p "$(dirname ~{outputPath})" tar -x --directory vep_cache -f ~{cacheTar} ~{"tar -x --directory vep_cache -f " + pluginsTar} + # Make sure vep can error, so the removal always succeeds. + set +e # Output all stats files by default for MultiQC integration vep \ --input_file ~{inputFile} \ @@ -61,13 +63,14 @@ task Vep { ~{true="--refseq" false="" refseq} \ ~{true="--merged" false="" merged} \ ~{true="--everything" false="" everything} \ - ~{true="--symbol" false="" symbol} \ - + ~{true="--symbol" false="" symbol} + VEP_EXIT_CODE=$? + set -e # Cleanup the tar extract to save filesystem space rm -rf vep_cache - + exit $VEP_EXIT_CODE >>> output { @@ -83,8 +86,23 @@ task Vep { } parameter_meta { + # input + inputFile: {description: "The VCF to annotate.", category: "required"} + outputPath: {description: "Where to put the output file", category: "advanced"} + cacheTar: {description: "A TAR archive containing the cache. The TAR archives from the VEP website work.", category: "required"} + pluginsTar: {description: "A TAR file with custom plugins.", category: "advanced"} + refseq: {description: "Use the refseq cache", category: "common"} + merged: {description: "Use the merged cache", category: "common"} + everything: {description: "Use all annotation sources bundeld with vep.", category: "common"} + symbol: {description: "Add the gene symbol to the output where available", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + outputFile: {description: "The annotated VEP VCF file."} + statsHtml: {description: "The VEP summary stats HTML file."} + statsTxt: {description: "The VEP summary stats TXT file."} } } From eca4681a0baf841dc2fffc2ca3f22930822740a5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 14:45:48 +0100 Subject: [PATCH 804/902] Add VEP to the changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1276efaa..378731bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Add VEP task. + Add Sequali task. + Add Clair3 task. + Add Modkit task. From 203d178e3ea80abef927e7f1ac67d00fec93ff75 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 15:15:50 +0100 Subject: [PATCH 805/902] Add missing parameter_meta for VEP --- vep.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vep.wdl b/vep.wdl index 8a5a443b..349242fb 100644 --- a/vep.wdl +++ b/vep.wdl @@ -91,6 +91,8 @@ task Vep { outputPath: {description: "Where to put the output file", category: "advanced"} cacheTar: {description: "A TAR archive containing the cache. 
The TAR archives from the VEP website work.", category: "required"} pluginsTar: {description: "A TAR file with custom plugins.", category: "advanced"} + species: {description: "Which species cache to use", category: "common"} + plugins: {description: "Which plugins to use", category: "common"} refseq: {description: "Use the refseq cache", category: "common"} merged: {description: "Use the merged cache", category: "common"} everything: {description: "Use all annotation sources bundeld with vep.", category: "common"} From 117e5317fbb50c5989b1afd668d469569b78127e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 15:20:15 +0100 Subject: [PATCH 806/902] Add missing Minimap2 parameter_meta --- minimap2.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/minimap2.wdl b/minimap2.wdl index 18127cb1..da301bd3 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -183,6 +183,11 @@ task Mapping { mismatchPenalty: {description: "Mismatch penalty.", category: "advanced"} tagsToKeep: {description: "Tags to keep from the input unaligned BAM file.", category: "Advanced"} howToFindGTAG: {description: "How to find GT-AG. 
f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} + compressionLevel: {description: "compressionLevel for the output file", category: "advanced"} + additionalSortThreads: {description: "Extra sorting threads used for samtools sort", category: "advanced"} + sortMemoryGb: {description: "Amount of memory set for sorting", category: "advanced"} + nameSorted: {description: "Output a name sorted file instead", category: "common"} + cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 319501e7ebbc0fa76baaac1d48d56294eda4b86c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 11:21:25 +0100 Subject: [PATCH 807/902] Add a samtools split task --- samtools.wdl | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index fbb445e7..a82bbda1 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -514,6 +514,61 @@ task Sort { } } +task Split { + input { + File inputBam + Directory outputPath + String? unaccountedPath + String? filenameFormat = "%!.%." + String? outputFormat = "bam" + Boolean writeIndex = false + + Int threads = 1 + String memory = "1GiB" + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + } + + command { + set -e + mkdir -p "~{outputPath}" + samtools split \ + --output-fmt ~{outputFormat} \ + -f "~{outputPath}/rg/~{filenameFormat}" \ + ~{"-u " + unaccountedPath} \ + ~{true="--write-index" false="" writeIndex} \ + ~{inputBam} + } + + output { + Array[File] split = glob(outputPath + "/rg/*." + outputFormat) + File? 
unaccounted = unaccountedPath + } + + runtime { + cpu: threads + memory: memory + docker: dockerImage + time_minutes: timeMinutes + } + + parameter_meta { + # inputs + inputBam: {description: "The bam file to split.", category: "required"} + outputPath: {description: "Directory to store output bams", category: "required"} + + # Optional parameters + unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "optional"} + filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. filename extension for output format", category: "format"} + outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "format"} + writeIndex: {description: "Automatically index outputs", category: "indexing"} + + # outputs + split: {description: "BAM file split by read groups"} + unaccounted: {description: "Reads with no RG tag or an unrecognised RG tag."} + } +} + task Tabix { input { File inputFile From 60dcef74f6229d81d19436a361f3e4e6aa41ddd0 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 11:22:35 +0100 Subject: [PATCH 808/902] Register in changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 378731bd..2993ddc0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ version 6.0.0-dev + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. ++ New samtools task: split. 
version 5.2.0 --------------------------- From 4030091ee212be3cc040c69a61834684b8c8be0e Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 11:27:49 +0100 Subject: [PATCH 809/902] Directory not yet available --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index a82bbda1..51230097 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -517,7 +517,7 @@ task Sort { task Split { input { File inputBam - Directory outputPath + String outputPath String? unaccountedPath String? filenameFormat = "%!.%." String? outputFormat = "bam" From 8a0de277c0b69a7607757a0c8c102a379e8e444c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 11:28:19 +0100 Subject: [PATCH 810/902] Must be defined --- samtools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 51230097..a2be09a4 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -519,8 +519,8 @@ task Split { File inputBam String outputPath String? unaccountedPath - String? filenameFormat = "%!.%." - String? outputFormat = "bam" + String filenameFormat = "%!.%." 
+ String outputFormat = "bam" Boolean writeIndex = false Int threads = 1 From b70891c3aea7314777aaf5122de3beadf10965e3 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 12:27:17 +0100 Subject: [PATCH 811/902] noticed in wdl-aid that only these are permitted --- samtools.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index a2be09a4..2fe9a9f7 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -558,10 +558,10 @@ task Split { outputPath: {description: "Directory to store output bams", category: "required"} # Optional parameters - unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "optional"} - filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. filename extension for output format", category: "format"} - outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "format"} - writeIndex: {description: "Automatically index outputs", category: "indexing"} + unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "common"} + filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. 
filename extension for output format", category: "common"} + outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "advanced"} + writeIndex: {description: "Automatically index outputs", category: "advanced"} # outputs split: {description: "BAM file split by read groups"} From 1ec88558c5b21cb1362518b2c4af95a865abcc68 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 13:01:26 +0100 Subject: [PATCH 812/902] Add compression level parameter, defaulting to 1 --- samtools.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 2fe9a9f7..c46ea88b 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -523,6 +523,8 @@ task Split { String outputFormat = "bam" Boolean writeIndex = false + Int compressionLevel = 1 + Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) @@ -534,6 +536,7 @@ task Split { mkdir -p "~{outputPath}" samtools split \ --output-fmt ~{outputFormat} \ + --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ ~{true="--write-index" false="" writeIndex} \ @@ -562,6 +565,7 @@ task Split { filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. 
filename extension for output format", category: "common"} outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "advanced"} writeIndex: {description: "Automatically index outputs", category: "advanced"} + compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"} # outputs split: {description: "BAM file split by read groups"} From 153db04100bf78f07b898d523a6da84544d8a02b Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 13:01:37 +0100 Subject: [PATCH 813/902] default to indexing --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index c46ea88b..554d0903 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -521,7 +521,7 @@ task Split { String? unaccountedPath String filenameFormat = "%!.%." String outputFormat = "bam" - Boolean writeIndex = false + Boolean writeIndex = true Int compressionLevel = 1 From 1522785ae1cec9254e5bf57f942260eab2babfd4 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 13:08:33 +0100 Subject: [PATCH 814/902] Remove control of output format --- samtools.wdl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 554d0903..7eba529c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -520,7 +520,6 @@ task Split { String outputPath String? unaccountedPath String filenameFormat = "%!.%." - String outputFormat = "bam" Boolean writeIndex = true Int compressionLevel = 1 @@ -535,7 +534,7 @@ task Split { set -e mkdir -p "~{outputPath}" samtools split \ - --output-fmt ~{outputFormat} \ + --output-fmt bam \ --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ @@ -544,7 +543,7 @@ task Split { } output { - Array[File] split = glob(outputPath + "/rg/*." + outputFormat) + Array[File] splitBam = glob(outputPath + "/rg/*.bam") File? 
unaccounted = unaccountedPath } @@ -563,7 +562,6 @@ task Split { # Optional parameters unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "common"} filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. filename extension for output format", category: "common"} - outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "advanced"} writeIndex: {description: "Automatically index outputs", category: "advanced"} compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"} From 2bba90e99bbc61dc08905a569d8bbb3df285878a Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 13:08:42 +0100 Subject: [PATCH 815/902] include indexes --- samtools.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 7eba529c..bfed7560 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -544,6 +544,7 @@ task Split { output { Array[File] splitBam = glob(outputPath + "/rg/*.bam") + Array[File] splitBamIndex = glob(outputPath + "/rg/*.bai") File? 
unaccounted = unaccountedPath } @@ -566,7 +567,8 @@ task Split { compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"} # outputs - split: {description: "BAM file split by read groups"} + splitBam: {description: "BAM file split by read groups"} + splitBamIndex: {description: "BAM indexes"} unaccounted: {description: "Reads with no RG tag or an unrecognised RG tag."} } } From bd4a8567cdedabf6aa1e779fa1af731b09e64b49 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 15:19:02 +0100 Subject: [PATCH 816/902] write index is non-optional --- samtools.wdl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index bfed7560..1660aac3 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -520,7 +520,6 @@ task Split { String outputPath String? unaccountedPath String filenameFormat = "%!.%." - Boolean writeIndex = true Int compressionLevel = 1 @@ -538,7 +537,7 @@ task Split { --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ - ~{true="--write-index" false="" writeIndex} \ + --write-index \ ~{inputBam} } @@ -563,7 +562,6 @@ task Split { # Optional parameters unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "common"} filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. 
filename extension for output format", category: "common"} - writeIndex: {description: "Automatically index outputs", category: "advanced"} compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"} # outputs From be0aabe03a8615dad5190b5e4c4c9869bb472c2e Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 15:49:15 +0100 Subject: [PATCH 817/902] make subdirectory as well --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 1660aac3..c452664c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -531,7 +531,7 @@ task Split { command { set -e - mkdir -p "~{outputPath}" + mkdir -p "~{outputPath}/rg/" samtools split \ --output-fmt bam \ --output-fmt-option level=~{compressionLevel} \ From 10e83c1c116d55d148534c7f9fc56056773aadb7 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 16:03:06 +0100 Subject: [PATCH 818/902] emits csi extension instead --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index c452664c..191a99a2 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -543,7 +543,7 @@ task Split { output { Array[File] splitBam = glob(outputPath + "/rg/*.bam") - Array[File] splitBamIndex = glob(outputPath + "/rg/*.bai") + Array[File] splitBamIndex = glob(outputPath + "/rg/*.bam.csi") File? 
unaccounted = unaccountedPath } From 6ebf7cd161f15add1c8ed9af8f000ab0952d232c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 16:14:42 +0100 Subject: [PATCH 819/902] missing threads --- samtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index 191a99a2..19ad8dab 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -537,6 +537,7 @@ task Split { --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ + --threads ~{threads} \ --write-index \ ~{inputBam} } From 6f9350106827f108f7be38b0d0440a0243174664 Mon Sep 17 00:00:00 2001 From: Helena Date: Mon, 10 Mar 2025 14:00:24 +0100 Subject: [PATCH 820/902] Update samtools.wdl --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index fbb445e7..66dc647f 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -167,7 +167,7 @@ task Fastq { Int threads = 1 String memory = "1GiB" - Int timeMinutes = 1 + ceil(size(inputBam) * 2) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } From 0ff8d9891a82ff8daf784b782d5007b4ed5cdd16 Mon Sep 17 00:00:00 2001 From: Helena Date: Wed, 19 Mar 2025 18:33:55 +0100 Subject: [PATCH 821/902] Add link to mentioned VEP website to save time in future --- vep.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index 349242fb..e99c9fdb 100644 --- a/vep.wdl +++ b/vep.wdl @@ -89,7 +89,7 @@ task Vep { # input inputFile: {description: "The VCF to annotate.", category: "required"} outputPath: {description: "Where to put the output file", category: "advanced"} - cacheTar: {description: "A TAR archive containing the cache. The TAR archives from the VEP website work.", category: "required"} + cacheTar: {description: "A TAR archive containing the cache. 
The TAR archives from the VEP website work (http://www.ensembl.org/info/docs/tools/vep/script/vep_cache.html)", category: "required"} pluginsTar: {description: "A TAR file with custom plugins.", category: "advanced"} species: {description: "Which species cache to use", category: "common"} plugins: {description: "Which plugins to use", category: "common"} From 3ea61f0d2fe6f16eba1afde9255c15bc368975dd Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 25 Mar 2025 14:56:01 +0100 Subject: [PATCH 822/902] Add a samtools quickcheck task which returns the input bam. This is designed to enable us to more quickly catch problematic BAMs, and fail earlier in the pipeline than after we've wasted some significant compute time. --- CHANGELOG.md | 1 + samtools.wdl | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 378731bd..a41b47cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ version 6.0.0-dev + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. ++ Added `samtools.Quickcheck` to allow failing on truncated files early. 
version 5.2.0 --------------------------- diff --git a/samtools.wdl b/samtools.wdl index 66dc647f..ea615bae 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -452,6 +452,46 @@ task Merge { } } +task Quickcheck { + input { + File inputBam + + Int threads = 1 + Int memoryGb = 1 + Int timeMinutes = 1 + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + } + + command { + set -e + samtools quickcheck ~{inputBam} + } + + output { + File outputBam = inputBam + } + + runtime { + cpu: threads + memory: "~{memoryGb}GiB" + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM/SAM/CRAM file.", category: "required"} + + threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} + memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "The exact same input file, but use this so it is recognised as a dependent task."} + } +} + task Sort { input { File inputBam From 38c5c9ad46e56e6c6e04853bc278e07c24221a28 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 25 Mar 2025 14:20:20 +0100 Subject: [PATCH 823/902] Collate fastq file before splitting It was reported to me that the _R1/_R2 from `samtools fastq` were not collated properly, that a single read was appearing in two wildly different places in R1/R2 which is completely silly. 
I have tried to reproduce this but thus far have been unable to: $ samtools view -b FILE.bam chrM > tmp.bam $ du -h tmp.bam 560K tmp.bam $ samtools fastq -1 paired1.fq -2 paired2.fq -0 /dev/null -s /dev/null -n tmp.bam [M::bam2fq_mainloop] discarded 480 singletons [M::bam2fq_mainloop] processed 608 reads $ diff <(grep ^@D paired1.fq) <(grep ^@D paired2.fq) $ Note the complete lack of difference between ordering. But if we look at the output of files which have come out of this tool, there are clear differences: $ zless R1.fastq.gz | grep '^@' | head -n 3 @D_____________________:1108:3364:16050 @D_____________________:2113:10647:9989 @D_____________________:2208:9374:82968 $ zless R2.fastq.gz | grep '^@' | head -n 3 @D_____________________:1108:3364:16050 @D_____________________:1214:3361:56060 @D_____________________:1309:8329:98995 these were produced by the command $ set -e $ mkdir -p "$(dirname split/R1.fastq.gz)" $ samtools fastq \ -1 split/R1.fastq.gz \ -2 split/R2.fastq.gz \ -n \ --threads 1 \ /mnt/miniwdl/out.bam This is indeed documented behaviour however: > If the input contains read-pairs which are to be interleaved or > written to separate files in the same order, then the input should be > first collated by name. Use samtools collate or samtools sort -n to > ensure this. > > https://www.htslib.org/doc/samtools-fasta.html#DESCRIPTION So it makes some sense to collate, or at some point ensure that the BAMs are sorted. I think there is a discussion to be had over whether automatic collation is sensible or a waste of runtime, but on the other hand, this is maybe a small footgun and eliminating it would make sense to reduce the potential failure modes (given our focus on reducing risk and all.)
--- CHANGELOG.md | 1 + samtools.wdl | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 378731bd..abf77c00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ version 6.0.0-dev + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. ++ Fixed bug whereby `samtools.Fastq` could produce out of sync R1/R2 when used with an unsorted bam input. `samtools collate` is now used by default to group reads by readname in order to avoid this issue. version 5.2.0 --------------------------- diff --git a/samtools.wdl b/samtools.wdl index 66dc647f..02a5ed52 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -174,6 +174,7 @@ task Fastq { command { set -e mkdir -p "$(dirname ~{outputRead1})" + samtools collate -u -O ~{inputBam} | \ samtools fastq \ ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ ~{"-2 " + outputRead2} \ @@ -184,8 +185,7 @@ task Fastq { ~{true="-N" false="-n" appendReadNumber} \ ~{true="-O" false="" outputQuality} \ ~{"-c " + compressionLevel} \ - ~{"--threads " + threads} \ - ~{inputBam} + ~{"--threads " + threads} } output { From 47efde79998bd64c25ef546e6387ff37254fa192 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 26 Mar 2025 12:23:19 +0100 Subject: [PATCH 824/902] Hardcode runtime per feedback --- samtools.wdl | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index ea615bae..8bb2df87 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -456,9 +456,6 @@ task Quickcheck { input { File inputBam - Int threads = 1 - Int memoryGb = 1 - Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } @@ -472,9 +469,7 @@ task Quickcheck { } runtime { - cpu: threads - memory: "~{memoryGb}GiB" - time_minutes: timeMinutes + time_minutes: 5 docker: dockerImage } @@ -482,9 +477,6 @@ task 
Quickcheck { # inputs inputBam: {description: "The input BAM/SAM/CRAM file.", category: "required"} - threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} - memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs From 9fd1c2cfb9431a31d48dab6eaadf9f14faf96326 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 26 Mar 2025 14:13:59 +0100 Subject: [PATCH 825/902] do not use default cpu/mem --- samtools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 8bb2df87..a009500c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -469,7 +469,9 @@ task Quickcheck { } runtime { + cpu: 1 time_minutes: 5 + memory: "1GiB" docker: dockerImage } From d0cc47c6421d990b2f2ed18b6ef5476cd5a19dd4 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Thu, 27 Mar 2025 17:46:10 +0100 Subject: [PATCH 826/902] Add wa/wb/s flags to bedtools intersect Fix bug whereby missing outdir would cause a failure. --- CHANGELOG.md | 1 + bedtools.wdl | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 378731bd..4bd6ae1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ version 6.0.0-dev + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. ++ Update `bedtools.Intersect` to support `-wa`, `-wb`, and `-s` flags. 
version 5.2.0 --------------------------- diff --git a/bedtools.wdl b/bedtools.wdl index fe18ede6..a5d8aab3 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -267,6 +267,10 @@ task Intersect { File? faidx # Giving a faidx file will set the sorted option. + Boolean writeA = false + Boolean writeB = false + Boolean stranded = false + String memory = "~{512 + ceil(size([regionsA, regionsB], "MiB"))}MiB" Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "GiB")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" @@ -276,10 +280,14 @@ task Intersect { command { set -e + mkdir -p "$(dirname ~{outputBed})" ~{"cut -f1,2 " + faidx} ~{true="> sorted.genome" false ="" sorted} bedtools intersect \ -a ~{regionsA} \ -b ~{regionsB} \ + ~{true="-wa" false="" writeA} \ + ~{true="-wb" false="" writeB} \ + ~{true="-s" false="" stranded} \ ~{true="-sorted" false="" sorted} \ ~{true="-g sorted.genome" false="" sorted} \ > ~{outputBed} @@ -301,6 +309,11 @@ task Intersect { regionsB: {description: "Region file b to intersect.", category: "required"} outputBed: {description: "The path to write the output to.", category: "advanced"} faidx: {description: "The fasta index (.fai) file that is used to create the genome file required for sorted output. Implies sorted option.", category: "common"} + + writeA: {description: "Write the original entry in A for each overlap.", category: "advanced"} + writeB: {description: "Write the original entry in B for each overlap. Useful for knowing what A overlaps.", category: "advanced"} + stranded: {description: "Force “strandedness”. That is, only report hits in B that overlap A on the same strand. 
By default, overlaps are reported without respect to strand.", category: "advanced"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From fff0fe8fe9cf1f022369dcfb05e5f4980f0f8115 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 08:52:51 +0200 Subject: [PATCH 827/902] Update pbmm2 image --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index ea7c05df..91b0b1fe 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -31,7 +31,7 @@ task Mapping { Int cores = 4 String memory = "30GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 2000 / cores) - String dockerImage = "quay.io/biocontainers/pbmm2:1.3.0--h56fc30b_1" + String dockerImage = "quay.io/biocontainers/pbmm2:1.17.0--h9ee0642_0" } command { From 084486c19bcde6398d41381c0628f5c359c7c53b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 09:05:59 +0200 Subject: [PATCH 828/902] Add pbmm2 outputPrefix parameter --- CHANGELOG.md | 2 ++ pbmm2.wdl | 12 ++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dde73d44..dd536e5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Allow pbmm2 to work with a set output prefix for the BAM file. ++ Update pbmm2 docker container to version 1.17 + Add VEP task. + Add Sequali task. + Add Clair3 task. 
diff --git a/pbmm2.wdl b/pbmm2.wdl index 91b0b1fe..915fbb02 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -25,6 +25,7 @@ task Mapping { String presetOption Boolean sort=true String sample + String outputPrefix = sample + ".align" File referenceMMI File queryFile @@ -35,6 +36,8 @@ task Mapping { } command { + set -e + mkdir -p ~{outputPrefix} pbmm2 align \ --preset ~{presetOption} \ ~{true="--sort" false="" sort} \ @@ -42,12 +45,12 @@ task Mapping { ~{referenceMMI} \ ~{queryFile} \ --sample ~{sample} \ - ~{sample}.align.bam + ~{outputPrefix}.bam } output { - File outputAlignmentFile = sample + ".align.bam" - File outputIndexFile = sample + ".align.bam.bai" + File outputAlignmentFile = outputPrefix + ".bam" + File outputIndexFile = outputPrefix + ".bam.bai" } runtime { @@ -62,6 +65,7 @@ task Mapping { presetOption: {description: "This option applies multiple options at the same time.", category: "required"} sort: {description: "Sort the output bam file.", category: "advanced"} sample: {description: "Name of the sample.", category: "required"} + outputPrefix: {description: "The prefix of the output filename before the .bam extension." category: "advanced"} referenceMMI: {description: "MMI file for the reference.", category: "required"} queryFile: {description: "BAM file with reads to align against the reference.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} @@ -69,7 +73,7 @@ task Mapping { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - # outputs + # output outputAlignmentFile: {description: "Mapped bam file."} outputIndexFile: {description: "Bam index file."} } From 912754990f49d74b69a170bf68901e6ecd1f9557 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 11:52:02 +0200 Subject: [PATCH 829/902] Use a better output prefix Co-authored-by: Davy Cats --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index 915fbb02..f8abbd64 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -25,7 +25,7 @@ task Mapping { String presetOption Boolean sort=true String sample - String outputPrefix = sample + ".align" + String outputPrefix = "./~{sample}.align" File referenceMMI File queryFile From 408757f683bf02d0bcf214cd72a4aee732d520d9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 11:52:16 +0200 Subject: [PATCH 830/902] Add missing dirname call Co-authored-by: Davy Cats --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index f8abbd64..b00e249e 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -37,7 +37,7 @@ task Mapping { command { set -e - mkdir -p ~{outputPrefix} + mkdir -p $(dirname ~{outputPrefix}) pbmm2 align \ --preset ~{presetOption} \ ~{true="--sort" false="" sort} \ From 8e008554a71cb5de37c69f80321b0d4d39dcf750 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 11:58:13 +0200 Subject: [PATCH 831/902] Add missing comma --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index b00e249e..73e74c0c 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -65,7 +65,7 @@ task Mapping { presetOption: {description: "This option applies multiple options at the same time.", category: "required"} sort: {description: "Sort the output bam file.", category: "advanced"} sample: {description: "Name of the sample.", category: "required"} - 
outputPrefix: {description: "The prefix of the output filename before the .bam extension." category: "advanced"} + outputPrefix: {description: "The prefix of the output filename before the .bam extension.", category: "advanced"} referenceMMI: {description: "MMI file for the reference.", category: "required"} queryFile: {description: "BAM file with reads to align against the reference.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} From 7d6da07cd4dbe09e42cf343e9077d0118e4d1264 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Thu, 27 Mar 2025 17:48:03 +0100 Subject: [PATCH 832/902] Deprecated bedgraph option, produce it by default --- CHANGELOG.md | 1 + modkit.wdl | 15 ++++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd536e5e..337a68db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ version 6.0.0-dev + Fixed bug whereby `samtools.Fastq` could produce out of sync R1/R2 when used with an unsorted bam input. `samtools collate` is now used by default to group reads by readname in order to avoid this issue. + New samtools task: split. + Update `bedtools.Intersect` to support `-wa`, `-wb`, and `-s` flags. ++ Deprecate `modkit.Pileup`'s bedGraph option, it is now output by default. version 5.2.0 --------------------------- diff --git a/modkit.wdl b/modkit.wdl index 930b6de9..7376a567 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -25,6 +25,7 @@ task Pileup { File bam File bamIndex String outputBed = "output.bedMethyl" + String outputBedGraph = "m_CG0_combined.bedgraph" File referenceFasta File referenceFastaFai @@ -34,7 +35,6 @@ task Pileup { Boolean cpg = false Boolean combineMods = false Boolean combineStrands = false - Boolean bedgraph = false String? 
ignore String logFilePath = "modkit.log" @@ -42,7 +42,6 @@ task Pileup { String memory = "4GiB" Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" - } command <<< @@ -58,15 +57,17 @@ task Pileup { ~{true="--cpg" false="" cpg} \ ~{true="--combine-mods" false="" combineMods} \ ~{true="--combine-strands" false="" combineStrands} \ - ~{true="--bedgraph" false="" bedgraph} \ --log-filepath ~{logFilePath} \ ~{bam} \ - ~{outputBed} + - | tee ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > ~{outputBedGraph} >>> + # You can use modkit pileup ${bam_path} - | tee out.bedmethyl | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > out.bg to get both outputs at once without running anything twice. + # https://github.com/nanoporetech/modkit/issues/210#issuecomment-2181706374 + output { - File? out = outputBed # Normal mode - Array[File] outFiles = glob(outputBed + "/*") # Bedgraph mode + File out = outputBed # Normal mode + File outFiles = outputBedGraph # Bedgraph mode File logFile = logFilePath } @@ -104,4 +105,4 @@ task Pileup { outFiles: {description: "Output files when bedgraph = true."} logFile: {description: "The generated log file."} } -} \ No newline at end of file +} From 9d2a4735bf221410b7a1b6b3ad1cd5e5edad3423 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Mar 2025 14:33:52 +0100 Subject: [PATCH 833/902] Update parameter_meta for modkit --- modkit.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 7376a567..5ba1f501 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -84,14 +84,14 @@ task Pileup { bamIndex: {description: "The index for the input alignment file", category: "required"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - outputBed: {description: "The output name where the data 
should be placed.", category: "common"} + outputBed: {description: "The output name where the bedMethyl file should be placed.", category: "common"} + outputBedgraph: {description: "The output name where the bedgraph file should be placed", category: "common"} intervalSize: {description: "Sets the interval size", category: "advanced"} includeBed: {description: "Bed file with regions to include", category: "advanced"} cpg: {description: "Whether to call only at cpg sites", category: "advanced"} combineMods: {description: "Whether to combine modifications in the output", category: "advanced"} combineStrands: {description: "Whether to combine strands in the output", category: "advanced"} - bedgraph: {description: "Whether to create a folder instead with a bedgraph file", category: "advanced"} ignore: {description: "Modification type to ignore. For example 'h'.", category: "advanced"} logFilePath: {description: "Path where the log file should be written.", category: "advanced"} From feaacf40fb1fb2edf4588d63b5baee4f8eac18a2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Mar 2025 14:37:39 +0100 Subject: [PATCH 834/902] Fix typo --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 5ba1f501..9311e4da 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -85,7 +85,7 @@ task Pileup { referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} outputBed: {description: "The output name where the bedMethyl file should be placed.", category: "common"} - outputBedgraph: {description: "The output name where the bedgraph file should be placed", category: "common"} + outputBedGraph: {description: "The output name where the bedgraph file should be placed", category: "common"} intervalSize: {description: "Sets the interval size", category: "advanced"} includeBed: {description: "Bed file with regions to 
include", category: "advanced"} From 9e057d6ce259e5fc96ffb04208c37bda8b43ec3e Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Mon, 31 Mar 2025 14:04:15 +0200 Subject: [PATCH 835/902] split into separate files --- modkit.wdl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 9311e4da..78df28f4 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -25,7 +25,7 @@ task Pileup { File bam File bamIndex String outputBed = "output.bedMethyl" - String outputBedGraph = "m_CG0_combined.bedgraph" + String outputBedGraph = "combined.bedgraph" File referenceFasta File referenceFastaFai @@ -59,7 +59,9 @@ task Pileup { ~{true="--combine-strands" false="" combineStrands} \ --log-filepath ~{logFilePath} \ ~{bam} \ - - | tee ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > ~{outputBedGraph} + - | tee ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10 >> "~{outputBedGraph}_"$4"_"$6".bedGraph"}' + # Separately generate the combined file as well, so users can have a choice. + cat ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > ~{outputBedGraph} >>> # You can use modkit pileup ${bam_path} - | tee out.bedmethyl | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > out.bg to get both outputs at once without running anything twice. 
@@ -67,7 +69,8 @@ task Pileup { output { File out = outputBed # Normal mode - File outFiles = outputBedGraph # Bedgraph mode + File outGraph = outputBedGraph # Normal mode + Array[File] outFiles = glob(outputBedGraph + "*.bedGraph") # Bedgraph mode File logFile = logFilePath } From e439d58c8e9584c8957a4ecb265ce5f7de9f96ce Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 14:06:31 +0200 Subject: [PATCH 836/902] Add Mosdepth task --- CHANGELOG.md | 1 + mosdepth.wdl | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 mosdepth.wdl diff --git a/CHANGELOG.md b/CHANGELOG.md index dd536e5e..986dfd13 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Add Mosdepth task. + Allow pbmm2 to work with a set output prefix for the BAM file. + Update pbmm2 docker container to version 1.17 + Add VEP task. diff --git a/mosdepth.wdl b/mosdepth.wdl new file mode 100644 index 00000000..0f800769 --- /dev/null +++ b/mosdepth.wdl @@ -0,0 +1,106 @@ +version 1.0 + +# Copyright (c) 2025 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Mosdepth { + input { + File bam + File bamIndex + String prefix = "./out" + + String? chrom + # --by flag takes a BED file or an integer. So there need to be two inputs in WDL's typed system. + File? byBed + Int? byWindow + File? fasta + Int? flag + Int? includeFlag + + Boolean noPerBase = false + Boolean d4 = false + Boolean fastMode = false + + Int threads = 1 + String memory = "1GiB" + Int timeMinutes = 10 + ceil(size(bam, "G")) * 4 + String dockerImage = "quay.io/biocontainers/mosdepth:0.3.10--h4e814b3_1" + } + + command <<< + set -e + mkdir -p $(dirname ~{prefix}) + mosdepth \ + --threads ~{threads} \ + ~{"--chrom " + chrom} \ + ~{"--by " + byBed} \ + ~{"--by " + byWindow} \ + ~{"--fasta " + fasta} \ + ~{true="--no-per-base" false="" noPerBase} \ + ~{true="--d4" false="" d4} \ + ~{"--flag " + flag} \ + ~{"--include-flag " + includeFlag} \ + ~{true="--fast-mode" false="" fastMode} \ + ~{prefix} ~{bam} + >>> + + output { + File globalDist = "~{prefix}.mosdepth.global.dist.txt" + File summary = "~{prefix}.mosdepth.summary.txt" + File? perBaseBed = "~{prefix}.per-base.bed.gz" + File? 
regionsBed = "~{prefix}.regions.bed.gz" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + bam: {description: "Input BAM or CRAM file.", category: "required"} + bamIndex: {description: "Index for the input BAM or CRAM file.", category: "required"} + prefix: {description: "Output prefix.", category: "common"} + + chrom: {description: "Chromosome to restrict depth calculation.", category: "advanced"} + byBed: {description: "Bed file with windows to include for the --by flag. Should not be used together with byWindow.", category: "common"} + byWindow: {description: "Integer window size for the --by flag. Should not be used together with byBed.", category: "advanced"} + fasta: {description: "FASTA file, only necessary when CRAM input is used.", category: "advanced"} + flag: {description: "Exclude reads with any of the bits in FLAG set.", category: "advanced"} + includeFlag: {description: "Only include reads with any of the bits in FLAG set.", category: "advanced"} + + noPerBase: {description: "Don't output per-base depth. Skipping this output will speed execution.", category: "common"} + d4: {description: "output per-base depth in d4 format.", category: "advanced"} + fastMode: {description: "Don't look at internal cigar operations or correct mate overlaps (recommended for most use-cases).", category: "common"} + + threads: {description: "How many threads to use.", category: "common"} + memory: {description: "How much memory to allocate.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + globalDist: {description: "Global distribution table file."} + summary: {description: "Summary table file."} + perBaseBed: {description: "Per base coverage BED file."} + regionsBed: {description: "Per region BED file, if byBed or byWindow is used."} + } +} \ No newline at end of file From 7bcac8ea2636cbeeae247d783c0dc5558bb0955a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 09:22:31 +0200 Subject: [PATCH 837/902] Update all samtools images --- samtools.wdl | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index d724a692..2388813e 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -72,7 +72,7 @@ task DictAndFaidx { String javaXmx = "2G" String memory = "3GiB" Int timeMinutes = 5 + ceil(size(inputFile, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } String outputFile = basename(inputFile) @@ -119,7 +119,7 @@ task Faidx { String outputDir String memory = "2GiB" - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -168,7 +168,7 @@ task Fastq { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -232,7 +232,7 @@ task FilterShortReadsBam { String memory = "1GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") @@ -278,7 +278,7 
@@ task Flagstat { String memory = "256MiB" # Only 40.5 MiB used for 150G bam file. Int timeMinutes = 1 + ceil(size(inputBam, "G")) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -318,7 +318,7 @@ task Index { String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 4) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } # Select_first is needed, otherwise womtool validate fails. @@ -369,7 +369,7 @@ task Markdup { String outputBamPath Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -408,7 +408,7 @@ task Merge { Int threads = 1 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } String indexPath = sub(outputBamPath, "\.bam$",".bai") @@ -456,7 +456,7 @@ task Quickcheck { input { File inputBam - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -497,7 +497,7 @@ task Sort { Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } # Select first needed as outputPath is optional input (bug in cromwell). 
@@ -560,7 +560,7 @@ task Split { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -669,7 +669,7 @@ task View { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inFile, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } String outputIndexPath = basename(outputFileName) + ".bai" From 435a719147253df23cad2674736d8d699b186e77 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 09:56:35 +0200 Subject: [PATCH 838/902] Task updates to samtools.wdl --- CHANGELOG.md | 6 +++++ samtools.wdl | 72 +++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 58 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 986dfd13..8b95b904 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,12 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Update docker images in samtools.wdl ++ Add threads and compression levels to applicable tasks. Default to + compression level 1. ++ samtools BgzipAndIndex and Tabix "type" parameter changed to "preset" as is + the name of the flag. ++ Unused javaXmx parameter removed from samtools DictAndFaidx + Add Mosdepth task. + Allow pbmm2 to work with a set output prefix for the BAM file. 
+ Update pbmm2 docker container to version 1.17 diff --git a/samtools.wdl b/samtools.wdl index 2388813e..30e938b4 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -24,11 +24,13 @@ task BgzipAndIndex { input { File inputFile String outputDir - String type = "vcf" + String preset = "vcf" + Int compressLevel = 1 + Int threads = 1 String memory = "2GiB" Int timeMinutes = 1 + ceil(size(inputFile, "GiB")) - String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" + String dockerImage = "quay.io/biocontainers/htslib:1.21--h566b1c6_1" } String outputGz = outputDir + "/" + basename(inputFile) + ".gz" @@ -36,8 +38,15 @@ task BgzipAndIndex { command { set -e mkdir -p "$(dirname ~{outputGz})" - bgzip -c ~{inputFile} > ~{outputGz} - tabix ~{outputGz} -p ~{type} + bgzip \ + --threads ~{threads} \ + --compress-level ~{compressLevel} \ + -c ~{inputFile} > ~{outputGz} + + tabix \ + --preset ~{preset} \ + --threads ~{threads - 1} \ + ~{outputGz} } output { @@ -46,6 +55,7 @@ task BgzipAndIndex { } runtime { + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -55,7 +65,7 @@ task BgzipAndIndex { # inputs inputFile: {description: "The file to be compressed and indexed.", category: "required"} outputDir: {description: "The directory in which the output will be placed.", category: "required"} - type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} + preset: {description: "The preset for the file (eg. vcf or bed) to be compressed and indexed.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -69,7 +79,6 @@ task BgzipAndIndex { task DictAndFaidx { input { File inputFile - String javaXmx = "2G" String memory = "3GiB" Int timeMinutes = 5 + ceil(size(inputFile, "GiB") * 5) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" @@ -102,7 +111,6 @@ task DictAndFaidx { parameter_meta { # inputs inputFile: {description: "The input fasta file.", category: "required"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -163,7 +171,7 @@ task Fastq { Int? includeFilter Int? excludeFilter Int? excludeSpecificFilter - Int? compressionLevel + Int compressionLevel = 1 Int threads = 1 String memory = "1GiB" @@ -184,8 +192,8 @@ task Fastq { ~{"-G " + excludeSpecificFilter} \ ~{true="-N" false="-n" appendReadNumber} \ ~{true="-O" false="" outputQuality} \ - ~{"-c " + compressionLevel} \ - ~{"--threads " + threads} + -c ~{compressionLevel} \ + "--threads " ~{threads - 1} } output { @@ -276,6 +284,8 @@ task Flagstat { File inputBam String outputPath + Int threads = 1 + String memory = "256MiB" # Only 40.5 MiB used for 150G bam file. 
Int timeMinutes = 1 + ceil(size(inputBam, "G")) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" @@ -284,7 +294,9 @@ task Flagstat { command { set -e mkdir -p "$(dirname ~{outputPath})" - samtools flagstat ~{inputBam} > ~{outputPath} + samtools flagstat \ + --threads ~{threads - 1} + ~{inputBam} > ~{outputPath} } output { @@ -292,6 +304,7 @@ task Flagstat { } runtime { + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -316,6 +329,8 @@ task Index { String? outputBamPath + Int threads = 1 + String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" @@ -334,7 +349,9 @@ task Index { mkdir -p "$(dirname ~{outputPath})" ln ~{bamFile} ~{outputPath} || cp ~{bamFile} ~{outputPath} fi - samtools index ~{outputPath} ~{bamIndexPath} + samtools index \ + --threads ~{threads -1} \ + ~{outputPath} ~{bamIndexPath} ' } @@ -344,6 +361,7 @@ task Index { } runtime { + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -367,6 +385,7 @@ task Markdup { input { File inputBam String outputBamPath + Int threads = 1 Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" @@ -375,7 +394,9 @@ task Markdup { command { set -e mkdir -p "$(dirname ~{outputBamPath})" - samtools markdup ~{inputBam} ~{outputBamPath} + samtools markdup \ + --threads ~{threads - 1} \ + ~{inputBam} ~{outputBamPath} } output { @@ -383,6 +404,7 @@ task Markdup { } runtime { + cpu: threads docker: dockerImage time_minutes: timeMinutes } @@ -405,6 +427,10 @@ task Merge { String outputBamPath = "merged.bam" Boolean force = true + Boolean combineRGHeaders = false + Boolean combinePGHeaders = false + + Int compressionLevel = 1 Int threads = 1 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 2) @@ -420,6 +446,9 @@ task Merge { samtools merge \ --threads ~{threads - 1} \ 
~{true="-f" false="" force} \ + -l ~{compressionLevel} \ + ~{true="-c" false="" combineRGHeaders} \ + ~{true="-p" false="" combinePGHeaders} \ ~{outputBamPath} ~{sep=' ' bamFiles} samtools index ~{outputBamPath} ~{indexPath} } @@ -514,7 +543,7 @@ task Sort { -o ~{outputPath} \ ~{inputBam} samtools index \ - -@ ~{threads} \ + --threads ~{threads - 1} \ ~{outputPath} ~{bamIndexPath} } @@ -571,7 +600,7 @@ task Split { --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ - --threads ~{threads} \ + --threads ~{threads - 1} \ --write-index \ ~{inputBam} } @@ -610,10 +639,10 @@ task Tabix { input { File inputFile String outputFilePath = basename(inputFile) - String type = "vcf" + String preset = "vcf" Int timeMinutes = 1 + ceil(size(inputFile, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" + String dockerImage = "quay.io/biocontainers/htslib:1.21--h566b1c6_1" } # FIXME: It is better to do the indexing on VCF creation. @@ -625,7 +654,7 @@ task Tabix { then ln ~{inputFile} ~{outputFilePath} || cp ~{inputFile} ~{outputFilePath} fi - tabix ~{outputFilePath} -p ~{type} + tabix ~{outputFilePath} -p ~{preset} } output { @@ -643,7 +672,7 @@ task Tabix { # inputs inputFile: {description: "The file to be indexed.", category: "required"} outputFilePath: {description: "The location where the file should be written to. The index will appear alongside this link to the file.", category: "common"} - type: {description: "The type of file (eg. vcf or bed) to be indexed.", category: "common"} + preset: {description: "The preset for the file (eg. vcf or bed) to be indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -666,6 +695,8 @@ task View { Int? MAPQthreshold File? targetFile + Boolean fast = false # Default should be true, unless a non-BAM format is preferred. So th + Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inFile, "GiB") * 5) @@ -682,11 +713,12 @@ task View { ~{"-T " + referenceFasta} \ ~{"-o " + outputFileName} \ ~{true="-u " false="" uncompressedBamOutput} \ + ~{true="--fast" false="" fast} \ ~{"-f " + includeFilter} \ ~{"-F " + excludeFilter} \ ~{"-G " + excludeSpecificFilter} \ ~{"-q " + MAPQthreshold} \ - ~{"--threads " + (threads - 1)} \ + --threads ~{threads - 1} \ ~{"--target-file " + targetFile} \ ~{inFile} samtools index ~{outputFileName} ~{outputIndexPath} From d20b313ea01c0dc3fe318206daac4d976c22bc5b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 09:58:17 +0200 Subject: [PATCH 839/902] Increase mosdepth default memory --- mosdepth.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mosdepth.wdl b/mosdepth.wdl index 0f800769..43e95614 100644 --- a/mosdepth.wdl +++ b/mosdepth.wdl @@ -39,7 +39,7 @@ task Mosdepth { Boolean fastMode = false Int threads = 1 - String memory = "1GiB" + String memory = "4GiB" Int timeMinutes = 10 + ceil(size(bam, "G")) * 4 String dockerImage = "quay.io/biocontainers/mosdepth:0.3.10--h4e814b3_1" } From 046eecb3af6887d6aad1c31a4521951822683259 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 09:59:19 +0200 Subject: [PATCH 840/902] Allocate more time for merging --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 30e938b4..915bb848 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -433,7 +433,7 @@ task Merge { Int compressionLevel = 1 Int threads = 1 String memory = "4GiB" - Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 2) + Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") 
* 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } From f5765ffd1e75964a43da36c500741610e005c554 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 10:20:18 +0200 Subject: [PATCH 841/902] Update clair3 image --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index 4d9092f2..57984a32 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -35,7 +35,7 @@ task Clair3 { Boolean includeAllCtgs = false String memory = "~{threads + 16}GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) - String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" + String dockerImage = "quay.io/biocontainers/clair3:1.0.11--py39hd649744_0" } String modelArg = "~{if defined(modelTar) then basename(select_first([modelTar]), '.tar.gz') else builtinModel}" From bfd433dd4f698bf141c7add6cc42ea58d56ca3a2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 10:25:45 +0200 Subject: [PATCH 842/902] Update deepvariant image --- deepvariant.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index 2d212000..e9e6c18c 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -35,11 +35,11 @@ task RunDeepVariant { String? outputGVcfIndex File? regions String? sampleName - Boolean? 
VCFStatsReport = true + Boolean VCFStatsReport = true String memory = "48GiB" Int timeMinutes = 5000 - String dockerImage = "google/deepvariant:1.6.1" + String dockerImage = "google/deepvariant:1.8.0" } command { From cfbc34deb566ddb2ce0561168c7fb3dd3b0ae1e6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 11:30:12 +0200 Subject: [PATCH 843/902] Update several images --- CHANGELOG.md | 1 + modkit.wdl | 2 +- multiqc.wdl | 2 +- picard.wdl | 34 +++++++++++++++++----------------- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b95b904..8c13cacc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ version 6.0.0-dev + samtools BgzipAndIndex and Tabix "type" parameter changed to "preset" as is the name of the flag. + Unused javaXmx parameter removed from samtools DictAndFaidx ++ Update Picard images + Add Mosdepth task. + Allow pbmm2 to work with a set output prefix for the BAM file. + Update pbmm2 docker container to version 1.17 diff --git a/modkit.wdl b/modkit.wdl index 930b6de9..6a7d9b4d 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -41,7 +41,7 @@ task Pileup { Int threads = 8 String memory = "4GiB" Int timeMinutes = 2880 / threads # 2 Days / threads - String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" } diff --git a/multiqc.wdl b/multiqc.wdl index a2e32cdb..fae52178 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -58,7 +58,7 @@ task MultiQC { String? 
memory Int timeMinutes = 10 + ceil(size(reports, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/multiqc:1.25.1--pyhdfd78af_0" + String dockerImage = "quay.io/biocontainers/multiqc:1.28--pyhdfd78af_0" } Int memoryGb = 2 + ceil(size(reports, "GiB")) diff --git a/picard.wdl b/picard.wdl index 6628cf0e..fd072523 100644 --- a/picard.wdl +++ b/picard.wdl @@ -29,7 +29,7 @@ task BedToIntervalList { String javaXmx = "3G" String memory = "4GiB" Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -89,7 +89,7 @@ task CollectHsMetrics { # Additional * 2 because picard multiple metrics reads the # reference fasta twice. Int timeMinutes = 1 + ceil(size(referenceFasta, "GiB") * 3 * 2) + ceil(size(inputBam, "GiB") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -210,7 +210,7 @@ task CollectMultipleMetrics { Int memoryMb = javaXmxMb + 512 # Additional * 2 because picard multiple metrics reads the reference fasta twice. Int timeMinutes = 1 + ceil(size(referenceFasta, "GiB") * 3 * 2) + ceil(size(inputBam, "GiB") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -336,7 +336,7 @@ task CollectRnaSeqMetrics { String memory = "9GiB" # With 6 minutes per G there were several timeouts. 
Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 12) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -394,7 +394,7 @@ task CollectTargetedPcrMetrics { String javaXmx = "3G" String memory = "4GiB" Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -456,7 +456,7 @@ task CollectVariantCallingMetrics { String javaXmx = "8G" String memory = "9GiB" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -569,7 +569,7 @@ task CreateSequenceDictionary { String javaXmx = "2G" String memory = "3GiB" - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -621,7 +621,7 @@ task GatherBamFiles { Int memoryMb = javaXmxMb + 512 # One minute per input gigabyte. 
Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 1) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -684,7 +684,7 @@ task GatherVcfs { String javaXmx = "4G" String memory = "5GiB" Int timeMinutes = 1 + ceil(size(inputVcfs, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -753,7 +753,7 @@ task MarkDuplicates { String memoryMb = javaXmxMb + 512 Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } # Task is assuming query-sorted input so that the Secondary and Supplementary reads get @@ -836,7 +836,7 @@ task MergeVCFs { String javaXmx = "4G" String memory = "5GiB" Int timeMinutes = 1 + ceil(size(inputVCFs, "GiB")) * 2 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } # Using MergeVcfs instead of GatherVcfs so we can create indices. @@ -892,7 +892,7 @@ task SamToFastq { String javaXmx = "16G" # High memory default to avoid crashes. String memory = "17GiB" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" File? 
noneFile } @@ -953,7 +953,7 @@ task ScatterIntervalList { String javaXmx = "3G" String memory = "4GiB" - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -996,7 +996,7 @@ task SortSam { # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -1058,7 +1058,7 @@ task SortVcf { String javaXmx = "8G" String memory = "9GiB" Int timeMinutes = 1 + ceil(size(vcfFiles, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } @@ -1108,7 +1108,7 @@ task RenameSample { String javaXmx = "8G" String memory = "9GiB" Int timeMinutes = 1 + ceil(size(inputVcf, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -1163,7 +1163,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String javaXmx = "8G" String memory = "9GiB" Int timeMinutes = 360 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { From d31f74badd4e6d8f8c1f397c4478ffa20e32437e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 13:02:11 +0200 Subject: [PATCH 844/902] Make resource requirements for pbmm2 and minimap2 somewhat equal --- CHANGELOG.md | 2 ++ minimap2.wdl | 8 ++++---- pbmm2.wdl | 32 +++++++++++++++++++++++--------- 3 files changed, 29 insertions(+), 13 deletions(-) diff 
--git a/CHANGELOG.md b/CHANGELOG.md index 8c13cacc..5fa636d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,8 @@ version 6.0.0-dev + Unused javaXmx parameter removed from samtools DictAndFaidx + Update Picard images + Add Mosdepth task. ++ pbmm2 loses the sort parameter. Output is now always sorted. ++ pbmm2 gets an unmapped parameter. + Allow pbmm2 to work with a set output prefix for the BAM file. + Update pbmm2 docker container to version 1.17 + Add VEP task. diff --git a/minimap2.wdl b/minimap2.wdl index da301bd3..a7584beb 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -86,8 +86,6 @@ task Mapping { File queryFile Int compressionLevel = 1 - Int additionalSortThreads = 1 - Int sortMemoryGb = 1 Boolean nameSorted = false # MM, ML, MN -> Methylation flags # Also keep the following flags for Sequali to be able to run on the mapped bam file and get ONT information. @@ -112,6 +110,8 @@ task Mapping { String? howToFindGTAG String? readgroup + Int sortThreads = 2 + Int sortMemoryGb = 1 Int cores = 8 String memory = "24GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) @@ -147,7 +147,7 @@ task Mapping { - \ | samtools sort \ ~{true="-N" false="" nameSorted} \ - -@ ~{additionalSortThreads} \ + --threads ~{sortThreads - 1} \ -l ~{compressionLevel} \ -m ~{sortMemoryGb}G \ -o ~{outputPrefix}.bam @@ -184,7 +184,7 @@ task Mapping { tagsToKeep: {description: "Tags to keep from the input unaligned BAM file.", category: "Advanced"} howToFindGTAG: {description: "How to find GT-AG. 
f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} compressionLevel: {description: "compressionLevel for the output file", category: "advanced"} - additionalSortThreads: {description: "Extra sorting threads used for samtools sort", category: "advanced"} + sortThreads: {description: "Total number of threads used for samtools sort", category: "advanced"} sortMemoryGb: {description: "Amount of memory set for sorting", category: "advanced"} nameSorted: {description: "Output a name sorted file instead", category: "common"} diff --git a/pbmm2.wdl b/pbmm2.wdl index 73e74c0c..23133278 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -23,25 +23,36 @@ version 1.0 task Mapping { input { String presetOption - Boolean sort=true + Boolean unmapped = false String sample String outputPrefix = "./~{sample}.align" File referenceMMI File queryFile - Int cores = 4 - String memory = "30GiB" - Int timeMinutes = 1 + ceil(size(queryFile, "G") * 2000 / cores) + Int sortMemoryGb = 1 + Int sortThreads = 2 + Int cores = 8 + String memory = "24GiB" + # Slightly higher than minimap2 as compression level can not be set. + Int timeMinutes = 1 + ceil(size(queryFile, "G") * 400 / cores) String dockerImage = "quay.io/biocontainers/pbmm2:1.17.0--h9ee0642_0" } + # Use cores+sortThreads to set the number of threads. Internally pbmm2 + # allocates cores - sortThreads to alignment. This leads to underutilization + # of the requested resources. Sorting uses very little CPU until the point + # comes that the memory is full and the temporary file needs to be written. + # At this point the alignment halts because the pipe is full. 
command { set -e mkdir -p $(dirname ~{outputPrefix}) pbmm2 align \ --preset ~{presetOption} \ - ~{true="--sort" false="" sort} \ - -j ~{cores} \ + --sort \ + ~{true="--unmapped" false="" unmapped} \ + --num-threads ~{cores + sortThreads} \ + --sort-memory ~{sortMemoryGb}G \ + --sort-threads ~{sortThreads} \ ~{referenceMMI} \ ~{queryFile} \ --sample ~{sample} \ @@ -63,15 +74,18 @@ task Mapping { parameter_meta { # inputs presetOption: {description: "This option applies multiple options at the same time.", category: "required"} - sort: {description: "Sort the output bam file.", category: "advanced"} sample: {description: "Name of the sample.", category: "required"} outputPrefix: {description: "The prefix of the output filename before the .bam extension.", category: "advanced"} referenceMMI: {description: "MMI file for the reference.", category: "required"} queryFile: {description: "BAM file with reads to align against the reference.", category: "required"} - cores: {description: "The number of cores to be used.", category: "advanced"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + unmapped: {description: "Include unmapped reads in the output.", category: "common"} + + sortThreads: {description: "Number of threads used for sorting (passed to pbmm2's --sort-threads)", category: "advanced"} + sortMemoryGb: {description: "Amount of memory set for sorting", category: "advanced"} + cores: {description: "The number of cores to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} # output outputAlignmentFile: {description: "Mapped bam file."} From 046947847255c3323524f1c92004a66ec026b7c1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 13:21:51 +0200 Subject: [PATCH 845/902] Increase default thread count for samtools merge --- CHANGELOG.md | 1 + samtools.wdl | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5fa636d8..0781e4b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Samtools merge default thread count increased to 8. + Update docker images in samtools.wdl + Add threads and compression levels to applicable tasks. Default to compression level 1. diff --git a/samtools.wdl b/samtools.wdl index 915bb848..7a2223f6 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -431,7 +431,8 @@ task Merge { Boolean combinePGHeaders = false Int compressionLevel = 1 - Int threads = 1 + # Merging is often a bottleneck. Set a high number of threads to decrease wall clock time. 
+ Int threads = 8 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" From b063b9ba79e41f3d20c64ded779a2953a1f7ec55 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 13:53:28 +0200 Subject: [PATCH 846/902] more time for clair3 --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index 57984a32..5a6154af 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -34,7 +34,7 @@ task Clair3 { Int threads = 8 Boolean includeAllCtgs = false String memory = "~{threads + 16}GiB" - Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) + Int timeMinutes = 10 + ceil(size(bam, "G") * 400 / threads) String dockerImage = "quay.io/biocontainers/clair3:1.0.11--py39hd649744_0" } From d502298c8ec0e594cace54e573e68b2e7a4d9041 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 14:54:27 +0200 Subject: [PATCH 847/902] Make sequali runtime dependent on input file size --- sequali.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sequali.wdl b/sequali.wdl index b43cf281..cbca3653 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -29,7 +29,7 @@ task Sequali { Int threads = 2 String memory = "4GiB" String dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" - Int timeMinutes = 59 + Int timeMinutes = 10 + ceil(size(reads, "GiB") + size(mate_reads, "GiB")) * 4 } command <<< From b942c7ed0a833c830aabb227a15d78ca89aecc3e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 14:58:12 +0200 Subject: [PATCH 848/902] Slightly higher requirements for pbmm2 than minimap2 --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index 23133278..9155e7b2 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -32,7 +32,7 @@ task Mapping { Int sortMemoryGb = 1 Int sortThreads = 2 Int cores = 8 - String memory = "24GiB" + String memory = "30GiB" # 
Slightly higher than minimap2 as compression level can not be set. Int timeMinutes = 1 + ceil(size(queryFile, "G") * 400 / cores) String dockerImage = "quay.io/biocontainers/pbmm2:1.17.0--h9ee0642_0" From d2ac7b2ad030a00d83aa5a0100f79ec5e16dd5d1 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 2 Apr 2025 18:20:33 +0200 Subject: [PATCH 849/902] Add filterThreshold, filterPercent to modkit pileup --- modkit.wdl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 78df28f4..a611a620 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -31,6 +31,8 @@ task Pileup { Int? intervalSize File? includeBed + String? filterThreshold + String? filterPercentile Boolean cpg = false Boolean combineMods = false @@ -57,6 +59,8 @@ task Pileup { ~{true="--cpg" false="" cpg} \ ~{true="--combine-mods" false="" combineMods} \ ~{true="--combine-strands" false="" combineStrands} \ + ~{"--filter-percentile " + filterPercentile} \ + ~{"--filter-threshold " + filterThreshold} \ --log-filepath ~{logFilePath} \ ~{bam} \ - | tee ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10 >> "~{outputBedGraph}_"$4"_"$6".bedGraph"}' @@ -97,12 +101,14 @@ task Pileup { combineStrands: {description: "Whether to combine strands in the output", category: "advanced"} ignore: {description: "Modification type to ignore. For example 'h'.", category: "advanced"} logFilePath: {description: "Path where the log file should be written.", category: "advanced"} + filterThreshold: {description: "Global filter threshold can be specified by a decimal number (e.g. 0.75). 
Otherwise the automatic filter percentile will be used.", category: "advanced"} + filterPercentile: {description: "This defaults to 0.1, to remove the lowest 10% confidence modification calls, but can be manually adjusted", category: "advanced"} threads: {description: "The number of threads to use for variant calling.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - + # output out: {description: "The output bed files. Not available when bedgraph = true."} outFiles: {description: "Output files when bedgraph = true."} From 204821385c3d176c3425d7052b6f3905ff46541d Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 2 Apr 2025 18:21:11 +0200 Subject: [PATCH 850/902] Add a summary task --- CHANGELOG.md | 2 ++ modkit.wdl | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ed79b5b..57519f04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,8 @@ version 6.0.0-dev + New samtools task: split. + Update `bedtools.Intersect` to support `-wa`, `-wb`, and `-s` flags. + Deprecate `modkit.Pileup`'s bedGraph option, it is now output by default. ++ Add support for filterThreshold/filterPercentile for `modkit.Pileup`. ++ Add `modkit.Summary` task. 
version 5.2.0 --------------------------- diff --git a/modkit.wdl b/modkit.wdl index a611a620..7546458a 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -115,3 +115,65 @@ task Pileup { logFile: {description: "The generated log file."} } } + +task Summary { + input { + File bam + File bamIndex + + String summary = "modkit.summary.txt" + + Boolean sample = true + Int? numReads # = 10042 + Float? samplingFrac # = 0.1 + Int? seed + + Int threads = 4 + String memory = ceil(size(bam, "GiB") * 0.20) + 10 # Based on a linear model with some fudge (y=-0.13x - 4). + Int timeMinutes = 2880 / threads # 2 Days / threads + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" + } + + command <<< + set -e + mkdir -p $(dirname ~{summary}) + + modkit summary \ + --threads ~{threads} \ + ~{true="" false="--no-sampling" sample} \ + ~{"--num-reads " + numReads} \ + ~{"--sampling-frac " + samplingFrac} \ + ~{"--seed " + seed} \ + ~{bam} > ~{summary} + >>> + + output { + File summaryReport = summary # Normal mode + } + + runtime { + docker: dockerImage + cpu: threads + memory: memory + time_minutes: timeMinutes + } + + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + + sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} + numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} + samplingFrac: {description: "Use a fixed percentage of reads, rather than a fixed number of reads, for sampling.", category: "advanced"} + seed: {description: "A seed can be provided for reproducibility in the sampling fraction case.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", 
category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + summaryReport: {description: "The output modkit summary."} + } +} From a9ec6faf3de64e110209ed2c81b1272e765a6247 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 4 Apr 2025 13:24:46 +0200 Subject: [PATCH 851/902] Downgrade deepvariant because of a bug --- deepvariant.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index e9e6c18c..c700416f 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -39,7 +39,9 @@ task RunDeepVariant { String memory = "48GiB" Int timeMinutes = 5000 - String dockerImage = "google/deepvariant:1.8.0" + # Version 1.8.0 has a bug. + # https://github.com/google/deepvariant/issues/912 + String dockerImage = "google/deepvariant:1.6.1" } command { From 741f9708383ff29d0f6f548f9fffad0b8eb7ab37 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 4 Apr 2025 13:26:56 +0200 Subject: [PATCH 852/902] Increase time limit for VEP --- vep.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index e99c9fdb..2c1f923b 100644 --- a/vep.wdl +++ b/vep.wdl @@ -36,7 +36,7 @@ task Vep { String memory = "8GiB" # Account time for unpacking the cache. 
- Int timeMinutes = 1 + ceil(size(cacheTar, "GiB")) + ceil(size(inputFile, "MiB") * 3) + Int timeMinutes = 1 + ceil(size(cacheTar, "GiB")) + ceil(size(inputFile, "MiB") * 15) String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0" } From 4fe49b8ef3f1bae978b2fa07ac6e08a282e2f91f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 09:56:43 +0200 Subject: [PATCH 853/902] Update samtools parameter_meta --- CHANGELOG.md | 2 +- samtools.wdl | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0781e4b1..1180578a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ version 6.0.0-dev --------------------------- + Samtools merge default thread count increased to 8. + Update docker images in samtools.wdl -+ Add threads and compression levels to applicable tasks. Default to ++ Add threads and compression levels to applicable tasks in samtools. Default to compression level 1. + samtools BgzipAndIndex and Tabix "type" parameter changed to "preset" as is the name of the flag. diff --git a/samtools.wdl b/samtools.wdl index 7a2223f6..cd24e6e9 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -69,6 +69,8 @@ task BgzipAndIndex { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + compressLevel: {description: "Set compression level.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} # outputs compressed: {description: "Compressed input file."} @@ -317,6 +319,7 @@ task Flagstat { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} # outputs flagstat: {description: "The number of alignments for each FLAG type."} @@ -374,6 +377,7 @@ task Index { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} # outputs indexedBam: {description: "BAM file that was indexed."} @@ -415,6 +419,7 @@ task Markdup { outputBamPath: {description: "The location of the output BAM file.", category: "required"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} # outputs outputBam: {description: "BAM file with duplicate alignments marked."} @@ -471,6 +476,10 @@ task Merge { bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} + + combineRGHeaders: {description: "Combine @RG headers with colliding IDs", category: "advanced"} + combinePGHeaders: {description: "Combine @PG headers with colliding IDs", category: "advanced"} + threads: {description: "Number of threads to use.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -567,7 +576,7 @@ task Sort { sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes.", category: "advanced"} - threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} + threads: {description: "The number of threads that will be used for this task.", category: "advanced"} memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -696,7 +705,7 @@ task View { Int? MAPQthreshold File? targetFile - Boolean fast = false # Default should be true, unless a non-BAM format is preferred. So th + Boolean fast = true # Sets compression level to 1. Int threads = 1 String memory = "1GiB" @@ -707,14 +716,15 @@ task View { String outputIndexPath = basename(outputFileName) + ".bai" # Always output to bam and output header. + # -u should be after --fast, and will override it in that case. command { set -e mkdir -p "$(dirname ~{outputFileName})" samtools view -b \ ~{"-T " + referenceFasta} \ ~{"-o " + outputFileName} \ - ~{true="-u " false="" uncompressedBamOutput} \ ~{true="--fast" false="" fast} \ + ~{true="-u " false="" uncompressedBamOutput} \ ~{"-f " + includeFilter} \ ~{"-F " + excludeFilter} \ ~{"-G " + excludeSpecificFilter} \ @@ -741,6 +751,7 @@ task View { # inputs inFile: {description: "A BAM, SAM or CRAM file.", category: "required"} outputFileName: {description: "The location the output BAM file should be written.", category: "common"} + fast: {description: "Sets compression level to 1. 
Set to true by default.", category: "common"} uncompressedBamOutput: {description: "Equivalent to samtools view's `-u` flag.", category: "advanced"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} includeFilter: {description: "Equivalent to samtools view's `-f` option.", category: "advanced"} From 6a78f520a6efee6def3fcc257f5ea3be02daf8cb Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 10:21:57 +0200 Subject: [PATCH 854/902] Increase deep variant shards and explain memory usage --- deepvariant.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index c700416f..b0ed2a19 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -30,13 +30,17 @@ task RunDeepVariant { String outputVcf = "sample.vcf.gz" String? postprocessVariantsExtraArgs File? customizedModel - Int numShards = 4 + Int numShards = 8 String? outputGVcf String? outputGVcfIndex File? regions String? sampleName Boolean VCFStatsReport = true + # Most of the memory used is at the end, in the step where the variants + # are merged. This is a single-threaded high memory step. The number + # of shards does not influence the memory so much. + # The provided memory here is enough for merge human chromosome 1. String memory = "48GiB" Int timeMinutes = 5000 # Version 1.8.0 has a bug. From ba35d987ca3fe3c27a01034d60cd2ab09369ab31 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 10:36:03 +0200 Subject: [PATCH 855/902] Set a lower number of threads for samtools merge to decrease waste --- samtools.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index cd24e6e9..cb8dbd55 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -436,8 +436,9 @@ task Merge { Boolean combinePGHeaders = false Int compressionLevel = 1 - # Merging is often a bottleneck. Set a high number of threads to decrease wall clock time. 
- Int threads = 8 + # Merging is often a bottleneck. With compression level 1 however, + # more than three threads does not add more benefit. + Int threads = 3 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" From 8b41a7feddf6e1f29af7fd825cad6a0ae6811687 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 10:45:14 +0200 Subject: [PATCH 856/902] Dynamically set samtools merge threads --- samtools.wdl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index cb8dbd55..7dd9ecc1 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -436,9 +436,8 @@ task Merge { Boolean combinePGHeaders = false Int compressionLevel = 1 - # Merging is often a bottleneck. With compression level 1 however, - # more than three threads does not add more benefit. - Int threads = 3 + # Use one thread per input + one for the output + one for merging + Int threads = length(bamFiles) + 2 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" From 17cf284d2c54212b29cdf4e6a347adc0e0a0c458 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 16:38:42 +0200 Subject: [PATCH 857/902] Also use threads for faster indexing --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 7dd9ecc1..811f56e0 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -456,7 +456,7 @@ task Merge { ~{true="-c" false="" combineRGHeaders} \ ~{true="-p" false="" combinePGHeaders} \ ~{outputBamPath} ~{sep=' ' bamFiles} - samtools index ~{outputBamPath} ~{indexPath} + samtools index -@ ~{threads - 1} ~{outputBamPath} ~{indexPath} } output { From 1fae30492bdff1af750ac963d565cbb16cc6572b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 16:53:59 +0200 Subject: [PATCH 858/902] Add missing 
parameter_meta --- samtools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 811f56e0..743fce0c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -480,6 +480,8 @@ task Merge { combineRGHeaders: {description: "Combine @RG headers with colliding IDs", category: "advanced"} combinePGHeaders: {description: "Combine @PG headers with colliding IDs", category: "advanced"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} + threads: {description: "Number of threads to use.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From d485e17399c3482aa109e0d1055c2b2bac9d93a4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Apr 2025 08:30:54 +0200 Subject: [PATCH 859/902] Update CHANGELOG.md --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1180578a..96adc8fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,9 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- -+ Samtools merge default thread count increased to 8. ++ MultiQC image updated to version 1.28 ++ Samtools merge now has options added for merging RG and PG headers. ++ Samtools merge default thread count increased based on the number of files. + Update docker images in samtools.wdl + Add threads and compression levels to applicable tasks in samtools. Default to compression level 1. 
From 847ad71a26b3a1ddc1fc06c2fda349fc620ad2b5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 19:40:40 +0200 Subject: [PATCH 860/902] Update vt to allow a filter expression and compressed indexed output --- CHANGELOG.md | 1 + vt.wdl | 28 ++++++++++++++++++++++------ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 96adc8fa..dfa40b75 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Update vt task to allow a filter expression and compress and index the output. + MultiQC image updated to version 1.28 + Samtools merge now has options added for merging RG and PG headers. + Samtools merge default thread count increased based on the number of files. diff --git a/vt.wdl b/vt.wdl index 4da2d8cd..4ced1d2a 100644 --- a/vt.wdl +++ b/vt.wdl @@ -27,27 +27,39 @@ task Normalize { File referenceFasta File referenceFastaFai Boolean ignoreMaskedRef = false - String outputPath = "./vt/normalized_decomposed.vcf" + String outputPath = "./vt/normalized_decomposed.vcf.gz" + String? 
filterExpression + + Int compressionLevel = 1 String memory = "4GiB" - Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" + Int timeMinutes = 10 + ceil(size(inputVCF, "GiB") * 240) + String dockerImage = "quay.io/biocontainers/vt:0.57721--h2419454_12" } command { set -eo pipefail mkdir -p "$(dirname ~{outputPath})" - vt normalize ~{inputVCF} \ + vt view -h \ + ~{"-f " + filterExpression} \ + ~{inputVCF} \ + | vt normalize - \ -r ~{referenceFasta} \ ~{true="-m " false="" ignoreMaskedRef} \ - | vt decompose -s - -o ~{outputPath} + | vt decompose -s - \ + | vt view - \ + -c ~{compressionLevel} \ + -o ~{outputPath} + vt index ~{outputPath} } output { File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" } runtime { + cpu: 1 memory: memory time_minutes: timeMinutes docker: dockerImage @@ -61,11 +73,15 @@ task Normalize { referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs.", category: "advanced"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} + filterExpression: {description: "See https://genome.sph.umich.edu/wiki/Vt#Filters for valid expressions.", category: "common"} + compressionLevel: {description: "Compression level for the out vcf.gz file.", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputVcf: {description: "Normalized & decomposed VCF file."} + outputVcf: {description: "Normalized and decomposed VCF file."} + outputVcfIndex: {description: "Index for normalized and decomposed VCF file."} } } From 57018dd55c43af0013f48a61e5119128ccd87d3f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Apr 2025 11:28:56 +0200 Subject: [PATCH 861/902] Properly quote vt filter --- vt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vt.wdl b/vt.wdl index 4ced1d2a..635641e9 100644 --- a/vt.wdl +++ b/vt.wdl @@ -41,7 +41,7 @@ task Normalize { set -eo pipefail mkdir -p "$(dirname ~{outputPath})" vt view -h \ - ~{"-f " + filterExpression} \ + ~{"-f '" + filterExpression}~{true="'" false="" defined(filterExpression)} \ ~{inputVCF} \ | vt normalize - \ -r ~{referenceFasta} \ From e39fe10360989d5074580034a4df030e16d27f4c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:08:55 +0200 Subject: [PATCH 862/902] do not intermingle singletons --- samtools.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index d724a692..ef89477d 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -157,6 +157,7 @@ task Fastq { String outputRead1 String? outputRead2 String? outputRead0 + String? outputReadS Boolean appendReadNumber = false Boolean outputQuality = false @@ -177,8 +178,10 @@ task Fastq { samtools collate -u -O ~{inputBam} | \ samtools fastq \ ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ + ~{"-1 " + outputRead1} \ ~{"-2 " + outputRead2} \ ~{"-0 " + outputRead0} \ + ~{"-s " + outputReadS} \ ~{"-f " + includeFilter} \ ~{"-F " + excludeFilter} \ ~{"-G " + excludeSpecificFilter} \ @@ -192,6 +195,7 @@ task Fastq { File read1 = outputRead1 File? read2 = outputRead2 File? read0 = outputRead0 + File? 
readS = outputReadS } runtime { @@ -207,6 +211,7 @@ task Fastq { outputRead1: {description: "The location the reads (first reads for pairs, in case of paired-end sequencing) should be written to.", category: "required"} outputRead2: {description: "The location the second reads from pairs should be written to.", category: "common"} outputRead0: {description: "The location the unpaired reads should be written to (in case of paired-end sequenicng).", category: "advanced"} + outputReadS: {description: "The location singleton reads should be written to.", category: "advanced"} appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. Corresponds to `-n/N`.", category: "advanced"} outputQuality: {description: "Equivalent to samtools fastq's `-O` flag.", category: "advanced"} includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`.", category: "advanced"} From b9319418b7a96a0046b9c034649930ccd5cf4fa9 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:09:16 +0200 Subject: [PATCH 863/902] The caches in containers caused issues --- samtools.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index ef89477d..315a00b5 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -571,6 +571,9 @@ task Split { command { set -e mkdir -p "~{outputPath}/rg/" + + export XDG_CACHE_HOME=$PWD/.cache/ + export REF_CACHE=$PWD/.cache/hts-ref/%2s/%2s/%s samtools split \ --output-fmt bam \ --output-fmt-option level=~{compressionLevel} \ From 8ede8b774a0296fe484e9f78e25d5d358828099e Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:15:01 +0200 Subject: [PATCH 864/902] add biopets validate fastq --- biopet.wdl | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 biopet.wdl diff --git a/biopet.wdl b/biopet.wdl new file mode 100644 index 00000000..ea8a36c8 --- /dev/null +++ b/biopet.wdl @@ -0,0 +1,60 @@ +version 1.0 + +# 
Copyright (c) 2025 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +task ValidateFastq { + input { + File inputRead1 + File? 
inputRead2 + + String memory = "1GiB" + Int timeMinutes = 5 + ceil(size(inputRead1, "GiB")) + String dockerImage = "quay.io/biocontainers/biopet-validatefastq:0.1.1--hdfd78af_3" + } + + command { + set -e + java -jar /usr/local/share/biopet-validatefastq-0.1.1-3/validatefastq-assembly-0.1.1.jar \ + --fastq1 ~{inputRead1} \ + ~{"--fastq2 " + inputRead2} + } + + output { + } + + runtime { + cpu: 1 + memory: memory + docker: dockerImage + time_minutes: timeMinutes + } + + parameter_meta { + # inputs + inputRead1: {description: "The location of the first FASTQ file (first reads for pairs, in case of paired-end sequencing).", category: "required"} + inputRead2: {description: "The location of the paired end reads.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} From 8493c77e477c5522b0947948b47e35be04974fc7 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:19:23 +0200 Subject: [PATCH 865/902] require being explicit about locations --- samtools.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 315a00b5..5bb2fb82 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -177,7 +177,6 @@ task Fastq { mkdir -p "$(dirname ~{outputRead1})" samtools collate -u -O ~{inputBam} | \ samtools fastq \ - ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ ~{"-1 " + outputRead1} \ ~{"-2 " + outputRead2} \ ~{"-0 " + outputRead0} \ From 347ed91d4bff4306cea0074ca7f1c7fa2ff517b7 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:20:09 +0200 Subject: [PATCH 866/902] Probably unnecessary --- samtools.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 5bb2fb82..0ef1419c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -571,8 +571,6 @@ task Split { set -e mkdir -p "~{outputPath}/rg/" - export XDG_CACHE_HOME=$PWD/.cache/ - export REF_CACHE=$PWD/.cache/hts-ref/%2s/%2s/%s samtools split \ --output-fmt bam \ --output-fmt-option level=~{compressionLevel} \ From 7ff2ac2c1ebab33a3872297beb189e648eb90724 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:22:10 +0200 Subject: [PATCH 867/902] documentation --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 986dfd13..f20dc82d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,8 @@ version 6.0.0-dev + Fixed bug whereby `samtools.Fastq` could produce out of sync R1/R2 when used with an unsorted bam input. `samtools collate` is now used by default to group reads by readname in order to avoid this issue. + New samtools task: split. + Update `bedtools.Intersect` to support `-wa`, `-wb`, and `-s` flags. 
++ Add `biopet.ValidateFastq` to check your fastq files for pairing and other correctness. ++ **Breaking**: `samtools.Fastq` now requires defining your singleton read location. This only affects you if you were previously using this task with only a single output read file. version 5.2.0 --------------------------- From 1ee07a657fd46f6dc227573c2c59d7ef4d0cd4b9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 10:49:41 +0200 Subject: [PATCH 868/902] add -no-upstream to snpeff task --- snpeff.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/snpeff.wdl b/snpeff.wdl index 0f14e5b5..8718e01b 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -32,6 +32,7 @@ task SnpEff { Boolean hgvs = true Boolean lof = true Boolean noDownstream = false + Boolean noUpstream = false Boolean noIntergenic = false Boolean noShiftHgvs = false Int? upDownStreamLen @@ -39,7 +40,7 @@ task SnpEff { String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/snpeff:5.0--0" + String dockerImage = "quay.io/biocontainers/snpeff:5.2--hdfd78af_1" } command { @@ -55,6 +56,7 @@ task SnpEff { ~{true="-hgvs" false="-noHgvs" hgvs} \ ~{true="-lof" false="-noLof" lof} \ ~{true="-no-downstream" false="" noDownstream} \ + ~{true="-no-upstream" false="" noUpstream} \ ~{true="-no-intergenic" false="" noIntergenic} \ ~{true="-noShiftHgvs" false="" noShiftHgvs} \ ~{"-upDownStreamLen " + upDownStreamLen} \ @@ -82,6 +84,7 @@ task SnpEff { hgvs: {description: "Equivalent to `-hgvs` if true or `-noHgvs` if false.", category: "advanced"} lof: {description: "Equivalent to `-lof` if true or `-noLof` if false.", category: "advanced"} noDownstream: {description: "Equivalent to the `-no-downstream` flag.", category: "advanced"} + noUpstream: {description: "Equivalent to the `-no-upstream` flag.", category: "advanced"} noIntergenic: {description: "Equivalent to the `-no-intergenic` flag.", category: "advanced"} noShiftHgvs: {description: 
"Equivalent to the `-noShiftHgvs` flag.", category: "advanced"} upDownStreamLen: {descriptoin: "Equivalent to the `-upDownStreamLen` option.", category: "advanced"} From d9d989e07649ac3177f6464100e192418e716ce3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 11:22:36 +0200 Subject: [PATCH 869/902] Add snpsift filter --- CHANGELOG.md | 2 ++ snpsift.wdl | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 snpsift.wdl diff --git a/CHANGELOG.md b/CHANGELOG.md index dfa40b75..7e209f1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Added a task for SnpSift filter. ++ Updated the snpEff task to allow setting the `-no-upstream` flag. + Update vt task to allow a filter expression and compress and index the output. + MultiQC image updated to version 1.28 + Samtools merge now has options added for merging RG and PG headers. diff --git a/snpsift.wdl b/snpsift.wdl new file mode 100644 index 00000000..0bb413f6 --- /dev/null +++ b/snpsift.wdl @@ -0,0 +1,69 @@ +version 1.0 + +# MIT License +# +# Copyright (c) 2025 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Filter { + input { + File vcf + File? vcfIndex + String filterExpression + String outputPath = "./snpsift_filter.vcf" + + String memory = "9GiB" + String javaXmx = "8G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/snpsift:5.2--hdfd78af_0" + } + + command { + SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + filter \ + "~{filterExpression}" \ + ~{vcf} \ + > ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes # !UnknownRuntimeKey + memory: memory + } + + parameter_meta { + vcf: {description: "A VCF file to filter.", category: "required"} + vcfIndex: {description: "The index for the VCF file.", category: "common"} + filterExpression: {description: "The SnpSift filtering expression.", category: "required"} + outputPath: {description: "The path to write the output to.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From d4eb18d70d68e5c75539c272bf0db065e5f0bf71 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 11:35:56 +0200 Subject: [PATCH 870/902] add region input to bcftools view --- CHANGELOG.md | 1 + bcftools.wdl | 5 ++++- snpsift.wdl | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e209f1a..f13ab24b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Updated the bcftools view task to allow specifying a region. + Added a task for SnpSift filter. + Updated the snpEff task to allow setting the `-no-upstream` flag. + Update vt task to allow a filter expression and compress and index the output. diff --git a/bcftools.wdl b/bcftools.wdl index 7df8911d..11864a00 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -349,6 +349,7 @@ task View { String? exclude String? include + String? 
region Array[String] samples = [] String memory = "256MiB" @@ -368,7 +369,8 @@ task View { ~{if length(samples) > 0 then "-s" else ""} ~{sep="," samples} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ - ~{inputFile} + ~{inputFile} \ + ~{region} ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} } @@ -390,6 +392,7 @@ task View { outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} + region: {description: "The region to retrieve from the VCF file.", category: "common"} excludeUncalled: {description: "Exclude sites without a called genotype (see man page for details).", category: "advanced"} samples: {description: "A list of sample names to include.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} diff --git a/snpsift.wdl b/snpsift.wdl index 0bb413f6..5bac6484 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -36,6 +36,8 @@ task Filter { } command { + set -e + mkdir -p "$(dirname ~{outputPath})" SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ filter \ "~{filterExpression}" \ From ecd2242e9a71f352a6b11683a969f9f5804cb18d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 12:14:13 +0200 Subject: [PATCH 871/902] add an input for an index file in bcftools view --- CHANGELOG.md | 1 + bcftools.wdl | 2 ++ 2 files changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f13ab24b..7d5ad41d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Updated the bcftools view task with an input for an index file.
+ Updated the bcftools view task to allow specifying a region. + Added a task for SnpSift filter. + Updated the snpEff task to allow setting the `-no-upstream` flag. diff --git a/bcftools.wdl b/bcftools.wdl index 11864a00..b923781c 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -344,6 +344,7 @@ task Stats { task View { input { File inputFile + File? inputFileIndex String outputPath = "output.vcf" Boolean excludeUncalled = false @@ -389,6 +390,7 @@ task View { parameter_meta { # inputs inputFile: {description: "A vcf or bcf file.", category: "required"} + inputFileIndex: {description: "the index for the input file.", category: "common"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} From abcddcda79a0821ef86bb0d1b40f2e5b7264e829 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 May 2025 11:56:23 +0200 Subject: [PATCH 872/902] fix wdlTools parsing issue in bcftools annotate --- CHANGELOG.md | 2 ++ bcftools.wdl | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d5ad41d..7ad69a3e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Fixed an issue with the parameter_meta section of bcftools annotate + which caused wdlTools to error on parsing the file. + Updated the bcftools view task with an input for an index file. + Updated the bcftools view task to allow specifying a region. + Added a task for SnpSift filter. 
diff --git a/bcftools.wdl b/bcftools.wdl index b923781c..6200a1a1 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -111,7 +111,7 @@ task Annotate { collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} headerLines: {description: "Lines to append to the VCF header (see man page for details).", category: "advanced"} - newId: {description: "Assign ID on the fly (e.g. --set-id +'%CHROM\_%POS').", category: "advanced"} + newId: {description: "Assign ID on the fly (e.g. --set-id +'%CHROM\\_%POS').", category: "advanced"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} markSites: {description: "Annotate sites which are present ('+') or absent ('-') in the -a file with a new INFO/TAG flag.", category: "advanced"} regions: {description: "Restrict to comma-separated list of regions.", category: "advanced"} From 748fe367e1964e5014cdb60a3def6976f2846d3c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 May 2025 14:51:52 +0200 Subject: [PATCH 873/902] change name of snpsift task --- snpsift.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snpsift.wdl b/snpsift.wdl index 5bac6484..6b6a1feb 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -22,7 +22,7 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -task Filter { +task SnpSiftFilter { input { File vcf File? 
vcfIndex From 2fc90c9790b41781ca35144e0d495f293a614382 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 May 2025 16:05:27 +0200 Subject: [PATCH 874/902] add a useless ls to check if a dnanexus error is caused by lazy loading --- bcftools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 6200a1a1..5ab04c1c 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -362,6 +362,8 @@ task View { command { set -e + ls ~{inputFileIndex} + mkdir -p "$(dirname ~{outputPath})" bcftools view \ ~{"--exclude " + exclude} \ From 82a5715109d7c352c016d2672cea27b0ab4eb7f0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 28 May 2025 09:14:43 +0200 Subject: [PATCH 875/902] add ls to snpeff, bcftools view and snpsift so I can see the paths when run on dnanexus --- bcftools.wdl | 2 +- snpeff.wdl | 1 + snpsift.wdl | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 5ab04c1c..0381d4cf 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -362,7 +362,7 @@ task View { command { set -e - ls ~{inputFileIndex} + ls ~{inputFile} ~{inputFileIndex} mkdir -p "$(dirname ~{outputPath})" bcftools view \ diff --git a/snpeff.wdl b/snpeff.wdl index 8718e01b..924db8db 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -45,6 +45,7 @@ task SnpEff { command { set -e + ls ~{vcf} ~{vcfIndex} mkdir -p "$(dirname ~{outputPath})" unzip ~{datadirZip} snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ diff --git a/snpsift.wdl b/snpsift.wdl index 6b6a1feb..5daacd36 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -37,6 +37,8 @@ task SnpSiftFilter { command { set -e + ls ~{vcf} ~{vcfIndex} + mkdir -p "$(dirname ~{outputPath})" SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ filter \ From 0513965516fab2b2a6a4c9d146813e65ffa77b19 Mon Sep 17 00:00:00 2001 From: Helena Date: Wed, 28 May 2025 14:04:40 +0200 Subject: [PATCH 876/902] Update modkit.wdl --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl 
index 7546458a..424ba755 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -129,7 +129,7 @@ task Summary { Int? seed Int threads = 4 - String memory = ceil(size(bam, "GiB") * 0.20) + 10 # Based on a linear model with some fudge (y=-0.13x - 4). + String memory = ceil(size(bam, "GiB") * 110) + 40 # Based on a linear model with some fudge (memory = 107 * file_size - 1.8). Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } From 58b52865e986970b7c49d10096afbf1d0eec8e84 Mon Sep 17 00:00:00 2001 From: Helena Date: Wed, 28 May 2025 15:37:47 +0200 Subject: [PATCH 877/902] Update modkit.wdl More reasonable bounds --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 424ba755..094f0041 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -129,7 +129,7 @@ task Summary { Int? seed Int threads = 4 - String memory = ceil(size(bam, "GiB") * 110) + 40 # Based on a linear model with some fudge (memory = 107 * file_size - 1.8). + String memory = ceil(size(bam, "GiB") * 115) + 4 # Based on a linear model with some fudge (memory = 107 * file_size - 1.8). Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } From 5d4f097ad010fb12c4b7599511eaafc741b64932 Mon Sep 17 00:00:00 2001 From: Helena Date: Wed, 28 May 2025 16:01:43 +0200 Subject: [PATCH 878/902] re-correct it. --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 094f0041..4aecb517 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -129,7 +129,7 @@ task Summary { Int? seed Int threads = 4 - String memory = ceil(size(bam, "GiB") * 115) + 4 # Based on a linear model with some fudge (memory = 107 * file_size - 1.8). + String memory = ceil(size(bam, "GiB") * 0.1) + 5 # Based on a linear model with some fudge (memory = 0.07540 * file_size - 0.6). 
Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } From eafceb0f98e68feb884f8a947c15c29a2e52eb5b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 3 Jun 2025 15:52:08 +0200 Subject: [PATCH 879/902] WIP add option to output compressed VCF files to snpeff and snpsift --- snpeff.wdl | 10 ++++++++-- snpsift.wdl | 5 ++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/snpeff.wdl b/snpeff.wdl index 924db8db..e1b520af 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -40,9 +40,12 @@ task SnpEff { String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/snpeff:5.2--hdfd78af_1" + # Multicontainer with snpeff 5.2 and bgzip/tabix 1.19.1 + String dockerImage = "quay.io/biocontainers/mulled-v2-2fe536b56916bd1d61a6a1889eb2987d9ea0cd2f:c51b2e46bf63786b2d9a7a7d23680791163ab39a-0" } + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") + command { set -e ls ~{vcf} ~{vcfIndex} @@ -61,12 +64,15 @@ task SnpEff { ~{true="-no-intergenic" false="" noIntergenic} \ ~{true="-noShiftHgvs" false="" noShiftHgvs} \ ~{"-upDownStreamLen " + upDownStreamLen} \ - > ~{outputPath} + ~{if compressed then "| bgzip " else ""} > ~{outputPath} + + ~{if compressed then "tabix ~{outputPath}" else ""} rm -r $PWD/data } output { File outputVcf = outputPath + File? outputVcfIndex = outputPath + ".tbi" } runtime { diff --git a/snpsift.wdl b/snpsift.wdl index 5daacd36..d964c255 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -44,11 +44,14 @@ task SnpSiftFilter { filter \ "~{filterExpression}" \ ~{vcf} \ - > ~{outputPath} + ~{if compressed then "| bgzip " else ""} > ~{outputPath} + + ~{if compressed then "tabix ~{outputPath}" else ""} } output { File outputVcf = outputPath + File? 
outputVcfIndex = outputPath + ".tbi" } runtime { From 16656ff77fa9f88577298fd7e8cc00c5eba02004 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 5 Jun 2025 11:38:01 +0200 Subject: [PATCH 880/902] update changelog, fix missing variable --- CHANGELOG.md | 1 + snpsift.wdl | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ad69a3e..7de262af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Add support for outputting compressed files to snpeff and snpsift. + Fixed an issue with the parameter_meta section of bcftools annotate which caused wdlTools to error on parsing the file. + Updated the bcftools view task with an input for an index file. diff --git a/snpsift.wdl b/snpsift.wdl index d964c255..4c354f48 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -32,9 +32,12 @@ task SnpSiftFilter { String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/snpsift:5.2--hdfd78af_0" + # Multicontainer with SnpSift 5.2 and bgzip/tabix 1.22 + String dockerImage = "quay.io/biocontainers/mulled-v2-d4bc0c23eb1d95c7ecff7f0e8b3a4255503fd5d4:c51b2e46bf63786b2d9a7a7d23680791163ab39a-0" } + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") + command { set -e ls ~{vcf} ~{vcfIndex} From e9189a7f5d61a46d1deec0108900a11d70630933 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 6 Jun 2025 10:55:53 +0200 Subject: [PATCH 881/902] missing trailing slash breaks samtools flagstat --- samtools.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 743fce0c..ac2e868a 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -296,9 +296,10 @@ task Flagstat { command { set -e mkdir -p "$(dirname ~{outputPath})" + samtools flagstat \ - --threads ~{threads - 1} - ~{inputBam} > ~{outputPath} + --threads 
~{threads - 1} \ + ~{inputBam} > ~{outputPath} } output { From 69a9c0a6751f78cfaa75c325fc49425113e268b1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 11:56:44 +0200 Subject: [PATCH 882/902] Add a task for bcftools norm --- CHANGELOG.md | 1 + bcftools.wdl | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7de262af..003aa97a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Add a task for bcftools norm. + Add support for outputting compressed files to snpeff and snpsift. + Fixed an issue with the parameter_meta section of bcftools annotate which caused wdlTools to error on parsing the file. diff --git a/bcftools.wdl b/bcftools.wdl index 0381d4cf..1b00b5fe 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -180,6 +180,67 @@ task Filter { } } +task Norm { + input { + File inputFile + File? inputFileIndex + String outputPath = "output.vcf.gz" + + File? fasta + String? regions + Boolean splitMultiallelicSites = false + + String memory = "2GiB" + Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + } + + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") + + command { + set -e + ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud + + mkdir -p "$(dirname ~{outputPath})" + bcftools norm \ + -o ~{outputPath} \ + -O ~{true="z" false="v" compressed} \ + ~{"--regions " + regions} \ + ~{"--fasta " + fasta} \ + ~{if splitMultiallelicSites then "--multiallelics -both" else ""} + + ~{if compressed then "bcftools index --tbi ~{outputPath}" else ""} + } + + output { + File outputVcf = outputPath + File? 
outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "A vcf or bcf file.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + fasta: {description: "Equivalent to bcftools norm's `--fasta` option.", category: "advanced"} + regions: {description: "Equivalent to bcftools norm's `--regions` option.", category: "advanced"} + splitMultiallelicSites: {description: "Whether multiallelic lines should be split up.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Sorted VCF file."} + outputVcfIndex: {description: "Index of sorted VCF file."} + } +} + task Sort { input { File inputFile From 5d4f5a7fa3846dea7b8a16fce9c47d8674a5f260 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:26:56 +0200 Subject: [PATCH 883/902] more time for bcftools norm --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 1b00b5fe..5fb06016 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -191,7 +191,7 @@ task Norm { Boolean splitMultiallelicSites = false String memory = "2GiB" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) + Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 2 String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From c97c55a47411b2395289ed3bf0357d8686dc7350 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:38:14 +0200 Subject: [PATCH 884/902] more time for bcftools Norm --- 
bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 5fb06016..56564b17 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -191,7 +191,7 @@ task Norm { Boolean splitMultiallelicSites = false String memory = "2GiB" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 2 + Int timeMinutes = 5 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 88ac2526f86f5a89d6de0fe74077f6bab05baf8d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:52:37 +0200 Subject: [PATCH 885/902] reset time for bcftools norm --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 56564b17..1b00b5fe 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -191,7 +191,7 @@ task Norm { Boolean splitMultiallelicSites = false String memory = "2GiB" - Int timeMinutes = 5 + ceil(size(inputFile, "G")) + Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 3c8ec631930d4ec7df1d01ba802d5943257dfd42 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:58:40 +0200 Subject: [PATCH 886/902] fix bcftools norm --- bcftools.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 1b00b5fe..ae2dee4e 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -207,7 +207,8 @@ task Norm { -O ~{true="z" false="v" compressed} \ ~{"--regions " + regions} \ ~{"--fasta " + fasta} \ - ~{if splitMultiallelicSites then "--multiallelics -both" else ""} + ~{if splitMultiallelicSites then "--multiallelics -both" else ""} \ + ~{inputFile} ~{if compressed then "bcftools index --tbi ~{outputPath}" else ""} } From 5dab6c7b08f05e831ea110b44acdefedc298f67b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 13:36:41 +0200 Subject: [PATCH 887/902] more memory for bcftools norm --- bcftools.wdl | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index ae2dee4e..b48956cc 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -190,7 +190,7 @@ task Norm { String? regions Boolean splitMultiallelicSites = false - String memory = "2GiB" + String memory = "10GiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 99c562c5e8ed51e8a2a04ec5dc72dada5248ff9e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 15:08:05 +0200 Subject: [PATCH 888/902] more memory for bcftools norm --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index b48956cc..7e297bc7 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -190,7 +190,7 @@ task Norm { String? regions Boolean splitMultiallelicSites = false - String memory = "10GiB" + String memory = "64GiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 99b9aca2b67d6a5d138c0b4dc9317f6b03bbc395 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 11 Jun 2025 14:24:53 +0200 Subject: [PATCH 889/902] Disable ai in multiqc --- CHANGELOG.md | 1 + multiqc.wdl | 3 +++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44f71e4b..e0b036ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ version 6.0.0-dev + Deprecate `modkit.Pileup`'s bedGraph option, it is now output by default. + Add support for filterThreshold/filterPercent for `modkit.Pileup`. + Add `modkit.Summary` task. ++ Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. version 5.2.0 --------------------------- diff --git a/multiqc.wdl b/multiqc.wdl index fae52178..18667b91 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -39,6 +39,7 @@ task MultiQC { # This must be actively enabled in my opinion. # The tools default is to upload. Boolean megaQCUpload = false + Boolean enableAi = false Int? 
dirsDepth String? title @@ -124,6 +125,7 @@ task MultiQC { ~{true="--lint" false="" lint} \ ~{true="--pdf" false="" pdf} \ ~{false="--no-megaqc-upload" true="" megaQCUpload} \ + ~{false="--no-ai" true="" enableAi} \ ~{"--config " + config} \ ~{"--cl-config " + clConfig } \ ~{reportDir} @@ -159,6 +161,7 @@ task MultiQC { lint: {description: "Equivalent to MultiQC's `--lint` flag.", category: "advanced"} pdf: {description: "Equivalent to MultiQC's `--pdf` flag.", category: "advanced"} megaQCUpload: {description: "Opposite to MultiQC's `--no-megaqc-upload` flag.", category: "advanced"} + enableAi: {description: "Opposite to MultiQC's `--no-ai` flag.", category: "advanced"} dirsDepth: {description: "Equivalent to MultiQC's `--dirs-depth` option.", category: "advanced"} title: {description: "Equivalent to MultiQC's `--title` option.", category: "advanced"} comment: {description: "Equivalent to MultiQC's `--comment` option.", category: "advanced"} From c882527a4c6e3c476a2a1ba15319b30d70f6dc53 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 17 Jun 2025 10:46:19 +0200 Subject: [PATCH 890/902] Support supplying additional reports/config to multiqc --- CHANGELOG.md | 1 + multiqc.wdl | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0b036ac..42542531 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ version 6.0.0-dev + Add support for filterThreshold/filterPercent for `modkit.Pileup`. + Add `modkit.Summary` task. + Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. ++ Support providing additional reports to MultiQC in workflow configuration. version 5.2.0 --------------------------- diff --git a/multiqc.wdl b/multiqc.wdl index 18667b91..8f05a36e 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -53,6 +53,7 @@ task MultiQC { File? fileList Array[String]+? exclude Array[String]+? module + Array[File]+? additionalReports String? dataFormat File? config # A directory String? 
clConfig @@ -79,13 +80,15 @@ task MultiQC { # strategy. Using python's builtin hash is unique enough # for these purposes. + Array[File] allReports = flatten([reports, select_all([additionalReports])]) + command { python3 < Date: Wed, 18 Jun 2025 08:38:36 +0200 Subject: [PATCH 891/902] ahmust be flatter --- multiqc.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiqc.wdl b/multiqc.wdl index 8f05a36e..db47ac87 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -80,7 +80,7 @@ task MultiQC { # strategy. Using python's builtin hash is unique enough # for these purposes. - Array[File] allReports = flatten([reports, select_all([additionalReports])]) + Array[File] allReports = flatten([reports, flatten(select_all([additionalReports]))]) command { python3 < Date: Mon, 7 Jul 2025 11:27:17 +0200 Subject: [PATCH 892/902] bump bedtools sort --- bedtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bedtools.wdl b/bedtools.wdl index a5d8aab3..50acd42d 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -209,7 +209,7 @@ task Sort { String memory = "~{512 + ceil(size(inputBed, "MiB"))}MiB" Int timeMinutes = 1 + ceil(size(inputBed, "GiB")) - String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + String dockerImage = "quay.io/biocontainers/bedtools:2.31.1--hf5e1c6e_2" } command { From c8fbb60c214cb3defe74b71aa60df6eefbae594a Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Mon, 7 Jul 2025 11:28:18 +0200 Subject: [PATCH 893/902] document --- CHANGELOG.md | 1 + bedtools.wdl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0b036ac..920993ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ bedtools.Sort: bumped container version to permit use of `faidx`. + Update vt task to allow a filter expression and compress and index the output. 
+ MultiQC image updated to version 1.28 + Samtools merge now has options added for merging RG and PG headers. diff --git a/bedtools.wdl b/bedtools.wdl index 50acd42d..7fcce28f 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -224,7 +224,7 @@ task Sort { ~{true="-chrThenScoreA" false="" chrThenScoreA} \ ~{true="-chrThenScoreD" false="" chrThenScoreD} \ ~{"-g " + genome} \ - ~{"-faidx" + faidx} \ + ~{"-faidx " + faidx} \ > ~{outputBed} } From 633d0bc76ffb27f09b957e7eb26153a8f3d5edac Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Mon, 7 Jul 2025 12:33:54 +0200 Subject: [PATCH 894/902] Add no name check support, requiring bumping Intersect as well --- bedtools.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bedtools.wdl b/bedtools.wdl index 7fcce28f..64fccc7b 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -270,10 +270,11 @@ task Intersect { Boolean writeA = false Boolean writeB = false Boolean stranded = false + Boolean nonamecheck = false String memory = "~{512 + ceil(size([regionsA, regionsB], "MiB"))}MiB" Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "GiB")) - String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + String dockerImage = "quay.io/biocontainers/bedtools:2.31.1--hf5e1c6e_2" } Boolean sorted = defined(faidx) @@ -289,6 +290,7 @@ task Intersect { ~{true="-wb" false="" writeB} \ ~{true="-s" false="" stranded} \ ~{true="-sorted" false="" sorted} \ + ~{true="-nonamecheck" false="" nonamecheck} \ ~{true="-g sorted.genome" false="" sorted} \ > ~{outputBed} } @@ -313,6 +315,7 @@ task Intersect { writeA: {description: "Write the original entry in A for each overlap.", category: "advanced"} writeB: {description: "Write the original entry in B for each overlap. Useful for knowing what A overlaps.", category: "advanced"} stranded: {description: "Force “strandedness”. That is, only report hits in B that overlap A on the same strand. 
By default, overlaps are reported without respect to strand.", category: "advanced"} + nonamecheck: {description: "Disable the bedtools intersect name check. This is used to catch chr1 vs chr01 or chr1 vs 1 naming inconsistencies. However, it throws an error for GIAB hg38 which has capital letters. https://github.com/arq5x/bedtools2/issues/648", category: "advanced"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From c676fe2198b18b437a3e279c240290b7227b94d4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 17 Jul 2025 10:37:57 +0200 Subject: [PATCH 895/902] address review comments --- bcftools.wdl | 13 ++++++++----- snpeff.wdl | 7 ++++++- snpsift.wdl | 7 ++++++- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 7e297bc7..31c7db13 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -190,8 +190,9 @@ task Norm { String? 
regions Boolean splitMultiallelicSites = false - String memory = "64GiB" + String memory = "4GiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) + Int diskGb = ceil(2.1 * size(inputFile, "G") + size(fasta, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -199,7 +200,7 @@ task Norm { command { set -e - ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud + ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" bcftools norm \ @@ -222,6 +223,7 @@ task Norm { memory: memory time_minutes: timeMinutes docker: dockerImage + disks: "local-disk ~{diskGb} SSD" # Based on an example in dxCompiler docs } parameter_meta { @@ -234,11 +236,12 @@ task Norm { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + diskGb: {description: "The amount of disk space needed for this job in GiB.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputVcf: {description: "Sorted VCF file."} - outputVcfIndex: {description: "Index of sorted VCF file."} + outputVcf: {description: "Normalized VCF file."} + outputVcfIndex: {description: "Index of Normalized VCF file."} } } @@ -424,7 +427,7 @@ task View { command { set -e - ls ~{inputFile} ~{inputFileIndex} + ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" bcftools view \ diff --git a/snpeff.wdl b/snpeff.wdl index e1b520af..b972ab30 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -48,7 +48,7 @@ task SnpEff { command { set -e - ls ~{vcf} ~{vcfIndex} + ls ~{vcf} ~{vcfIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" unzip ~{datadirZip} snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ @@ -82,6 +82,7 @@ task SnpEff { } parameter_meta { + # inputs vcf: {description: "A VCF file to analyse.", category: "required"} vcfIndex: {description: "The index for the VCF file.", category: "required"} genomeVersion: {description: "The version of the genome to be used. The database for this genome must be present in the datadirZip.", category: "required"} @@ -102,5 +103,9 @@ task SnpEff { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Annotated VCF file."} + outputVcfIndex: {description: "Index of annotated VCF file."} } } diff --git a/snpsift.wdl b/snpsift.wdl index 4c354f48..a62f7295 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -40,7 +40,7 @@ task SnpSiftFilter { command { set -e - ls ~{vcf} ~{vcfIndex} + ls ~{vcf} ~{vcfIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ @@ -64,6 +64,7 @@ task SnpSiftFilter { } parameter_meta { + # inputs vcf: {description: "A VCF file to filter.", category: "required"} vcfIndex: {description: "The index for the VCF file.", category: "common"} filterExpression: {description: "The SnpSift filtering expression.", category: "required"} @@ -75,5 +76,9 @@ task SnpSiftFilter { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Filtered VCF file."} + outputVcfIndex: {description: "Index of filtered VCF file."} } } From 9e9ae08503c7c2e10c0fe16d018bfb2810c4f3de Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 11:36:55 +0200 Subject: [PATCH 896/902] Update clair3 image --- CHANGELOG.md | 1 + clair3.wdl | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c56b124a..5cabdece 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -54,6 +54,7 @@ version 6.0.0-dev + Add support for filterThreshold/filterPercent for `modkit.Pileup`. + Add `modkit.Summary` task. + Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. 
++ Update clair3 version from 1.0.11 to 1.1.0 version 5.2.0 --------------------------- diff --git a/clair3.wdl b/clair3.wdl index 5a6154af..ae54ef40 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -34,8 +34,8 @@ task Clair3 { Int threads = 8 Boolean includeAllCtgs = false String memory = "~{threads + 16}GiB" - Int timeMinutes = 10 + ceil(size(bam, "G") * 400 / threads) - String dockerImage = "quay.io/biocontainers/clair3:1.0.11--py39hd649744_0" + Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) + String dockerImage = "quay.io/biocontainers/clair3:1.1.0--py39hd649744_0" } String modelArg = "~{if defined(modelTar) then basename(select_first([modelTar]), '.tar.gz') else builtinModel}" @@ -91,4 +91,4 @@ task Clair3 { vcfIndex: {description: "Output VCF index."} } -} \ No newline at end of file +} From d648745cfeedbc816081547f9772f0ee2d9f1692 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 12:13:43 +0200 Subject: [PATCH 897/902] Improve whatshap runtime/memory estimates --- CHANGELOG.md | 1 + whatshap.wdl | 20 ++++++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5cabdece..cfb8f41d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,7 @@ version 6.0.0-dev + Add `modkit.Summary` task. + Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. + Update clair3 version from 1.0.11 to 1.1.0 ++ Improve whatshap runtime/memory usage for our cluster. version 5.2.0 --------------------------- diff --git a/whatshap.wdl b/whatshap.wdl index da86ad82..beef5e99 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -40,12 +40,19 @@ task Phase { String memory = "4GiB" Int timeMinutes = 120 + + String memory = 2 + ceil(size(bam, "G") / 20 ) + Int timeMinutes = 400 + ceil(size(bam, "G") * 0.9 ) + # Whatshap 1.0, tabix 0.2.5. 
String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" } command { set -e + + mkdir -p $(dirname ~{outputVCF}) + whatshap phase \ ~{vcf} \ ~{phaseInput} \ @@ -110,12 +117,16 @@ task Stats { String? chromosome String memory = "4GiB" - Int timeMinutes = 120 + Int timeMinutes = 30 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" } command { + set -e + + mkdir -p $(dirname ~{tsv}) + whatshap stats \ ~{vcf} \ ~{if defined(gtf) then ("--gtf " + '"' + gtf + '"') else ""} \ @@ -169,7 +180,9 @@ task Haplotag { String? regions String? sample - String memory = "4GiB" + String memory = 2 + ceil(size(bam, "G") / 50 ) + Int timeMinutes = 50 + ceil(size(bam, "G") * 2 ) + Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" @@ -177,6 +190,9 @@ task Haplotag { command { set -e + + mkdir -p $(dirname ~{outputFile}) + whatshap haplotag \ ~{vcf} \ ~{alignments} \ From 7e246b01de31489577c434f69a5adbd2ab7cea2c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 12:18:54 +0200 Subject: [PATCH 898/902] Add modkit tasks --- CHANGELOG.md | 3 + modkit.wdl | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 193 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cfb8f41d..bf9d9238 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,9 @@ version 6.0.0-dev + Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. + Update clair3 version from 1.0.11 to 1.1.0 + Improve whatshap runtime/memory usage for our cluster. 
++ Add `Modkit.SampleProbs` ++ Add `Modkit.DmrMulti` ++ Add `Modkit.DmrMultiInputPrep` to construct the command line for `Modkit.DmrMulti` version 5.2.0 --------------------------- diff --git a/modkit.wdl b/modkit.wdl index 678e326a..a35d8ed2 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -130,7 +130,7 @@ task Summary { Int threads = 4 String memory = ceil(size(bam, "GiB") * 0.1) + 5 # Based on a linear model with some fudge (memory = 0.07540 * file_size - 0.6). - Int timeMinutes = 2880 / threads # 2 Days / threads + Int timeMinutes = 60 # originally this was set at "2 Days / threads" but with 4 threads and that much ram, it's pretty fast. String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" } @@ -177,3 +177,192 @@ task Summary { summaryReport: {description: "The output modkit summary."} } } + +task SampleProbs { + input { + File bam + File bamIndex + + String summary = "modkit-sample-probs" + + Boolean sample = true + Int? numReads # = 10042 + Float? samplingFrac # = 0.1 + Int? 
seed + + Int threads = 4 + String memory = "32G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" + } + + command <<< + set -e + mkdir -p ~{summary} + + modkit sample-probs \ + --threads ~{threads} \ + --out-dir ~{summary} \ + ~{true="" false="--no-sampling" sample} \ + ~{"--num-reads " + numReads} \ + ~{"--sampling-frac " + samplingFrac} \ + ~{"--seed " + seed} \ + --hist \ + ~{bam} + >>> + + output { + File reportCounts = "~{summary}/counts.html" + File reportProportion = "~{summary}/proportion.html" + File reportProbabilitiesTsv = "~{summary}/probabilities.tsv" + File reportThresholdsTsv = "~{summary}/thresholds.tsv" + } + + runtime { + docker: dockerImage + cpu: threads + memory: memory + time_minutes: timeMinutes + } + + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + + sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} + numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} + samplingFrac: {description: "Use a fixed percentage of reads, rather than a fixed number of reads, for sampling.", category: "advanced"} + seed: {description: "A seed can be provided for reproducibility in the sampling fraction case.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + summaryReport: {description: "The output modkit summary."} + } +} + +task DmrMultiInputPrep { + input { + Array[File] control + Array[File] condition + String controlName + String conditionName + + Int threads = 1 + String memory = "1G" + Int timeMinutes = 5 + String dockerImage = "quay.io/biocontainers/multiqc:1.28--pyhdfd78af_0" + } + + command <<< + cat > modkit_dmr.py <<'CODE' + #!/usr/bin/env python3 + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--control_n', type=str, default='control') + parser.add_argument('--control_f', type=str,nargs='+') + parser.add_argument('--condition_n', type=str, default='condition') + parser.add_argument('--condition_f', type=str,nargs='+') + args = parser.parse_args() + modkit = [] + for i, x in enumerate(args.control_f): + modkit.extend(['-s', x, f'{args.control_n}{i}']) + for i, x in enumerate(args.condition_f): + modkit.extend(['-s', x, f'{args.condition_n}{i}']) + print(' '.join(modkit), end='') + CODE + + python modkit_dmr.py \ + --control_n ~{controlName} \ + --control_f ~{sep=" " control} \ + --condition_n ~{conditionName} \ + --condition_f ~{sep=" " condition} + >>> + + output { + String params = select_first(read_lines(stdout())) + } + + runtime { + docker: dockerImage + cpu: threads + memory: memory + time_minutes: timeMinutes + } +} + + +task DmrMulti { + input { + String dmrMultiArguments + Array[File] control + Array[File] condition + + Array[File] controlIndex + Array[File] conditionIndex + + String controlName + String conditionName + + File referenceFasta + File referenceFastaFai + String dmr_dir = "results" + + File? 
cpg_islands + + Int threads = 4 + String memory = "32G" + Int timeMinutes = 600 + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" + } + + command <<< + set -e + mkdir -p ~{dmr_dir} + + modkit dmr multi \ + ~{dmrMultiArguments} \ + --out-dir ~{dmr_dir} \ + ~{"--regions-bed " + cpg_islands} \ + --ref ~{referenceFasta} \ + --base C \ + --threads ~{threads} \ + --header \ + --log-filepath dmr_multi.log + >>> + + output { + # TODO: other files + File log = "dmr_multi.log" + } + + runtime { + docker: dockerImage + cpu: threads + memory: memory + time_minutes: timeMinutes + } + + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + + sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} + numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} + samplingFrac: {description: "Use a fixed percentage of reads, rather than a fixed number of reads, for sampling.", category: "advanced"} + seed: {description: "A seed can be provided for reproducibility in the sampling fraction case.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + summaryReport: {description: "The output modkit summary."} + } +} From 8564f8c0a757cecd00155de011e10a1e51ab32cf Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 12:29:51 +0200 Subject: [PATCH 899/902] I don't feel like documenting it if it isn't used --- CHANGELOG.md | 2 - modkit.wdl | 122 --------------------------------------------------- 2 files changed, 124 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf9d9238..bb09f4f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,8 +57,6 @@ version 6.0.0-dev + Update clair3 version from 1.0.11 to 1.1.0 + Improve whatshap runtime/memory usage for our cluster. + Add `Modkit.SampleProbs` -+ Add `Modkit.DmrMulti` -+ Add `Modkit.DmrMultiInputPrep` to construct the command line for `Modkit.DmrMulti` version 5.2.0 --------------------------- diff --git a/modkit.wdl b/modkit.wdl index a35d8ed2..b38929f5 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -244,125 +244,3 @@ task SampleProbs { summaryReport: {description: "The output modkit summary."} } } - -task DmrMultiInputPrep { - input { - Array[File] control - Array[File] condition - String controlName - String conditionName - - Int threads = 1 - String memory = "1G" - Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/multiqc:1.28--pyhdfd78af_0" - } - - command <<< - cat > modkit_dmr.py <<'CODE' - #!/usr/bin/env python3 - import argparse - parser = argparse.ArgumentParser() - parser.add_argument('--control_n', type=str, default='control') - parser.add_argument('--control_f', type=str,nargs='+') - parser.add_argument('--condition_n', type=str, default='condition') - parser.add_argument('--condition_f', type=str,nargs='+') - args = parser.parse_args() - modkit = [] - for i, x in enumerate(args.control_f): - modkit.extend(['-s', x, f'{args.control_n}{i}']) - for i, x in enumerate(args.condition_f): - 
modkit.extend(['-s', x, f'{args.condition_n}{i}']) - print(' '.join(modkit), end='') - CODE - - python modkit_dmr.py \ - --control_n ~{controlName} \ - --control_f ~{sep=" " control} \ - --condition_n ~{conditionName} \ - --condition_f ~{sep=" " condition} - >>> - - output { - String params = select_first(read_lines(stdout())) - } - - runtime { - docker: dockerImage - cpu: threads - memory: memory - time_minutes: timeMinutes - } -} - - -task DmrMulti { - input { - String dmrMultiArguments - Array[File] control - Array[File] condition - - Array[File] controlIndex - Array[File] conditionIndex - - String controlName - String conditionName - - File referenceFasta - File referenceFastaFai - String dmr_dir = "results" - - File? cpg_islands - - Int threads = 4 - String memory = "32G" - Int timeMinutes = 600 - String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" - } - - command <<< - set -e - mkdir -p ~{dmr_dir} - - modkit dmr multi \ - ~{dmrMultiArguments} \ - --out-dir ~{dmr_dir} \ - ~{"--regions-bed " + cpg_islands} \ - --ref ~{referenceFasta} \ - --base C \ - --threads ~{threads} \ - --header \ - --log-filepath dmr_multi.log - >>> - - output { - # TODO: other files - File log = "dmr_multi.log" - } - - runtime { - docker: dockerImage - cpu: threads - memory: memory - time_minutes: timeMinutes - } - - parameter_meta { - # input - bam: {description: "The input alignment file", category: "required"} - bamIndex: {description: "The index for the input alignment file", category: "required"} - - sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} - numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} - samplingFrac: {description: "Use a fixed percentage of reads, rather than a fixed number of reads, for sampling.", category: "advanced"} - seed: {description: "A seed can be provided for 
reproducibility in the sampling fraction case.", category: "advanced"} - - threads: {description: "The number of threads to use.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - - # output - summaryReport: {description: "The output modkit summary."} - } -} From 040a43e6723d672f97f126a1628e773d75fb6515 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 12:29:51 +0200 Subject: [PATCH 900/902] I don't feel like documenting it if it isn't used --- modkit.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index b38929f5..ddf4dbf7 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -229,6 +229,7 @@ task SampleProbs { # input bam: {description: "The input alignment file", category: "required"} bamIndex: {description: "The index for the input alignment file", category: "required"} + summary: {description: "A folder for the outputs", category: "required"} sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} @@ -241,6 +242,9 @@ task SampleProbs { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # output - summaryReport: {description: "The output modkit summary."} + reportCounts: {description: "The output html report of counts"} + reportProportion: {description: "The output html report of proportions"} + reportProbabilitiesTsv: {description: "The output TSV of Probabilities"} + reportThresholdsTsv: {description: "The output TSV of thresholds"} } } From bd54aeb0bcdd67db3fb180b890954bca92000287 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 13:13:43 +0200 Subject: [PATCH 901/902] incorrect inputs --- whatshap.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/whatshap.wdl b/whatshap.wdl index beef5e99..3b2bd1d3 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -41,8 +41,8 @@ task Phase { String memory = "4GiB" Int timeMinutes = 120 - String memory = 2 + ceil(size(bam, "G") / 20 ) - Int timeMinutes = 400 + ceil(size(bam, "G") * 0.9 ) + String memory = 2 + ceil(size(phaseInput, "G") / 20 ) + Int timeMinutes = 400 + ceil(size(phaseInput, "G") * 0.9 ) # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" @@ -180,8 +180,8 @@ task Haplotag { String? regions String? sample - String memory = 2 + ceil(size(bam, "G") / 50 ) - Int timeMinutes = 50 + ceil(size(bam, "G") * 2 ) + String memory = 2 + ceil(size(alignments, "G") / 50 ) + Int timeMinutes = 50 + ceil(size(alignments, "G") * 2 ) Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. 
From e7061594546ceac5e7bbcdc48877bc78b5ec795c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 15:46:06 +0200 Subject: [PATCH 902/902] Fix duplicate declarations --- whatshap.wdl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/whatshap.wdl b/whatshap.wdl index 3b2bd1d3..b491f566 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -38,9 +38,6 @@ task Phase { String? threshold String? ped - String memory = "4GiB" - Int timeMinutes = 120 - String memory = 2 + ceil(size(phaseInput, "G") / 20 ) Int timeMinutes = 400 + ceil(size(phaseInput, "G") * 0.9 ) @@ -183,7 +180,6 @@ task Haplotag { String memory = 2 + ceil(size(alignments, "G") / 50 ) Int timeMinutes = 50 + ceil(size(alignments, "G") * 2 ) - Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" }