From 32e7a8bf0658d1ed5ff66a34b69b15badcf2dd3a Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 21 Jun 2019 11:21:37 +0200 Subject: [PATCH 0001/1208] add SV task: bcftools.wdl --- bcftools.wdl | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 bcftools.wdl diff --git a/bcftools.wdl b/bcftools.wdl new file mode 100644 index 00000000..de7730bb --- /dev/null +++ b/bcftools.wdl @@ -0,0 +1,22 @@ +version 1.0 + +task Bcf2Vcf { + input { + File bcf + String outputPath + } + + command <<< + set -e + mkdir -p $(dirname ~{outputPath}) + bcftools view ~{bcf} -O v -o ~{outputPath} + >>> + + output { + File OutputVcf = "~{outputPath}" + } + + runtime { + docker: "quay.io/biocontainers/bcftools:1.9--ha228f0b_3" + } +} From 357e43000f2e16e62927dd72cb1f36da24f384a5 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 21 Jun 2019 11:22:50 +0200 Subject: [PATCH 0002/1208] add SV task: clever.wdl --- clever.wdl | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 clever.wdl diff --git a/clever.wdl b/clever.wdl new file mode 100644 index 00000000..b6eabd07 --- /dev/null +++ b/clever.wdl @@ -0,0 +1,74 @@ +version 1.0 + +import "common.wdl" +import "bwa.wdl" +task Prediction { + input { + IndexedBamFile bamFile + BwaIndex bwaIndex + String outputPath + Int threads = 10 + } + + + command <<< + set -e + mkdir -p $(dirname ~{outputPath}) + clever \ + -T ~{threads} \ + --use_mapq \ + --sorted \ + -f \ + ~{bamFile.file} \ + ~{bwaIndex.fastaFile} \ + ~{outputPath} + >>> + + output { + File predictions = "~{outputPath}/predictions.vcf" + } + + runtime { + cpu: threads + docker: "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" + } + +} + +task Mateclever { + input { + IndexedBamFile bamFile + BwaIndex bwaIndex + File predictions + String outputPath + Int threads = 10 + Int cleverMaxDelLength = 100000 + Int maxLengthDiff= 30 + Int maxOffset = 150 + } + + command <<< + set -e + mkdir -p $(dirname 
~{outputPath}) + echo ~{outputPath} ~{bamFile.file} ~{predictions} none > predictions.list + mateclever \ + -T ~{threads} \ + -k \ + -f \ + -M ~{cleverMaxDelLength} \ + -z ~{maxLengthDiff} \ + -o ~{maxOffset} \ + ~{bwaIndex.fastaFile} \ + predictions.list \ + ~{outputPath} + >>> + + output { + File matecleverVcf = "~{outputPath}/deletions.vcf" + } + + runtime { + cpu: threads + docker: "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" + } +} From 7dc642847fd4851ca9a75b6808ecd1318dd1781f Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 21 Jun 2019 11:24:13 +0200 Subject: [PATCH 0003/1208] add SV task: delly.wdl --- delly.wdl | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 delly.wdl diff --git a/delly.wdl b/delly.wdl new file mode 100644 index 00000000..2fc63cda --- /dev/null +++ b/delly.wdl @@ -0,0 +1,30 @@ +version 1.0 + +import "common.wdl" + +task CallSV { + input { + IndexedBamFile bamFile + Reference reference + String outputPath + } + + + command <<< + set -e + mkdir -p $(dirname ~{outputPath}) + delly call \ + -o ~{outputPath} \ + -g ~{reference.fasta} \ + ~{bamFile.file} + >>> + + output { + File dellyBcf = "~{outputPath}" + } + + runtime { + docker: "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" + } + +} From 334ee17c7ac79d66c0ab56aad1b84330ccdc772f Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 21 Jun 2019 11:25:44 +0200 Subject: [PATCH 0004/1208] add SV task: survivor.wdl --- survivor.wdl | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 survivor.wdl diff --git a/survivor.wdl b/survivor.wdl new file mode 100644 index 00000000..f4fdc4b1 --- /dev/null +++ b/survivor.wdl @@ -0,0 +1,42 @@ +version 1.0 + +import "common.wdl" + +task Merge { + input{ + Array[File] filePaths + Int breakpointDistance = 1000 + Int suppVecs = 2 + Int svType = 1 + Int strandType = 1 + Int distanceBySvSize = 0 + Int minSize = 30 + String sample + String outputPath + Int memory = 
128 + } + + command <<< + set -e + mkdir -p $(dirname ~{outputPath}) + echo '~{sep="\n" filePaths}' > fileList + SURVIVOR merge \ + fileList \ + ~{breakpointDistance} \ + ~{suppVecs} \ + ~{svType} \ + ~{strandType} \ + ~{distanceBySvSize} \ + ~{minSize} \ + ~{outputPath} + >>> + + output { + File mergedVcf = "~{outputPath}" + } + + runtime { + docker: "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" + memory: memory + } +} From 47fa56b27f4e4cf96952cab585fcaf0770c44e0c Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 21 Jun 2019 11:35:35 +0200 Subject: [PATCH 0005/1208] modified: manta.wdl, add germline task for SV pipeline --- manta.wdl | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/manta.wdl b/manta.wdl index 759766e3..e8f11867 100644 --- a/manta.wdl +++ b/manta.wdl @@ -69,3 +69,55 @@ task Somatic { docker: "quay.io/biocontainers/manta:" + dockerTag } } + +task Germline { + input { + IndexedBamFile normalBam + Reference reference + String runDir + File? callRegions + File? 
callRegionsIndex + Boolean exome = false + + Int cores = 1 + Int memory = 4 + String dockerTag = "1.4.0--py27_1" + } + + command { + set -e + configManta.py \ + ~{"--normalBam " + normalBam.file} \ + --referenceFasta ~{reference.fasta} \ + ~{"--callRegions " + callRegions} \ + --runDir ~{runDir} \ + ~{true="--exome" false="" exome} + + ~{runDir}/runWorkflow.py \ + -m local \ + -j ~{cores} \ + -g ~{memory} + } + + output { + IndexedVcfFile candidateSmallIndels = object { + file: runDir + "/results/variants/candidateSmallIndels.vcf.gz", + index: runDir + "/results/variants/candidateSmallIndels.vcf.gz.tbi" + } + IndexedVcfFile candidateSV = object { + file: runDir + "/results/variants/candidateSV.vcf.gz", + index: runDir + "/results/variants/candidateSV.vcf.gz.tbi" + } + IndexedVcfFile diploidSV = object { + file: runDir + "/results/variants/diploidSV.vcf.gz", + index: runDir + "/results/variants/diploidSV.vcf.gz.tbi" + } + } + + runtime { + cpu: cores + memory: memory + docker: docker: "quay.io/biocontainers/manta:" + dockerTag + } +} + From f6062171d474d52568c2653ca6d0e4ccf389ebef Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 21 Jun 2019 13:59:07 +0200 Subject: [PATCH 0006/1208] modify manta.wdl: deleted duplicates --- manta.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manta.wdl b/manta.wdl index e8f11867..a80ec832 100644 --- a/manta.wdl +++ b/manta.wdl @@ -117,7 +117,7 @@ task Germline { runtime { cpu: cores memory: memory - docker: docker: "quay.io/biocontainers/manta:" + dockerTag + docker: "quay.io/biocontainers/manta:" + dockerTag } } From 963be8b7b68f6977323af4bd87e3cf2549949755 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 21 Jun 2019 14:08:07 +0200 Subject: [PATCH 0007/1208] modify tasks/picard.wdl: add renameSample task --- picard.wdl | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index 9f42acbd..d79228a1 100644 --- a/picard.wdl +++ b/picard.wdl @@ 
-1,5 +1,7 @@ version 1.0 +import "common.wdl" + task BedToIntervalList { input { File bedFile @@ -459,4 +461,34 @@ task SortVcf { docker: "quay.io/biocontainers/picard:" + dockerTag memory: ceil(memory * memoryMultiplier) } -} \ No newline at end of file +} + +task RenameSample { + input { + File inputVcf + String outputPath + String newSampleName + Int memory = 8 + Float memoryMultiplier = 3.0 + } + + command { + set -e + mkdir -p $(dirname ~{outputPath}) + picard -Xmx~{memory}G \ + RenameSampleInVcf \ + I=~{inputVcf} \ + O=~{outputPath} \ + NEW_SAMPLE_NAME=~{newSampleName} + } + + output { + File renamedVcf = "~{outputPath}" + } + + runtime { + docker: "quay.io/biocontainers/picard:2.19.0--0" + memory: ceil(memory * memoryMultiplier) + } +} + From 61bc61458eeea9070b67bde5e2486453db54d85f Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 26 Jun 2019 09:42:47 +0200 Subject: [PATCH 0008/1208] modify delly.wdl: add 6G memory in runtime --- delly.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/delly.wdl b/delly.wdl index 2fc63cda..e0099064 100644 --- a/delly.wdl +++ b/delly.wdl @@ -25,6 +25,7 @@ task CallSV { runtime { docker: "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" + memory: "5G" } } From 4ffeedfe8952b67ae0b80d53f54758bc34e1a9d6 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 26 Jun 2019 09:44:25 +0200 Subject: [PATCH 0009/1208] modify clever.wdl: add 6G memory in runtime --- clever.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clever.wdl b/clever.wdl index b6eabd07..031af0f1 100644 --- a/clever.wdl +++ b/clever.wdl @@ -30,6 +30,7 @@ task Prediction { runtime { cpu: threads + memory: "6G" docker: "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } @@ -69,6 +70,7 @@ task Mateclever { runtime { cpu: threads + memory: "6G" docker: "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } } From 04d2a805f6b8a386c741b61c9a777c2b4a8cf146 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 1 Jul 2019 10:19:50 +0200 Subject: [PATCH 
0010/1208] modify clever.wdl: replaced string "6G" to integer 6 in memory runtime --- clever.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/clever.wdl b/clever.wdl index 031af0f1..e451ecf4 100644 --- a/clever.wdl +++ b/clever.wdl @@ -8,6 +8,7 @@ task Prediction { BwaIndex bwaIndex String outputPath Int threads = 10 + Int mem = 6 } @@ -30,7 +31,7 @@ task Prediction { runtime { cpu: threads - memory: "6G" + memory: mem docker: "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } @@ -43,6 +44,7 @@ task Mateclever { File predictions String outputPath Int threads = 10 + Int mem = 6 Int cleverMaxDelLength = 100000 Int maxLengthDiff= 30 Int maxOffset = 150 @@ -70,7 +72,7 @@ task Mateclever { runtime { cpu: threads - memory: "6G" + memory: mem docker: "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } } From fe4faad7228e789b0cfedb9080bc8bbdc68fb3c6 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 1 Jul 2019 10:21:28 +0200 Subject: [PATCH 0011/1208] modify delly.wdl: replaced string "5G" to integer 5 in memory runtime --- delly.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/delly.wdl b/delly.wdl index e0099064..1bce6f75 100644 --- a/delly.wdl +++ b/delly.wdl @@ -7,6 +7,7 @@ task CallSV { IndexedBamFile bamFile Reference reference String outputPath + Int mem = 5 } @@ -25,7 +26,7 @@ task CallSV { runtime { docker: "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" - memory: "5G" + memory: mem } } From e1e63b2dd4aef1db7c4e9c2e44e59f1c5ad53664 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 1 Jul 2019 12:28:21 +0200 Subject: [PATCH 0012/1208] modify delly.wdl: increased memory to 10G --- delly.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/delly.wdl b/delly.wdl index 1bce6f75..6e5a63d1 100644 --- a/delly.wdl +++ b/delly.wdl @@ -7,7 +7,7 @@ task CallSV { IndexedBamFile bamFile Reference reference String outputPath - Int mem = 5 + Int mem = 10 } From 
8ef3d860923dcb9d9a29ffac523f108a563a5711 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 1 Jul 2019 18:38:17 +0200 Subject: [PATCH 0013/1208] modify clever.wdl: increased memory from 6G to 10G --- clever.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clever.wdl b/clever.wdl index e451ecf4..619b6364 100644 --- a/clever.wdl +++ b/clever.wdl @@ -8,7 +8,7 @@ task Prediction { BwaIndex bwaIndex String outputPath Int threads = 10 - Int mem = 6 + Int mem = 10 } @@ -44,7 +44,7 @@ task Mateclever { File predictions String outputPath Int threads = 10 - Int mem = 6 + Int mem = 10 Int cleverMaxDelLength = 100000 Int maxLengthDiff= 30 Int maxOffset = 150 From dc1f66b5e1cb5875692026be6f18aaccf8e61458 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 17 Jul 2019 11:25:27 +0200 Subject: [PATCH 0014/1208] samtools.wdl: add include header option to samtools view --- samtools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index a78f5535..53559062 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -209,6 +209,7 @@ task View { File inFile File? referenceFasta String outputFileName + Boolean? includeHeader Boolean? outputBam Boolean? uncompressedBamOutput Int? 
includeFilter @@ -225,6 +226,7 @@ task View { samtools view \ ~{"-T " + referenceFasta} \ ~{"-o " + outputFileName} \ + ~{true="-h " false="" includeHeader} \ ~{true="-b " false="" outputBam} \ ~{true="-u " false="" uncompressedBamOutput} \ ~{"-f " + includeFilter} \ From 2f1604a82920933012d1e8f699551a6da739873b Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 18 Jul 2019 14:15:07 +0200 Subject: [PATCH 0015/1208] assign filtered bam as input to mateclever --- clever.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clever.wdl b/clever.wdl index 619b6364..5f9e9c72 100644 --- a/clever.wdl +++ b/clever.wdl @@ -39,7 +39,8 @@ task Prediction { task Mateclever { input { - IndexedBamFile bamFile + File fiteredBamFile + File indexedFiteredBamFile BwaIndex bwaIndex File predictions String outputPath @@ -53,7 +54,7 @@ task Mateclever { command <<< set -e mkdir -p $(dirname ~{outputPath}) - echo ~{outputPath} ~{bamFile.file} ~{predictions} none > predictions.list + echo ~{outputPath} ~{fiteredBamFile} ~{predictions} none > predictions.list mateclever \ -T ~{threads} \ -k \ From 048464271d4575bb54137ffb93e008fdaa40c7de Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 23 Sep 2019 10:49:11 +0200 Subject: [PATCH 0016/1208] increase memory clever and delly --- clever.wdl | 4 ++-- delly.wdl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clever.wdl b/clever.wdl index 5f9e9c72..8e90be8e 100644 --- a/clever.wdl +++ b/clever.wdl @@ -8,7 +8,7 @@ task Prediction { BwaIndex bwaIndex String outputPath Int threads = 10 - Int mem = 10 + Int mem = 11 } @@ -45,7 +45,7 @@ task Mateclever { File predictions String outputPath Int threads = 10 - Int mem = 10 + Int mem = 11 Int cleverMaxDelLength = 100000 Int maxLengthDiff= 30 Int maxOffset = 150 diff --git a/delly.wdl b/delly.wdl index 6e5a63d1..794f78eb 100644 --- a/delly.wdl +++ b/delly.wdl @@ -7,7 +7,7 @@ task CallSV { IndexedBamFile bamFile Reference reference String outputPath - Int mem = 10 + 
Int mem = 11 } From 3bdf90e4ec2d6a176c4520c0fe621686378c2997 Mon Sep 17 00:00:00 2001 From: cagaser Date: Tue, 24 Sep 2019 09:41:08 +0200 Subject: [PATCH 0017/1208] increase memory of clever and delly to 15G --- clever.wdl | 4 ++-- delly.wdl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clever.wdl b/clever.wdl index 8e90be8e..6863e6f8 100644 --- a/clever.wdl +++ b/clever.wdl @@ -8,7 +8,7 @@ task Prediction { BwaIndex bwaIndex String outputPath Int threads = 10 - Int mem = 11 + Int mem = 15 } @@ -45,7 +45,7 @@ task Mateclever { File predictions String outputPath Int threads = 10 - Int mem = 11 + Int mem = 15 Int cleverMaxDelLength = 100000 Int maxLengthDiff= 30 Int maxOffset = 150 diff --git a/delly.wdl b/delly.wdl index 794f78eb..f30e6f48 100644 --- a/delly.wdl +++ b/delly.wdl @@ -7,7 +7,7 @@ task CallSV { IndexedBamFile bamFile Reference reference String outputPath - Int mem = 11 + Int mem = 15 } From e77b5b4e0cf6fd5d38ff29686205e8ea690f7075 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 2 Dec 2019 14:42:25 +0100 Subject: [PATCH 0018/1208] setting next version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 7ec1d6db..ccbccc3d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.1.0 +2.2.0 From f810eaf9673b98353efebd0d1cf9e98ea012a3ba Mon Sep 17 00:00:00 2001 From: jboom1 Date: Mon, 9 Dec 2019 14:09:57 +0100 Subject: [PATCH 0019/1208] Update TALON default image to version 4.4.1. --- CHANGELOG.md | 5 +++-- talon.wdl | 16 ++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c20e0d6a..ad645cd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,13 +11,14 @@ that users understand how the changes affect the new version. version 2.1.0 --------------------------- -+ Make intervals optional for GATK CombineGVCFs ++ Update TALON default image to version 4.4.1. ++ Make intervals optional for GATK CombineGVCFs. 
+ Updated biowdl-input-converter version. + GATK CombineGVCFs memory was tripled to prevent it from using a lot of CPU in Garbage Collection mode. + Updated parameter_meta sections for Minimap2 and TranscriptClean to wdl-aid format. + Updated cores variable for TALON, the default is now 4. -+ Updated TALON to version 4.4. ++ Updated TALON default image to version 4.4. + Added parameter_meta sections to the following tools: + htseq + cutadapt diff --git a/talon.wdl b/talon.wdl index 9d3b5304..9c426e55 100644 --- a/talon.wdl +++ b/talon.wdl @@ -32,7 +32,7 @@ task CreateAbundanceFileFromDatabase { Int cores = 1 String memory = "4G" - String dockerImage = "biocontainers/talon:v4.4_cv1" + String dockerImage = "biocontainers/talon:v4.4.1_cv1" } command { @@ -102,7 +102,7 @@ task CreateGtfFromDatabase { Int cores = 1 String memory = "4G" - String dockerImage = "biocontainers/talon:v4.4_cv1" + String dockerImage = "biocontainers/talon:v4.4.1_cv1" } command { @@ -174,7 +174,7 @@ task FilterTalonTranscripts { Int cores = 1 String memory = "4G" - String dockerImage = "biocontainers/talon:v4.4_cv1" + String dockerImage = "biocontainers/talon:v4.4.1_cv1" } command { @@ -227,7 +227,7 @@ task GetReadAnnotations { Int cores = 1 String memory = "4G" - String dockerImage = "biocontainers/talon:v4.4_cv1" + String dockerImage = "biocontainers/talon:v4.4.1_cv1" } command { @@ -287,7 +287,7 @@ task InitializeTalonDatabase { Int cores = 1 String memory = "10G" - String dockerImage = "biocontainers/talon:v4.4_cv1" + String dockerImage = "biocontainers/talon:v4.4.1_cv1" } command { @@ -360,7 +360,7 @@ task ReformatGtf { Int cores = 1 String memory = "4G" - String dockerImage = "biocontainers/talon:v4.4_cv1" + String dockerImage = "biocontainers/talon:v4.4.1_cv1" } command { @@ -397,7 +397,7 @@ task SummarizeDatasets { Int cores = 1 String memory = "4G" - String dockerImage = "biocontainers/talon:v4.4_cv1" + String dockerImage = "biocontainers/talon:v4.4.1_cv1" } command { @@ -458,7 +458,7 
@@ task Talon { Int cores = 4 String memory = "20G" - String dockerImage = "biocontainers/talon:v4.4_cv1" + String dockerImage = "biocontainers/talon:v4.4.1_cv1" } command { From b42f2286242e0e9d85eb2ba56e357d699c2bfe92 Mon Sep 17 00:00:00 2001 From: jboom1 Date: Mon, 9 Dec 2019 14:11:35 +0100 Subject: [PATCH 0020/1208] Change update to reflect version 2.2.0. --- CHANGELOG.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ad645cd4..c2cb40b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,16 +9,19 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 2.1.0 +version 2.2.0 --------------------------- + Update TALON default image to version 4.4.1. + +version 2.1.0 +--------------------------- + Make intervals optional for GATK CombineGVCFs. + Updated biowdl-input-converter version. + GATK CombineGVCFs memory was tripled to prevent it from using a lot of CPU in Garbage Collection mode. + Updated parameter_meta sections for Minimap2 and TranscriptClean to wdl-aid format. + Updated cores variable for TALON, the default is now 4. -+ Updated TALON default image to version 4.4. ++ Updated TALON to version 4.4. + Added parameter_meta sections to the following tools: + htseq + cutadapt From faf7b585d5526c33e2dea0cbe686e31b795309da Mon Sep 17 00:00:00 2001 From: Jasper Boom Date: Mon, 9 Dec 2019 14:26:15 +0100 Subject: [PATCH 0021/1208] Update CHANGELOG.md Co-Authored-By: DavyCats --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c2cb40b0..09418cc8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 2.2.0 +version 2.2.0-dev --------------------------- + Update TALON default image to version 4.4.1. 
From 0bc7de8bb472f1f6d6edd74cd03fea0e856b98eb Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Dec 2019 14:06:57 +0100 Subject: [PATCH 0022/1208] update scripts submodule --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 83bf72b9..fc603e5d 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 83bf72b91f6b9660f64dfa9d8096f6e57b167083 +Subproject commit fc603e5d408b89b99297fb5737586c059c5f9df6 From 62760c247f9cc0c9e7beef6223afa0034e2c27a5 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Dec 2019 14:14:40 +0100 Subject: [PATCH 0023/1208] add miniwdl to test requirements file --- requirements-test.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements-test.txt b/requirements-test.txt index 438f683f..f074413b 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1 +1,2 @@ -cromwell \ No newline at end of file +cromwell +miniwdl \ No newline at end of file From bea74f064215c674d8c580fed104d1d57cb3bfdd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Dec 2019 14:31:01 +0100 Subject: [PATCH 0024/1208] adress unused inputs in bedtools sort --- CPAT.wdl | 2 +- bedtools.wdl | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CPAT.wdl b/CPAT.wdl index 53aeac94..f9a77bed 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -18,7 +18,7 @@ task CPAT { # select_first is needed in order to convert the optional arrays to non-optionals. command { set -e - mkdir -p $(dirname ~{outFilePath}) + mkdir -p "$(dirname ~{outFilePath})" cpat.py \ --gene ~{gene} \ --outfile ~{outFilePath} \ diff --git a/bedtools.wdl b/bedtools.wdl index f9859f93..35ce7319 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -27,6 +27,7 @@ task Sort { Boolean sizeD = false Boolean chrThenSizeA = false Boolean chrThenSizeD = false + Boolean chrThenScoreA = false Boolean chrThenScoreD = false File? g File? 
faidx @@ -41,6 +42,9 @@ task Sort { -i ~{inputBed} \ ~{true="-sizeA" false="" sizeA} \ ~{true="-sizeD" false="" sizeD} \ + ~{true="-chrThenSizeA" false="" chrThenSizeA} \ + ~{true="-chrThenSizeD" false="" chrThenSizeD} \ + ~{true="-chrThenScoreA" false="" chrThenScoreA} \ ~{true="-chrThenScoreD" false="" chrThenScoreD} \ ~{"-g " + g} \ ~{"-faidx" + faidx} \ From 861825712da5b2e65780110c929506fa4903d962 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Dec 2019 14:55:21 +0100 Subject: [PATCH 0025/1208] fix SC2046 --- bedtools.wdl | 2 +- biowdl.wdl | 2 +- bowtie.wdl | 2 +- bwa.wdl | 2 +- collect-columns.wdl | 2 +- common.wdl | 6 +++--- fastqsplitter.wdl | 2 +- gatk.wdl | 20 ++++++++++---------- hisat2.wdl | 2 +- htseq.wdl | 2 +- minimap2.wdl | 4 ++-- picard.wdl | 18 +++++++++--------- samtools.wdl | 16 ++++++++-------- seqtk.wdl | 2 +- star.wdl | 2 +- stringtie.wdl | 4 ++-- talon.wdl | 14 +++++++------- transcriptclean.wdl | 6 +++--- wisestork.wdl | 2 +- 19 files changed, 55 insertions(+), 55 deletions(-) diff --git a/bedtools.wdl b/bedtools.wdl index 35ce7319..f6748f31 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -37,7 +37,7 @@ task Sort { command { set -e - mkdir -p $(dirname ~{outputBed}) + mkdir -p "$(dirname ~{outputBed})" bedtools sort \ -i ~{inputBed} \ ~{true="-sizeA" false="" sizeA} \ diff --git a/biowdl.wdl b/biowdl.wdl index e524ac6e..32fd5a73 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -36,7 +36,7 @@ task InputConverter { command <<< set -e - mkdir -p $(dirname ~{outputFile}) + mkdir -p "$(dirname ~{outputFile})" biowdl-input-converter \ -o ~{outputFile} \ ~{true="--skip-file-check" false="" skipFileCheck} \ diff --git a/bowtie.wdl b/bowtie.wdl index 51d18be7..72a39641 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -49,7 +49,7 @@ task Bowtie { command { set -e -o pipefail - mkdir -p $(dirname ~{outputPath}) + mkdir -p "$(dirname ~{outputPath})" bowtie -q \ --sam \ ~{"--seedmms " + seedmms} \ diff --git a/bwa.wdl b/bwa.wdl index 66238991..05c8716a 
100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -18,7 +18,7 @@ task Mem { command { set -e -o pipefail - mkdir -p $(dirname ~{outputPath}) + mkdir -p "$(dirname ~{outputPath})" bwa mem \ ~{"-t " + threads} \ ~{"-R '" + readgroup}~{true="'" false="" defined(readgroup)} \ diff --git a/collect-columns.wdl b/collect-columns.wdl index 263e317d..d453e5be 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -18,7 +18,7 @@ task CollectColumns { command { set -e - mkdir -p $(dirname ~{outputPath}) + mkdir -p "$(dirname ~{outputPath})" collect-columns \ ~{outputPath} \ ~{sep=" " inputTables} \ diff --git a/common.wdl b/common.wdl index dccac856..a8365198 100644 --- a/common.wdl +++ b/common.wdl @@ -57,7 +57,7 @@ task ConcatenateTextFiles { command { set -e -o pipefail - mkdir -p $(dirname ~{combinedFilePath}) + mkdir -p "$(dirname ~{combinedFilePath})" ~{cmdPrefix} ~{sep=" " fileList} ~{cmdSuffix} > ~{combinedFilePath} } @@ -82,7 +82,7 @@ task Copy { command { set -e - mkdir -p $(dirname ~{outputPath}) + mkdir -p "$(dirname ~{outputPath})" cp ~{true="-r" false="" recursive} ~{inputFile} ~{outputPath} } @@ -166,7 +166,7 @@ task YamlToJson { } command { set -e - mkdir -p $(dirname ~{outputJson}) + mkdir -p "$(dirname ~{outputJson})" python < " arguments according to IDs and VCFs to merge # Make sure commands are run in bash diff --git a/hisat2.wdl b/hisat2.wdl index e40f3616..3423e56b 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -24,7 +24,7 @@ task Hisat2 { command { set -e -o pipefail - mkdir -p $(dirname ~{outputBam}) + mkdir -p "$(dirname ~{outputBam})" hisat2 \ -p ~{threads} \ -x ~{sub(indexFiles[0], "\.[0-9]\.ht2", "")} \ diff --git a/htseq.wdl b/htseq.wdl index 645396e8..3afe51a4 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -18,7 +18,7 @@ task HTSeqCount { command { set -e - mkdir -p $(dirname ~{outputTable}) + mkdir -p "$(dirname ~{outputTable})" htseq-count \ -f ~{format} \ -r ~{order} \ diff --git a/minimap2.wdl b/minimap2.wdl index c29f3314..d8a454da 100644 --- 
a/minimap2.wdl +++ b/minimap2.wdl @@ -37,7 +37,7 @@ task Indexing { command { set -e - mkdir -p $(dirname ~{outputPrefix}) + mkdir -p "$(dirname ~{outputPrefix})" minimap2 \ ~{true="-H" false="" useHomopolymerCompressedKmer} \ ~{"-k " + kmerSize} \ @@ -116,7 +116,7 @@ task Mapping { command { set -e - mkdir -p $(dirname ~{outputPrefix}) + mkdir -p "$(dirname ~{outputPrefix})" minimap2 \ ~{"-x " + presetOption} \ ~{"-k " + kmerSize} \ diff --git a/picard.wdl b/picard.wdl index 12e8d880..caee009b 100644 --- a/picard.wdl +++ b/picard.wdl @@ -13,7 +13,7 @@ task BedToIntervalList { command { set -e - mkdir -p $(dirname "~{outputPath}") + mkdir -p "$(dirname ~{outputPath})" picard -Xmx~{javaXmx} \ BedToIntervalList \ I=~{bedFile} \ @@ -58,7 +58,7 @@ task CollectMultipleMetrics { command { set -e - mkdir -p $(dirname "~{basename}") + mkdir -p "$(dirname ~{basename})" picard -Xmx~{javaXmx} \ CollectMultipleMetrics \ I=~{inputBam} \ @@ -140,7 +140,7 @@ task CollectRnaSeqMetrics { command { set -e - mkdir -p $(dirname "~{basename}") + mkdir -p "$(dirname ~{basename})" picard -Xmx~{javaXmx} \ CollectRnaSeqMetrics \ I=~{inputBam} \ @@ -179,7 +179,7 @@ task CollectTargetedPcrMetrics { command { set -e - mkdir -p $(dirname "~{basename}") + mkdir -p "$(dirname ~{basename})" picard -Xmx~{javaXmx} \ CollectTargetedPcrMetrics \ I=~{inputBam} \ @@ -217,7 +217,7 @@ task GatherBamFiles { command { set -e - mkdir -p $(dirname ~{outputBamPath}) + mkdir -p "$(dirname ~{outputBamPath})" picard -Xmx~{javaXmx} \ GatherBamFiles \ INPUT=~{sep=' INPUT=' inputBams} \ @@ -251,7 +251,7 @@ task GatherVcfs { command { set -e - mkdir -p $(dirname ~{outputVcfPath}) + mkdir -p "$(dirname ~{outputVcfPath})" picard -Xmx~{javaXmx} \ GatherVcfs \ INPUT=~{sep=' INPUT=' inputVcfs} \ @@ -294,7 +294,7 @@ task MarkDuplicates { command { set -e - mkdir -p $(dirname ~{outputBamPath}) + mkdir -p "$(dirname ~{outputBamPath})" picard -Xmx~{javaXmx} \ MarkDuplicates \ INPUT=~{sep=' INPUT=' inputBams} \ @@ -339,7 
+339,7 @@ task MergeVCFs { command { set -e - mkdir -p $(dirname ~{outputVcfPath}) + mkdir -p "$(dirname ~{outputVcfPath})" picard -Xmx~{javaXmx} \ MergeVcfs \ INPUT=~{sep=' INPUT=' inputVCFs} \ @@ -443,7 +443,7 @@ task SortVcf { command { set -e - mkdir -p $(dirname ~{outputVcfPath}) + mkdir -p "$(dirname ~{outputVcfPath})" picard -Xmx~{javaXmx} \ SortVcf \ I=~{sep=" I=" vcfFiles} \ diff --git a/samtools.wdl b/samtools.wdl index 871110d2..492cfaf4 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -13,7 +13,7 @@ task BgzipAndIndex { command { set -e - mkdir -p $(dirname ~{outputGz}) + mkdir -p "$(dirname ~{outputGz})" bgzip -c ~{inputFile} > ~{outputGz} tabix ~{outputGz} -p ~{type} } @@ -45,7 +45,7 @@ task Index { # Make sure outputBamPath does not exist. if [ ! -f ~{outputPath} ] then - mkdir -p $(dirname ~{outputPath}) + mkdir -p "$(dirname ~{outputPath})" ln ~{bamFile} ~{outputPath} fi samtools index ~{outputPath} ~{bamIndexPath} @@ -74,7 +74,7 @@ task Merge { command { set -e - mkdir -p $(dirname ~{outputBamPath}) + mkdir -p "$(dirname ~{outputBamPath})" samtools merge ~{true="-f" false="" force} ~{outputBamPath} ~{sep=' ' bamFiles} samtools index ~{outputBamPath} ~{indexPath} } @@ -99,7 +99,7 @@ task SortByName { command { set -e - mkdir -p $(dirname ~{outputBamPath}) + mkdir -p "$(dirname ~{outputBamPath})" samtools sort -n ~{bamFile} -o ~{outputBamPath} } @@ -122,7 +122,7 @@ task Markdup { command { set -e - mkdir -p $(dirname ~{outputBamPath}) + mkdir -p "$(dirname ~{outputBamPath})" samtools markdup ~{inputBam} ~{outputBamPath} } @@ -145,7 +145,7 @@ task Flagstat { command { set -e - mkdir -p $(dirname ~{outputPath}) + mkdir -p "$(dirname ~{outputPath})" samtools flagstat ~{inputBam} > ~{outputPath} } @@ -224,7 +224,7 @@ task Tabix { # FIXME: It is better to do the indexing on VCF creation. Not in a separate task. With file localization this gets hairy fast. 
command { set -e - mkdir -p $(dirname ~{outputFilePath}) + mkdir -p "$(dirname ~{outputFilePath})" if [ ! -f ~{outputFilePath} ] then ln ~{inputFile} ~{outputFilePath} @@ -262,7 +262,7 @@ task View { # Always output to bam and output header command { set -e - mkdir -p $(dirname ~{outputFileName}) + mkdir -p "$(dirname ~{outputFileName})" samtools view -b \ ~{"-T " + referenceFasta} \ ~{"-o " + outputFileName} \ diff --git a/seqtk.wdl b/seqtk.wdl index 662d7e29..0b1419d6 100644 --- a/seqtk.wdl +++ b/seqtk.wdl @@ -13,7 +13,7 @@ task Sample { command { set -e -o pipefail - mkdir -p $(dirname outFilePath) + mkdir -p "$(dirname ~{outFilePath})" ~{preCommand} seqtk sample \ ~{"-s " + seed} \ diff --git a/star.wdl b/star.wdl index 3ff6cf55..fb788175 100644 --- a/star.wdl +++ b/star.wdl @@ -24,7 +24,7 @@ task Star { command { set -e - mkdir -p $(dirname ~{outFileNamePrefix}) + mkdir -p "$(dirname ~{outFileNamePrefix})" STAR \ --readFilesIn ~{sep=',' inputR1} ~{sep="," inputR2} \ --outFileNamePrefix ~{outFileNamePrefix} \ diff --git a/stringtie.wdl b/stringtie.wdl index 3392d7b2..2dcaa9a1 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -18,7 +18,7 @@ task Stringtie { command { set -e - mkdir -p $(dirname ~{assembledTranscriptsFile}) + mkdir -p "$(dirname ~{assembledTranscriptsFile})" stringtie \ ~{"-p " + threads} \ ~{"-G " + referenceGtf} \ @@ -108,7 +108,7 @@ task Merge { command { set -e - mkdir -p $(dirname ~{outputGtfPath}) + mkdir -p "$(dirname ~{outputGtfPath})" stringtie --merge \ -o ~{outputGtfPath} \ ~{"-G " + guideGtf} \ diff --git a/talon.wdl b/talon.wdl index 9c426e55..af98ed61 100644 --- a/talon.wdl +++ b/talon.wdl @@ -37,7 +37,7 @@ task CreateAbundanceFileFromDatabase { command { set -e - mkdir -p $(dirname ~{outputPrefix}) + mkdir -p "$(dirname ~{outputPrefix})" talon_abundance \ ~{"--db=" + databaseFile} \ ~{"-a " + annotationVersion} \ @@ -107,7 +107,7 @@ task CreateGtfFromDatabase { command { set -e - mkdir -p $(dirname ~{outputPrefix}) + mkdir -p 
"$(dirname ~{outputPrefix})" talon_create_GTF \ ~{"--db=" + databaseFile} \ ~{"-b " + genomeBuild} \ @@ -179,7 +179,7 @@ task FilterTalonTranscripts { command { set -e - mkdir -p $(dirname ~{outputPrefix}) + mkdir -p "$(dirname ~{outputPrefix})" talon_filter_transcripts \ ~{"--db=" + databaseFile} \ ~{"-a " + annotationVersion} \ @@ -232,7 +232,7 @@ task GetReadAnnotations { command { set -e - mkdir -p $(dirname ~{outputPrefix}) + mkdir -p "$(dirname ~{outputPrefix})" talon_fetch_reads \ ~{"--db " + databaseFile} \ ~{"--build " + genomeBuild} \ @@ -292,7 +292,7 @@ task InitializeTalonDatabase { command { set -e - mkdir -p $(dirname ~{outputPrefix}) + mkdir -p "$(dirname ~{outputPrefix})" talon_initialize_database \ ~{"--f=" + GTFfile} \ ~{"--g=" + genomeBuild} \ @@ -402,7 +402,7 @@ task SummarizeDatasets { command { set -e - mkdir -p $(dirname ~{outputPrefix}) + mkdir -p "$(dirname ~{outputPrefix})" talon_summarize \ ~{"--db " + databaseFile} \ ~{true="--verbose" false="" setVerbose} \ @@ -463,7 +463,7 @@ task Talon { command { set -e - mkdir -p $(dirname ~{outputPrefix}) + mkdir -p "$(dirname ~{outputPrefix})" mv ${configFile} ./${configFileName} mv ${SAMfile} ./${SAMfileName} talon \ diff --git a/transcriptclean.wdl b/transcriptclean.wdl index df187fd2..b7b913dc 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -34,7 +34,7 @@ task GetSJsFromGtf { command { set -e - mkdir -p $(dirname ~{outputPrefix}) + mkdir -p "$(dirname ~{outputPrefix})" get_SJs_from_gtf \ ~{"--f=" + GTFfile} \ ~{"--g=" + genomeFile} \ @@ -88,7 +88,7 @@ task GetTranscriptCleanStats { command { set -e - mkdir -p $(dirname ~{outputPrefix}) + mkdir -p "$(dirname ~{outputPrefix})" get_TranscriptClean_stats \ ~{transcriptCleanSAMfile} \ ~{outputPrefix} @@ -146,7 +146,7 @@ task TranscriptClean { command { set -e - mkdir -p $(dirname ~{outputPrefix}) + mkdir -p "$(dirname ~{outputPrefix})" TranscriptClean \ ~{"-s " + SAMfile} \ ~{"-g " + referenceGenome} \ diff --git a/wisestork.wdl 
b/wisestork.wdl index 95bfcd06..0fd812b1 100644 --- a/wisestork.wdl +++ b/wisestork.wdl @@ -34,7 +34,7 @@ task Count { command { set -e - mkdir -p $(dirname ~{outputBed}) + mkdir -p "$(dirname ~{outputBed})" wisestork count \ ~{"--binsize " + binSize} \ --reference ~{reference} \ From 46d05bb5fc546840bcd2c4751b99dc4fa137aa69 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 10 Dec 2019 17:02:49 +0100 Subject: [PATCH 0026/1208] Update TALON output to align with new version. --- CHANGELOG.md | 2 ++ common.wdl | 2 +- talon.wdl | 19 +++++++++---------- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 09418cc8..ebe58ea5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Update biowdl-input-converter in common.wdl to version 0.2.1. ++ Update TALON section to now include the new annotation file. + Update TALON default image to version 4.4.1. version 2.1.0 diff --git a/common.wdl b/common.wdl index dccac856..21227e4f 100644 --- a/common.wdl +++ b/common.wdl @@ -162,7 +162,7 @@ task YamlToJson { File yaml String outputJson = basename(yaml, "\.ya?ml$") + ".json" # biowdl-input-converter has python and pyyaml. 
- String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.0--py_0" + String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" } command { set -e diff --git a/talon.wdl b/talon.wdl index 9c426e55..e350d13c 100644 --- a/talon.wdl +++ b/talon.wdl @@ -446,7 +446,6 @@ task SummarizeDatasets { task Talon { input { - File SAMfile File configFile File databaseFile String genomeBuild @@ -454,10 +453,9 @@ task Talon { Int minimumIdentity = 0 String outputPrefix String configFileName = basename(configFile) - String SAMfileName = basename(SAMfile) Int cores = 4 - String memory = "20G" + String memory = "25G" String dockerImage = "biocontainers/talon:v4.4.1_cv1" } @@ -465,7 +463,7 @@ task Talon { set -e mkdir -p $(dirname ~{outputPrefix}) mv ${configFile} ./${configFileName} - mv ${SAMfile} ./${SAMfileName} + export TMPDIR=/tmp talon \ ~{"--f " + configFileName} \ ~{"--db " + databaseFile} \ @@ -473,12 +471,13 @@ task Talon { ~{"--threads " + cores} \ ~{"--cov " + minimumCoverage} \ ~{"--identity " + minimumIdentity} \ - ~{"--o " + outputPrefix} + ~{"--o " + outputPrefix + "/run"} } output { File outputUpdatedDatabase = databaseFile - File outputLog = outputPrefix + "_talon_QC.log" + File outputLog = outputPrefix + "/run_QC.log" + File outputAnnot = outputPrefix + "/run_talon_read_annot.tsv" } runtime { @@ -488,10 +487,6 @@ task Talon { } parameter_meta { - SAMfile: { - description: "Input SAM file, same one as described in configFile.", - category: "required" - } configFile: { description: "Dataset config file (comma-delimited).", category: "required" @@ -524,5 +519,9 @@ task Talon { description: "Log file from TALON run.", category: "required" } + outputAnnot: { + description: "Read annotation file from TALON run.", + category: "required" + } } } From 862458d7df4cae6146f8a1e501a3549ba4903167 Mon Sep 17 00:00:00 2001 From: jboom1 Date: Tue, 10 Dec 2019 17:04:28 +0100 Subject: [PATCH 0027/1208] Specify update in CHANGELOG better. 
--- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ebe58ea5..10d262ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- + Update biowdl-input-converter in common.wdl to version 0.2.1. -+ Update TALON section to now include the new annotation file. ++ Update TALON section to now include the new annotation file output. + Update TALON default image to version 4.4.1. version 2.1.0 From 84acc7ebdd6af1656b000b492f177cf451954f5a Mon Sep 17 00:00:00 2001 From: jboom1 Date: Wed, 11 Dec 2019 12:48:14 +0100 Subject: [PATCH 0028/1208] Remove left-over mv command, is now fixed by export TPMDIR=/tmp. --- talon.wdl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/talon.wdl b/talon.wdl index e350d13c..57ffce90 100644 --- a/talon.wdl +++ b/talon.wdl @@ -452,7 +452,6 @@ task Talon { Float minimumCoverage = 0.9 Int minimumIdentity = 0 String outputPrefix - String configFileName = basename(configFile) Int cores = 4 String memory = "25G" @@ -462,10 +461,9 @@ task Talon { command { set -e mkdir -p $(dirname ~{outputPrefix}) - mv ${configFile} ./${configFileName} export TMPDIR=/tmp talon \ - ~{"--f " + configFileName} \ + ~{"--f " + configFile} \ ~{"--db " + databaseFile} \ ~{"--build " + genomeBuild} \ ~{"--threads " + cores} \ From f1204645a0d22afa90b9914e85762ad0da549193 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 11 Dec 2019 16:15:41 +0100 Subject: [PATCH 0029/1208] a few more linting things --- common.wdl | 3 ++- fastqc.wdl | 4 +++- gatk.wdl | 16 +++++++--------- gffcompare.wdl | 4 ++-- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/common.wdl b/common.wdl index a8365198..0aba0aef 100644 --- a/common.wdl +++ b/common.wdl @@ -121,7 +121,8 @@ task MapMd5 { } command { - cat ~{write_map(map)} | md5sum - | sed -e 's/ -//' + set -e -o pipefail + md5sum 
"~{write_map(map)}" | cut -f 1 -d ' ' } output { diff --git a/fastqc.wdl b/fastqc.wdl index 931c153a..1e835c4e 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -145,7 +145,9 @@ task GetConfiguration { command <<< set -e - fastqcDir=$(dirname $(readlink -f $(which fastqc))) + fastqcExe="$(command -v fastqc)" + fastqcPath="$(readlink -f $fastqcExe)" + fastqcDir="$(dirname $fastqcPath)" mkdir Configuration cp ${fastqcDir}/Configuration/adapter_list.txt Configuration/adapter_list.txt cp ${fastqcDir}/Configuration/contaminant_list.txt Configuration/contaminant_list.txt diff --git a/gatk.wdl b/gatk.wdl index 59880668..a48cb8b0 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -530,16 +530,15 @@ task CombineVariants { # build "-V: " arguments according to IDs and VCFs to merge # Make sure commands are run in bash - bash -c '#!/usr/bin/env bash - set -eux + V_args=$(bash -c ' + set -eu ids=(~{sep=" " identifiers}) vars=(~{sep=" " variantVcfs}) - V_args=$( - for (( i = 0; i < ${#ids[@]}; ++i )) - do - printf -- "-V:%s %s " "${ids[i]}" "${vars[i]}" - done - ) + for (( i = 0; i < ${#ids[@]}; ++i )) + do + printf -- "-V:%s %s " "${ids[i]}" "${vars[i]}" + done + ') java -Xmx~{javaXmx} -jar ~{installDir}/GenomeAnalysisTK.jar \ -T CombineVariants \ -R ~{referenceFasta} \ @@ -547,7 +546,6 @@ task CombineVariants { --filteredrecordsmergetype ~{filteredRecordsMergeType} \ --out ~{outputPath} \ $V_args - ' >>> output { diff --git a/gffcompare.wdl b/gffcompare.wdl index 9f5f1af5..b60881fa 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -31,8 +31,8 @@ task GffCompare { File? noneFile # This is a wdl workaround. Please do not assign! 
} # This allows for the creation of output directories - String dirPrefix= if defined(outputDir) - then outputDir + "/" + String dirPrefix = if defined(outputDir) + then select_first([outputDir]) + "/" else "" String totalPrefix = dirPrefix + outPrefix From acec53cc1afbf8287790d018d0ba0f9ebc18fac2 Mon Sep 17 00:00:00 2001 From: jboom1 Date: Wed, 11 Dec 2019 17:17:11 +0100 Subject: [PATCH 0030/1208] Move config file creation to talon task. --- talon.wdl | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/talon.wdl b/talon.wdl index 57ffce90..351539da 100644 --- a/talon.wdl +++ b/talon.wdl @@ -446,7 +446,9 @@ task SummarizeDatasets { task Talon { input { - File configFile + Array[File] SAMfiles + String organism + String sequencingPlatform = "PacBio-RS-II" File databaseFile String genomeBuild Float minimumCoverage = 0.9 @@ -458,19 +460,24 @@ task Talon { String dockerImage = "biocontainers/talon:v4.4.1_cv1" } - command { + command <<< set -e mkdir -p $(dirname ~{outputPrefix}) export TMPDIR=/tmp + for file in ~{sep=" " SAMfiles} + do + configFileLine="$(basename ${file%.*}),~{organism},~{sequencingPlatform},${file}" + echo ${configFileLine} >> ~{outputPrefix}/talonConfigFile.csv + done talon \ - ~{"--f " + configFile} \ + --f + talonConfigFile.csv ~{"--db " + databaseFile} \ ~{"--build " + genomeBuild} \ ~{"--threads " + cores} \ ~{"--cov " + minimumCoverage} \ ~{"--identity " + minimumIdentity} \ ~{"--o " + outputPrefix + "/run"} - } + >>> output { File outputUpdatedDatabase = databaseFile From 24879064a2029da3ab7358fb38e890842766af6d Mon Sep 17 00:00:00 2001 From: jboom1 Date: Wed, 11 Dec 2019 17:20:33 +0100 Subject: [PATCH 0031/1208] Update CHANGELOG. --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 10d262ac..a34b0ec0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ that users understand how the changes affect the new version. 
version 2.2.0-dev --------------------------- + Update biowdl-input-converter in common.wdl to version 0.2.1. -+ Update TALON section to now include the new annotation file output. ++ Update TALON section to now include the new annotation file output, and add config file creation to the TALON task. + Update TALON default image to version 4.4.1. version 2.1.0 From d682271ffc1d396576d2e1aa9b5730bfff600a9e Mon Sep 17 00:00:00 2001 From: jboom1 Date: Wed, 11 Dec 2019 17:23:02 +0100 Subject: [PATCH 0032/1208] Update parameter_meta for TALON task to now include the config file input requirements. --- talon.wdl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/talon.wdl b/talon.wdl index 351539da..29304314 100644 --- a/talon.wdl +++ b/talon.wdl @@ -492,8 +492,16 @@ task Talon { } parameter_meta { - configFile: { - description: "Dataset config file (comma-delimited).", + SAMfiles: { + description: "Input SAM files.", + category: "required" + } + organism: { + description: "The name of the organism from which the samples originated.", + category: "required" + } + sequencingPlatform: { + description: "The sequencing platform used to generate long reads.", category: "required" } databaseFile: { From b5b428f938cc22cd37a993af530fb8f344b90312 Mon Sep 17 00:00:00 2001 From: jboom1 Date: Wed, 11 Dec 2019 17:24:54 +0100 Subject: [PATCH 0033/1208] Update scripts submodule. --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 83bf72b9..fc603e5d 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 83bf72b91f6b9660f64dfa9d8096f6e57b167083 +Subproject commit fc603e5d408b89b99297fb5737586c059c5f9df6 From 4de118eb6fa9d040a8e37587f7f4f78ab7fb1d48 Mon Sep 17 00:00:00 2001 From: Jasper Boom Date: Thu, 12 Dec 2019 09:30:47 +0100 Subject: [PATCH 0034/1208] Remove unnecasary plus sign. 
Co-Authored-By: DavyCats --- talon.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/talon.wdl b/talon.wdl index 29304314..408c787f 100644 --- a/talon.wdl +++ b/talon.wdl @@ -470,7 +470,7 @@ task Talon { echo ${configFileLine} >> ~{outputPrefix}/talonConfigFile.csv done talon \ - --f + talonConfigFile.csv + --f talonConfigFile.csv ~{"--db " + databaseFile} \ ~{"--build " + genomeBuild} \ ~{"--threads " + cores} \ From 6271478e5c68ce26c65c190c40b1be435409ef4b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 12 Dec 2019 10:16:16 +0100 Subject: [PATCH 0035/1208] update changelog, remove unused inputs from cutadapt --- CHANGELOG.md | 4 ++++ cutadapt.wdl | 10 ---------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 09418cc8..470e61b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Removed unused inputs (trimPrimer and format) for cutadapt. ++ Various minor command tweaks to increase stability. ++ Fixed unused inputs in bedtools sort (inputs are now used). ++ Added miniwdl check to linting. + Update TALON default image to version 4.4.1. version 2.1.0 diff --git a/cutadapt.wdl b/cutadapt.wdl index 571bc884..58b10d73 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -6,7 +6,6 @@ task Cutadapt { File? read2 String read1output = "cut_r1.fq.gz" String? read2output - String? format Array[String] adapter = [] Array[String] front = [] Array[String] anywhere = [] @@ -49,7 +48,6 @@ task Cutadapt { String? untrimmedPairedOutputPath Boolean? colorspace Boolean? doubleEncode - Boolean? trimPrimer Boolean? stripF3 Boolean? maq Boolean? 
bwa @@ -169,10 +167,6 @@ task Cutadapt { description: "The name of the resulting second end fastq file.", category: "common" } - format: { - description: "Equivalent to cutadapt's --format option.", - category: "advanced" - } adapter: { description: "A list of 3' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "common" @@ -341,10 +335,6 @@ task Cutadapt { description: "Equivalent to cutadapt's --double-encode flag.", category: "advanced" } - trimPrimer: { - description: "Equivalent to cutadapt's --trim-primer flag.", - category: "advanced" - } stripF3: { description: "Equivalent to cutadapt's --strip-f3 flag.", category: "advanced" From 86f473201798189475b6bef2014a145a84bb3304 Mon Sep 17 00:00:00 2001 From: jboom1 Date: Thu, 12 Dec 2019 10:57:21 +0100 Subject: [PATCH 0036/1208] Add config file to output for debug purposes. --- talon.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/talon.wdl b/talon.wdl index 455f38fd..1e0c6ffa 100644 --- a/talon.wdl +++ b/talon.wdl @@ -470,7 +470,7 @@ task Talon { echo ${configFileLine} >> ~{outputPrefix}/talonConfigFile.csv done talon \ - --f talonConfigFile.csv + --f ~{outputPrefix}/talonConfigFile.csv ~{"--db " + databaseFile} \ ~{"--build " + genomeBuild} \ ~{"--threads " + cores} \ @@ -483,6 +483,7 @@ task Talon { File outputUpdatedDatabase = databaseFile File outputLog = outputPrefix + "/run_QC.log" File outputAnnot = outputPrefix + "/run_talon_read_annot.tsv" + File outputConfigFile = outputPrefix + "/talonConfigFile.csv" } runtime { From c60c3f605d2d271acf0f9e78b4b6b7b5b124824c Mon Sep 17 00:00:00 2001 From: jboom1 Date: Thu, 12 Dec 2019 10:58:43 +0100 Subject: [PATCH 0037/1208] Update parameter_meta for TALON task to include config file. 
--- talon.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/talon.wdl b/talon.wdl index 1e0c6ffa..dfd92d6b 100644 --- a/talon.wdl +++ b/talon.wdl @@ -537,5 +537,9 @@ task Talon { description: "Read annotation file from TALON run.", category: "required" } + outputConfigFile: { + description: "The TALON configuration file.", + category: "required" + } } } From 5b97148762001986727d4aa6d2b1ffc217716cb0 Mon Sep 17 00:00:00 2001 From: jboom1 Date: Thu, 12 Dec 2019 12:04:15 +0100 Subject: [PATCH 0038/1208] Update config file flag. --- talon.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/talon.wdl b/talon.wdl index dfd92d6b..7082b921 100644 --- a/talon.wdl +++ b/talon.wdl @@ -470,7 +470,7 @@ task Talon { echo ${configFileLine} >> ~{outputPrefix}/talonConfigFile.csv done talon \ - --f ~{outputPrefix}/talonConfigFile.csv + ~{"--f" + outputPrefix + "/talonConfigFile.csv"} ~{"--db " + databaseFile} \ ~{"--build " + genomeBuild} \ ~{"--threads " + cores} \ From b4567cd01bda096514b98f126317d5bdea7f11f8 Mon Sep 17 00:00:00 2001 From: jboom1 Date: Thu, 12 Dec 2019 12:05:21 +0100 Subject: [PATCH 0039/1208] Add space to talon config file flag. --- talon.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/talon.wdl b/talon.wdl index 7082b921..07c10713 100644 --- a/talon.wdl +++ b/talon.wdl @@ -470,7 +470,7 @@ task Talon { echo ${configFileLine} >> ~{outputPrefix}/talonConfigFile.csv done talon \ - ~{"--f" + outputPrefix + "/talonConfigFile.csv"} + ~{"--f " + outputPrefix + "/talonConfigFile.csv"} ~{"--db " + databaseFile} \ ~{"--build " + genomeBuild} \ ~{"--threads " + cores} \ From 41930847395ca6e31a123d74466cfb1c0e955547 Mon Sep 17 00:00:00 2001 From: Jasper Boom Date: Thu, 12 Dec 2019 12:09:21 +0100 Subject: [PATCH 0040/1208] Add backslash. 
--- talon.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/talon.wdl b/talon.wdl index 07c10713..e2395cb7 100644 --- a/talon.wdl +++ b/talon.wdl @@ -470,7 +470,7 @@ task Talon { echo ${configFileLine} >> ~{outputPrefix}/talonConfigFile.csv done talon \ - ~{"--f " + outputPrefix + "/talonConfigFile.csv"} + ~{"--f " + outputPrefix + "/talonConfigFile.csv"} \ ~{"--db " + databaseFile} \ ~{"--build " + genomeBuild} \ ~{"--threads " + cores} \ From 5494df1584ba675a7817a857f91f44e40ee04e30 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 12 Dec 2019 14:27:49 +0100 Subject: [PATCH 0041/1208] make intervals optional, allow exclude intervals --- gatk.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index a48cb8b0..88c2018a 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -207,7 +207,8 @@ task HaplotypeCallerGvcf { input { Array[File]+ inputBams Array[File]+ inputBamsIndex - Array[File]+ intervalList + Array[File]+? intervalList + Array[File]+? 
excludeIntervalList String gvcfPath File referenceFasta File referenceFastaIndex @@ -229,7 +230,8 @@ task HaplotypeCallerGvcf { -R ~{referenceFasta} \ -O ~{gvcfPath} \ -I ~{sep=" -I " inputBams} \ - -L ~{sep=' -L ' intervalList} \ + ~{true="-L" false="" defined(intervalList)} ~{sep=' -L ' intervalList} \ + ~{true="-XL" false="" defined(excludeIntervalList)} ~{sep=' -XL ' excludeIntervalList} \ ~{true="-D" false="" defined(dbsnpVCF)} ~{dbsnpVCF} \ -contamination ~{contamination} \ -ERC GVCF From e78386df65f7fc85c014735fe3a197a75110ed62 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 12 Dec 2019 15:02:22 +0100 Subject: [PATCH 0042/1208] added get chromsizes task --- bedtools.wdl | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/bedtools.wdl b/bedtools.wdl index f6748f31..1695d830 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -20,6 +20,29 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +# Technically not a bedtools task, but needed for bedtools complement. +task GetChromSizes { + input { + File faidx + # Debian for proper GNU Coreutils. Busybox sucks! + String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" + String outputFile = basename(faidx, "\.fai") + ".genome" + } + + # Get first two columns from the fasta index which note name and size. 
+ command { + cut -f1,2 ~{faidx} > ~{outputFile} + } + + output { + File chromSizes = outputFile + } + + runtime { + docker: dockerImage + } +} + task Sort { input { File inputBed From 4c8b47d8b6833e1f6add1f05e2a6a2d3fbc0307e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 12 Dec 2019 15:10:39 +0100 Subject: [PATCH 0043/1208] add complement task --- bedtools.wdl | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/bedtools.wdl b/bedtools.wdl index 1695d830..9ebe46c5 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -20,6 +20,43 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +task Complement { + input { + File genome + File bedFile + String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + String outputFile = basename(bedFile, "\.bed") + ".complement.bed" + } + + command { + bedtools complement \ + -g ~{genome} \ + -i ~{bedFile} \ + > ~{outputFile} + } + + output { + File complementBed = outputFile + } + + runtime { + docker: dockerImage + } + + parameter_meta { + genome: {description: "Genome file with names and sizes", + category: "required"} + bedFile: {description: "The bedfile to complement", + category: "required"} + outputFile: {description: "The path to write the output to", + catgory: "advanced"} + dockerImage: { + description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced" + } + } +} + # Technically not a bedtools task, but needed for bedtools complement. task GetChromSizes { input { @@ -31,7 +68,8 @@ task GetChromSizes { # Get first two columns from the fasta index which note name and size. 
command { - cut -f1,2 ~{faidx} > ~{outputFile} + cut -f1,2 ~{faidx} \ + > ~{outputFile} } output { From cce02b2a4038b26b3e2882ce403608f3e603be88 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 12 Dec 2019 15:14:15 +0100 Subject: [PATCH 0044/1208] add parameter_meta --- bedtools.wdl | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/bedtools.wdl b/bedtools.wdl index 9ebe46c5..d3e9466f 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -49,7 +49,7 @@ task Complement { bedFile: {description: "The bedfile to complement", category: "required"} outputFile: {description: "The path to write the output to", - catgory: "advanced"} + category: "advanced"} dockerImage: { description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced" @@ -79,6 +79,18 @@ task GetChromSizes { runtime { docker: dockerImage } + + parameter_meta { + faidx: {description: "The fasta index (.fai) file from which to extract the genome sizes", + category: "required"} + outputFile: {description: "The path to write the output to", + category: "advanced"} + dockerImage: { + description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced" + } + + } } task Sort { From f04efd479eb85ddbdb63616044dbc9b2d104b71a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 12 Dec 2019 15:17:58 +0100 Subject: [PATCH 0045/1208] fix miniwdl check, more consistent naming --- bedtools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bedtools.wdl b/bedtools.wdl index d3e9466f..3c7a9b81 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -102,7 +102,7 @@ task Sort { Boolean chrThenSizeD = false Boolean chrThenScoreA = false Boolean chrThenScoreD = false - File? g + File? genome File? 
faidx String outputBed = "output.sorted.bed" String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" @@ -119,7 +119,7 @@ task Sort { ~{true="-chrThenSizeD" false="" chrThenSizeD} \ ~{true="-chrThenScoreA" false="" chrThenScoreA} \ ~{true="-chrThenScoreD" false="" chrThenScoreD} \ - ~{"-g " + g} \ + ~{"-g " + genome} \ ~{"-faidx" + faidx} \ > ~{outputBed} } From 9c0d6b9525e0bf8cd934f7201f7e281094432a00 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 12 Dec 2019 15:39:52 +0100 Subject: [PATCH 0046/1208] consistent naming --- bedtools.wdl | 43 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/bedtools.wdl b/bedtools.wdl index 3c7a9b81..2e65c7d7 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -23,20 +23,20 @@ version 1.0 task Complement { input { File genome - File bedFile + File inputBed String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" - String outputFile = basename(bedFile, "\.bed") + ".complement.bed" + String outputBed = basename(inputBed, "\.bed") + ".complement.bed" } command { bedtools complement \ -g ~{genome} \ - -i ~{bedFile} \ - > ~{outputFile} + -i ~{inputBed} \ + > ~{outputBed} } output { - File complementBed = outputFile + File complementBed = outputBed } runtime { @@ -46,9 +46,9 @@ task Complement { parameter_meta { genome: {description: "Genome file with names and sizes", category: "required"} - bedFile: {description: "The bedfile to complement", + inputBed: {description: "The inputBed to complement", category: "required"} - outputFile: {description: "The path to write the output to", + outputBed: {description: "The path to write the output to", category: "advanced"} dockerImage: { description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", @@ -93,6 +93,33 @@ task GetChromSizes { } } +task Merge { + input { + Array[File]+ inputBed + String outputBed = "merged.bed" + String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + } + + command { + bedtools merge -i ~{inputBed} > ~{outputBed} + } + + output { + File mergedBed = outputBed + } + + parameter_meta { + inputBed: {description: "The inputBed to complement", + category: "required"} + outputBed: {description: "The path to write the output to", + category: "advanced"} + dockerImage: { + description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced" + } + } +} + task Sort { input { File inputBed @@ -125,7 +152,7 @@ task Sort { } output { - File bedFile = outputBed + File sortedBed = outputBed } runtime { From 0cea836fb6b0fdedffd02f941096100490d7abc2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 12 Dec 2019 15:51:09 +0100 Subject: [PATCH 0047/1208] add task to merge multiple beds --- bedtools.wdl | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/bedtools.wdl b/bedtools.wdl index 2e65c7d7..74b51c00 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -95,7 +95,7 @@ task GetChromSizes { task Merge { input { - Array[File]+ inputBed + File inputBed String outputBed = "merged.bed" String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -108,8 +108,42 @@ task Merge { File mergedBed = outputBed } + runtime { + docker: dockerImage + } + parameter_meta { - inputBed: {description: "The inputBed to complement", + inputBed: {description: "The bed to merge", + category: "required"} + outputBed: {description: "The path to write the output to", + category: "advanced"} + dockerImage: { + description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced" + } + } +} + +# Use cat, bedtools sort and bedtools merge to merge bedfiles in a single task. +task MergeBedFiles { + input { + Array[File] bedFiles + String outputBed = "merged.bed" + String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + } + command { + cat ~{sep=" " bedFiles} | bedtools sort | bedtools merge > ~{outputBed} + } + + output { + File mergedBed = outputBed + } + + runtime { + docker: dockerImage + } + parameter_meta { + bedFiles: {description: "The bed files to merge", category: "required"} outputBed: {description: "The path to write the output to", category: "advanced"} From eae8f2dfa71b6cbdc83299cf2c4c6a4de91750b0 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 12 Dec 2019 15:51:41 +0100 Subject: [PATCH 0048/1208] add comment --- bedtools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bedtools.wdl b/bedtools.wdl index 74b51c00..3e847d2d 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -131,6 +131,8 @@ task MergeBedFiles { String outputBed = "merged.bed" String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } + + # A sorted bed is needed for bedtools merge command { cat ~{sep=" " bedFiles} | bedtools sort | bedtools merge > ~{outputBed} } From 0162556c3cf51117bcaa5a6bec309a8247ea05dd Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 12 Dec 2019 16:16:01 +0100 Subject: [PATCH 0049/1208] make complement simpler by using fasta index --- bedtools.wdl | 46 +++++++--------------------------------------- 1 file changed, 7 insertions(+), 39 deletions(-) diff --git a/bedtools.wdl b/bedtools.wdl index 3e847d2d..d775a4b3 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -22,15 +22,19 @@ version 1.0 task Complement { input { - File genome + File faidx File inputBed String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" String outputBed = basename(inputBed, "\.bed") + 
".complement.bed" } + # Use a fasta index file to get the genome sizes. And convert that to the + # bedtools specific "genome" format. command { + set -e + cut -f1,2 ~{faidx} > sizes.genome bedtools complement \ - -g ~{genome} \ + -g sizes.genome \ -i ~{inputBed} \ > ~{outputBed} } @@ -44,7 +48,7 @@ task Complement { } parameter_meta { - genome: {description: "Genome file with names and sizes", + faidx: {description: "The fasta index (.fai) file from which to extract the genome sizes", category: "required"} inputBed: {description: "The inputBed to complement", category: "required"} @@ -57,42 +61,6 @@ task Complement { } } -# Technically not a bedtools task, but needed for bedtools complement. -task GetChromSizes { - input { - File faidx - # Debian for proper GNU Coreutils. Busybox sucks! - String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" - String outputFile = basename(faidx, "\.fai") + ".genome" - } - - # Get first two columns from the fasta index which note name and size. - command { - cut -f1,2 ~{faidx} \ - > ~{outputFile} - } - - output { - File chromSizes = outputFile - } - - runtime { - docker: dockerImage - } - - parameter_meta { - faidx: {description: "The fasta index (.fai) file from which to extract the genome sizes", - category: "required"} - outputFile: {description: "The path to write the output to", - category: "advanced"} - dockerImage: { - description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } - - } -} - task Merge { input { File inputBed From ce556e389a4bf6f019cace0a3d2cfb35d01fc1c8 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 17 Dec 2019 16:37:04 +0100 Subject: [PATCH 0050/1208] add ploidy to haplotypecaller --- gatk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index 88c2018a..0c02450f 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -216,6 +216,7 @@ task HaplotypeCallerGvcf { Float contamination = 0.0 File? dbsnpVCF File? dbsnpVCFIndex + Int? ploidy String memory = "12G" String javaXmx = "4G" @@ -230,6 +231,7 @@ task HaplotypeCallerGvcf { -R ~{referenceFasta} \ -O ~{gvcfPath} \ -I ~{sep=" -I " inputBams} \ + ~{"--sample-ploidy " + ploidy} \ ~{true="-L" false="" defined(intervalList)} ~{sep=' -L ' intervalList} \ ~{true="-XL" false="" defined(excludeIntervalList)} ~{sep=' -XL ' excludeIntervalList} \ ~{true="-D" false="" defined(dbsnpVCF)} ~{dbsnpVCF} \ From a976eacffec6481635b357bc96e1d7a72cf3ac6a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 6 Jan 2020 11:18:36 +0100 Subject: [PATCH 0051/1208] update htseq version --- htseq.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/htseq.wdl b/htseq.wdl index 3afe51a4..63ea849a 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -13,7 +13,7 @@ task HTSeqCount { Array[String] additionalAttributes = [] String memory = "40G" - String dockerImage = "quay.io/biocontainers/htseq:0.9.1--py36h7eb728f_2" + String dockerImage = "quay.io/biocontainers/htseq:0.11.2--py37h637b7d7_1" } command { From ae1f38655a426ebbe3deff71e6309724ab895bed Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 6 Jan 2020 11:19:26 +0100 Subject: [PATCH 0052/1208] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e102e29..4f8371ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how 
the changes affect the new version. version 2.2.0-dev --------------------------- ++ Update htseq to default image version 0.11.2 + Update biowdl-input-converter in common.wdl to version 0.2.1. + Update TALON section to now include the new annotation file output, and add config file creation to the TALON task. + Removed unused inputs (trimPrimer and format) for cutadapt. From d95d1da9786a45c632bc5826da7fd5a221cc99ed Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 7 Jan 2020 15:50:20 +0100 Subject: [PATCH 0053/1208] Fix SQLite error concerning database/disk space being full. --- talon.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/talon.wdl b/talon.wdl index e2395cb7..d63cada7 100644 --- a/talon.wdl +++ b/talon.wdl @@ -463,7 +463,10 @@ task Talon { command <<< set -e mkdir -p "$(dirname ~{outputPrefix})" - export TMPDIR=/tmp + mkdir -p $PWD/tmp #Standard /tmp fills up which makes the SQLite process crash. + ln -s $PWD/tmp /tmp/sqltmp #Multiprocessing will crash if the absolute path is too long. + export TMPDIR=/tmp/sqltmp + printf "" > ~{outputPrefix}/talonConfigFile.csv #File needs to be emptied when task is rerun. for file in ~{sep=" " SAMfiles} do configFileLine="$(basename ${file%.*}),~{organism},~{sequencingPlatform},${file}" From c4aaeaa07a06942eac4ee01a57da056026863dd0 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 7 Jan 2020 15:54:55 +0100 Subject: [PATCH 0054/1208] Update CHANGELOG. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f8371ff..38c95774 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ TALON: Fix SQLite error concerning database/disk space being full. + Update htseq to default image version 0.11.2 + Update biowdl-input-converter in common.wdl to version 0.2.1. 
+ Update TALON section to now include the new annotation file output, and add config file creation to the TALON task. From 624826f72d8045442768d5e641fdad5835548f3a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 8 Jan 2020 13:40:06 +0100 Subject: [PATCH 0055/1208] add parameter_meta to picard metrics --- CHANGELOG.md | 2 ++ picard.wdl | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++-- scripts | 2 +- 3 files changed, 91 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f8371ff..49db6bba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Added parameter_meta sections to a variety of tasks. ++ Picard's BedToIntervalList outputPath ipnut is now optional (with a default of "regions.interval_list") + Update htseq to default image version 0.11.2 + Update biowdl-input-converter in common.wdl to version 0.2.1. + Update TALON section to now include the new annotation file output, and add config file creation to the TALON task. diff --git a/picard.wdl b/picard.wdl index caee009b..d5daa248 100644 --- a/picard.wdl +++ b/picard.wdl @@ -4,7 +4,7 @@ task BedToIntervalList { input { File bedFile File dict - String outputPath + String outputPath = "regions.interval_list" String memory = "12G" String javaXmx = "4G" @@ -29,6 +29,18 @@ task BedToIntervalList { docker: dockerImage memory: memory } + + parameter_meta { + bedfile: {description: "A bed file", category: "required"} + dict: {description: "A sequence dict file.", category: "required"} + outputPath: {description: "The location the output interval list should be written to.", + category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
(Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task CollectMultipleMetrics { @@ -119,10 +131,44 @@ task CollectMultipleMetrics { } runtime { - docker: dockerImage memory: memory } + + parameter_meta { + inputBam: {description: "The input BAM file for which metrics will be collected.", + category: "required"} + inputBamIndex: {description: "The index of the input BAM file.", category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", + category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + basename: {description: "The basename/prefix of the output files (may include directories).", + category: "required"} + collectAlignmentSummaryMetrics: {description: "Equivalent to the `PROGRAM=CollectAlignmentSummaryMetrics` argument.", + category: "common"} + collectInsertSizeMetrics: {description: "Equivalent to the `PROGRAM=CollectInsertSizeMetrics` argument.", + category: "common"} + qualityScoreDistribution: {description: "Equivalent to the `PROGRAM=QualityScoreDistribution` argument.", + category: "common"} + meanQualityByCycle: {description: "Equivalent to the `PROGRAM=MeanQualityByCycle` argument.", + category: "common"} + collectBaseDistributionByCycle: {description: "Equivalent to the `PROGRAM=CollectBaseDistributionByCycle` argument.", + category: "common"} + collectGcBiasMetrics: {description: "Equivalent to the `PROGRAM=CollectGcBiasMetrics` argument.", + category: "common"} + collectSequencingArtifactMetrics: {description: "Equivalent to the `PROGRAM=CollectSequencingArtifactMetrics` argument.", + 
category: "common"} + collectQualityYieldMetrics: {description: "Equivalent to the `PROGRAM=CollectQualityYieldMetrics` argument.", + category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. (Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task CollectRnaSeqMetrics { @@ -159,6 +205,23 @@ task CollectRnaSeqMetrics { docker: dockerImage memory: memory } + + parameter_meta { + inputBam: {description: "The input BAM file for which metrics will be collected.", + category: "required"} + inputBamIndex: {description: "The index of the input BAM file.", category: "required"} + refRefflat: {description: "A refflat file containing gene annotations", catehory: "required"} + basename: {description: "The basename/prefix of the output files (may include directories).", + category: "required"} + strandSpecificity: {description: "Equivalent to the `STRAND_SPECIFICITY` option of picard's CollectRnaSeqMetrics", + category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. (Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task CollectTargetedPcrMetrics { @@ -201,6 +264,29 @@ task CollectTargetedPcrMetrics { docker: dockerImage memory: memory } + + parameter_meta { + inputBam: {description: "The input BAM file for which metrics will be collected.", + category: "required"} + inputBamIndex: {description: "The index of the input BAM file.", category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", + category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + ampliconIntervals: {description: "An interval list describinig the coordinates of the amplicons sequenced.", + category: "required"} + targetIntervals: {description: "An interval list describing the coordinates of the targets sequenced.", + category: "required"} + basename: {description: "The basename/prefix of the output files (may include directories).", + category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. (Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } # Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs diff --git a/scripts b/scripts index fc603e5d..0817a9da 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit fc603e5d408b89b99297fb5737586c059c5f9df6 +Subproject commit 0817a9dad46ff031ada83944bfc5f7c6b88b9d13 From a705eadf22d3858ecee8dce6fe6f498fcef68a99 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 8 Jan 2020 13:59:40 +0100 Subject: [PATCH 0056/1208] typo --- picard.wdl | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/picard.wdl b/picard.wdl index d5daa248..04a75013 100644 --- a/picard.wdl +++ b/picard.wdl @@ -31,7 +31,7 @@ task BedToIntervalList { } parameter_meta { - bedfile: {description: "A bed file", category: "required"} + bedfile: {description: "A bed file.", category: "required"} dict: {description: "A sequence dict file.", category: "required"} outputPath: {description: "The location the output interval list should be written to.", category: "advanced"} @@ -147,21 +147,21 @@ task CollectMultipleMetrics { basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} collectAlignmentSummaryMetrics: {description: "Equivalent to the `PROGRAM=CollectAlignmentSummaryMetrics` argument.", - category: "common"} + category: "advanced"} collectInsertSizeMetrics: {description: "Equivalent to the `PROGRAM=CollectInsertSizeMetrics` argument.", - category: "common"} + category: "advanced"} qualityScoreDistribution: {description: "Equivalent to the `PROGRAM=QualityScoreDistribution` argument.", - category: "common"} + category: "advanced"} meanQualityByCycle: {description: "Equivalent to the `PROGRAM=MeanQualityByCycle` argument.", - category: "common"} + category: "advanced"} collectBaseDistributionByCycle: {description: "Equivalent to the 
`PROGRAM=CollectBaseDistributionByCycle` argument.", - category: "common"} + category: "advanced"} collectGcBiasMetrics: {description: "Equivalent to the `PROGRAM=CollectGcBiasMetrics` argument.", - category: "common"} + category: "advanced"} collectSequencingArtifactMetrics: {description: "Equivalent to the `PROGRAM=CollectSequencingArtifactMetrics` argument.", - category: "common"} + category: "advanced"} collectQualityYieldMetrics: {description: "Equivalent to the `PROGRAM=CollectQualityYieldMetrics` argument.", - category: "common"} + category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. (Should be lower than `memory` to accommodate JVM overhead.", @@ -210,10 +210,10 @@ task CollectRnaSeqMetrics { inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} - refRefflat: {description: "A refflat file containing gene annotations", catehory: "required"} + refRefflat: {description: "A refflat file containing gene annotations.", catehory: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} - strandSpecificity: {description: "Equivalent to the `STRAND_SPECIFICITY` option of picard's CollectRnaSeqMetrics", + strandSpecificity: {description: "Equivalent to the `STRAND_SPECIFICITY` option of picard's CollectRnaSeqMetrics.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 3c8c98778bb95572ac43be6679980c654954d5c9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 8 Jan 2020 14:06:35 +0100 Subject: [PATCH 0057/1208] typo --- picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index 04a75013..e76d5c6d 100644 --- a/picard.wdl +++ 
b/picard.wdl @@ -31,7 +31,7 @@ task BedToIntervalList { } parameter_meta { - bedfile: {description: "A bed file.", category: "required"} + bedFile: {description: "A bed file.", category: "required"} dict: {description: "A sequence dict file.", category: "required"} outputPath: {description: "The location the output interval list should be written to.", category: "advanced"} From d07fee7b3c5b736efebab91cd592c36b91883b7c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 8 Jan 2020 15:07:07 +0100 Subject: [PATCH 0058/1208] modify collect-columns parameter_meta --- collect-columns.wdl | 66 +++++++++++++++------------------------------ 1 file changed, 22 insertions(+), 44 deletions(-) diff --git a/collect-columns.wdl b/collect-columns.wdl index d453e5be..09788e20 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -44,49 +44,27 @@ task CollectColumns { } parameter_meta { - inputTables: { - description: "The tables from which columns should be taken.", - category: "required" - } - outputPath: { - description: "The path to which the output should be written.", - category: "required" - } - featureColumn: { - description: "Equivalent to the -f option of collect-columns.", - category: "common" # Should likely be controlled by the calling workflow - } - valueColumn: { - description: "Equivalent to the -c option of collect-columns.", - category: "common" # Should likely be controlled by the calling workflow - } - separator: { - description: "Equivalent to the -s option of collect-columns.", - category: "common" # Should likely be controlled by the calling workflow - } - sampleNames: { - description: "Equivalent to the -n option of collect-columns.", - category: "common" # Should likely be controlled by the calling workflow - } - header: { - description: "Equivalent to the -H flag of collect-columns.", - category: "common" - } - additionalAttributes: { - description: "Equivalent to the -a option of collect-columns.", - category: "advanced" - } - referenceGtf: { - 
description: "Equivalent to the -g option of collect-columns.", - category: "advanced" - } - featureAttribute: { - description: "Equivalent to the -F option of collect-columns.", - category: "advanced" - } - dockerImage: { - description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + inputTables: {description: "The tables from which columns should be taken.", + category: "required"} + outputPath: {description: "The path to which the output should be written.", + category: "required"} + featureColumn: {description: "Equivalent to the -f option of collect-columns.", + category: "advanced"} + valueColumn: {description: "Equivalent to the -c option of collect-columns.", + category: "advanced"} + separator: {description: "Equivalent to the -s option of collect-columns.", + category: "advanced"} + sampleNames: {description: "Equivalent to the -n option of collect-columns.", + category: "advanced"} + header: {description: "Equivalent to the -H flag of collect-columns.", + category: "advanced"} + additionalAttributes: {description: "Equivalent to the -a option of collect-columns.", + category: "advanced"} + referenceGtf: {description: "Equivalent to the -g option of collect-columns.", + category: "advanced"} + featureAttribute: {description: "Equivalent to the -F option of collect-columns.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } \ No newline at end of file From 1a84192c65162aef619e9083a962692026516c10 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 8 Jan 2020 15:20:29 +0100 Subject: [PATCH 0059/1208] update scripts --- .travis.yml | 2 +- scripts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 396b998f..fec93c74 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,4 +19,4 @@ before_install: install: - conda install --file requirements-test.txt -script: bash scripts/biowdl_lint.sh +script: bash scripts/biowdl_lint.sh skip-wdl-aid diff --git a/scripts b/scripts index 0817a9da..76d1e695 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 0817a9dad46ff031ada83944bfc5f7c6b88b9d13 +Subproject commit 76d1e695812aecd55fdf0221dc08b25d3ac7dde1 From ba7b2545d6132a5a1030f827955e8e8cfed77564 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 8 Jan 2020 17:01:26 +0100 Subject: [PATCH 0060/1208] add parameter_meta to ApplyBQSR, GatherBamFiles and GatherVcfFiles --- gatk.wdl | 19 +++++++++++++++++++ picard.wdl | 24 ++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index a48cb8b0..04112be0 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -45,6 +45,25 @@ task ApplyBQSR { docker: dockerImage memory: memory } + + parameter_meta { + inputBam: {description: "The BAM file which should be recalibrated.", category: "required"} + inputBamIndex: {description: "The input BAM file's index.", category: "required"} + outputBamPath: {description: "The location the resulting BAM file should be written.", category: "required"} + recalibrationReport: {description: "The BQSR report the be used for recalibration.", category: "required"} + sequenceGroupInterval: {description: "The regions to operate on", category: "advanced"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", + 
category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. (Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } # Generate Base Quality Score Recalibration (BQSR) model diff --git a/picard.wdl b/picard.wdl index e76d5c6d..e7f494f1 100644 --- a/picard.wdl +++ b/picard.wdl @@ -322,6 +322,18 @@ task GatherBamFiles { docker: dockerImage memory: memory } + + parameter_meta { + inputBams: {description: "The BAM files to be merged together.", category: "required"} + inputBamsIndex: {description: "The indexes of the input BAM files.", category: "required"} + outputBamPath: {description: "The path where the merged BAM file will be written.", caregory: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. (Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task GatherVcfs { @@ -352,6 +364,18 @@ task GatherVcfs { docker: dockerImage memory: memory } + + parameter_meta { + inputVcfs: {description: "The VCF files to be merged together.", category: "required"} + inputVcfIndexes: {description: "The indexes of the input VCF files.", category: "required"} + outputVcfPath: {description: "The path where the merged VCF file will be written.", caregory: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. (Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } # Mark duplicate reads to avoid counting non-independent observations From 0b2a14fcc25b6dfc1617cc4253577512ea4dacb3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 8 Jan 2020 17:59:17 +0100 Subject: [PATCH 0061/1208] add parameter_meta to BaseRecibrator --- gatk.wdl | 26 ++++++++++++++++++++++++-- picard.wdl | 12 ++++++------ 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 04112be0..144d5739 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -51,7 +51,7 @@ task ApplyBQSR { inputBamIndex: {description: "The input BAM file's index.", category: "required"} outputBamPath: {description: "The location the resulting BAM file should be written.", category: "required"} recalibrationReport: {description: "The BQSR report the be used for recalibration.", category: "required"} - sequenceGroupInterval: {description: "The regions to operate on", category: "advanced"} + sequenceGroupInterval: {description: "Bed files describing the regions to operate on.", category: "advanced"} referenceFasta: {description: "The 
reference fasta file which was also used for mapping.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", @@ -59,7 +59,7 @@ task ApplyBQSR { referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. (Should be lower than `memory` to accommodate JVM overhead.", + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -108,6 +108,28 @@ task BaseRecalibrator { docker: dockerImage memory: memory } + + parameter_meta { + inputBam: {description: "The BAM file to generate a BQSR report for.", category: "required"} + inputBamIndex: {description: "The index of the input BAM file.", category: "required"} + recalibrationReportPath: {description: "The location to write the BQSR report to.", category: "required"} + sequenceGroupInterval: {description: "Bed files describing the regions to operate on.", category: "advancded"} + knownIndelsSitesVCFs: {description: "VCf files with known indels.", category: "advanced"} + knownIndelsSitesVCFIndexes: {description: "The indexed for the known variant VCFs", category: "advanced"} + dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} + dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", + category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The 
index for the reference fasta file.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.)", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task CombineGVCFs { diff --git a/picard.wdl b/picard.wdl index e7f494f1..bc6f464c 100644 --- a/picard.wdl +++ b/picard.wdl @@ -36,7 +36,7 @@ task BedToIntervalList { outputPath: {description: "The location the output interval list should be written to.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. (Should be lower than `memory` to accommodate JVM overhead.", + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -164,7 +164,7 @@ task CollectMultipleMetrics { category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. (Should be lower than `memory` to accommodate JVM overhead.", + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -217,7 +217,7 @@ task CollectRnaSeqMetrics { category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. (Should be lower than `memory` to accommodate JVM overhead.", + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -282,7 +282,7 @@ task CollectTargetedPcrMetrics { category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. (Should be lower than `memory` to accommodate JVM overhead.", + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -329,7 +329,7 @@ task GatherBamFiles { outputBamPath: {description: "The path where the merged BAM file will be written.", caregory: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. (Should be lower than `memory` to accommodate JVM overhead.", + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -371,7 +371,7 @@ task GatherVcfs { outputVcfPath: {description: "The path where the merged VCF file will be written.", caregory: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. (Should be lower than `memory` to accommodate JVM overhead.", + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 3dc469ed4411a066fbbe31c087b545d0d3a8bc7f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 8 Jan 2020 18:00:03 +0100 Subject: [PATCH 0062/1208] typos --- gatk.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 144d5739..e04d4996 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -115,7 +115,7 @@ task BaseRecalibrator { recalibrationReportPath: {description: "The location to write the BQSR report to.", category: "required"} sequenceGroupInterval: {description: "Bed files describing the regions to operate on.", category: "advancded"} knownIndelsSitesVCFs: {description: "VCf files with known indels.", category: "advanced"} - knownIndelsSitesVCFIndexes: {description: "The indexed for the known variant VCFs", category: "advanced"} + knownIndelsSitesVCFIndexes: {description: "The indexed for the known variant VCFs.", category: "advanced"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", @@ -125,7 +125,7 @@ task BaseRecalibrator { referenceFastaFai: {description: "The 
index for the reference fasta file.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.)", + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From bbb8ff152c2654f9c69ea8ae8e8813a5590b625f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 9 Jan 2020 10:00:16 +0100 Subject: [PATCH 0063/1208] add parameter_meta to CombineGVCFs, GatherBqsrReports, GenotypeGVCFs and HaplotypeCallerGvcf --- gatk.wdl | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index e04d4996..676f2fd5 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -167,6 +167,24 @@ task CombineGVCFs { docker: dockerImage memory: memory } + + parameter_meta { + gvcfFiles: {description: "The GVCF files to be combined.", category: "required"} + gvcfFilesIndex: {description: "The indexes for the GVCF files.", caregory: "required"} + intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"} + outputPath: {description: "The location the combined GVCF should be written to.", category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", + category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: 
{description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } # Combine multiple recalibration tables from scattered BaseRecalibrator runs @@ -197,6 +215,17 @@ task GatherBqsrReports { docker: dockerImage memory: memory } + + parameter_meta { + inputBQSRreports: {description: "The BQSR reports to be merged.", category: "required"} + outputReportPath: {description: "The location of the combined BQSR report.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task GenotypeGVCFs { @@ -241,6 +270,26 @@ task GenotypeGVCFs { docker: dockerImage memory: memory } + + parameter_meta { + gvcfFiles: {description: "The GVCF files to be genotypes.", category: "required"} + gvcfFilesIndex: {description: "The index of the input GVCF files.", category: "required"} + intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "required"} + outputPath: {description: "The location to write the output VCf file to.", category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", + category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} + dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } # Call variants on a single sample with HaplotypeCaller to produce a GVCF @@ -285,6 +334,27 @@ task HaplotypeCallerGvcf { docker: dockerImage memory: memory } + + parameter_meta { + inputBams: {description: "The BAM files on which to perform variant calling.", category: "required"} + inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} + intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "required"} + gvcfPath: {description: "The location to write the output GVCF to.", category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", + category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + contamination: {description: "Equivalent to HaplotypeCaller's `-contamination` option.", category: "advanced"} + dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} + dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task MuTect2 { From 7aed677ae0cc08015953b968db223540cd6b45a3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 9 Jan 2020 14:04:32 +0100 Subject: [PATCH 0064/1208] add parameter_meta for SplitNCigarReads, ReorderGlobbedScatter and ScatterRegions --- biopet/biopet.wdl | 25 +++++++++++++++++++++++++ gatk.wdl | 20 +++++++++++++++++++- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index 8cf23813..f91f93ea 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -226,6 +226,12 @@ task ReorderGlobbedScatters { # 4 gigs of memory to be able to build the docker image in singularity memory: "4G" } + + parameter_meta { + scatters: {description: "The files which should be ordered.", category: "required"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task ScatterRegions { @@ -268,6 +274,25 @@ task ScatterRegions { docker: dockerImage memory: memory } + + parameter_meta { + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + scatterSize: {description: "Equivalent to biopet scatterregions' `-s` option.", category: "common"} + regions: {description: "The regions to be scattered.", category: "advanced"} + notSplitContigs: {description: "Equivalent to biopet scatterregions' `--notSplitContigs` flag.", + category: "advanced"} + bamFile: {description: "Equivalent to biopet scatterregions' `--bamfile` option.", + category: "advanced"} + bamIndex: {description: "The index for the bamfile given through bamFile.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + 
javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task ValidateAnnotation { diff --git a/gatk.wdl b/gatk.wdl index 676f2fd5..99b562b2 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -113,7 +113,7 @@ task BaseRecalibrator { inputBam: {description: "The BAM file to generate a BQSR report for.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} recalibrationReportPath: {description: "The location to write the BQSR report to.", category: "required"} - sequenceGroupInterval: {description: "Bed files describing the regions to operate on.", category: "advancded"} + sequenceGroupInterval: {description: "Bed files describing the regions to operate on.", category: "advanced"} knownIndelsSitesVCFs: {description: "VCf files with known indels.", category: "advanced"} knownIndelsSitesVCFIndexes: {description: "The indexed for the known variant VCFs.", category: "advanced"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} @@ -614,6 +614,24 @@ task SplitNCigarReads { docker: dockerImage memory: memory } + + parameter_meta { + inputBam: {description: "The BAM file for which spliced reads should be split.", category: "required"} + inputBamIndex: {description: "The input BAM file's index.", category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", + category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + outputBam: {description: "The location the output BAM file should be written.", category: 
"required"} + intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task CombineVariants { From 843f0bfd8383d2e18e351f5ac4a40818753e146b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 9 Jan 2020 14:14:47 +0100 Subject: [PATCH 0065/1208] typo --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 99b562b2..db9a8333 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -344,7 +344,7 @@ task HaplotypeCallerGvcf { category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + referenceFastaIndex: {description: "The index for the reference fasta file.", category: "required"} contamination: {description: "Equivalent to HaplotypeCaller's `-contamination` option.", category: "advanced"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} From 765cf25f7542f7b18840dcd1834e167a0fa711fe Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 9 Jan 2020 14:40:38 +0100 Subject: [PATCH 0066/1208] add parameter_meta to MarkDuplicates and MergeVCFs --- picard.wdl | 26 ++++++++++++++++++++++++++ scripts | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index bc6f464c..ea11a270 100644 --- a/picard.wdl +++ b/picard.wdl @@ -430,6 +430,20 @@ task 
MarkDuplicates { docker: dockerImage memory: memory } + + parameter_meta { + inputBams: {description: "The BAM files for which the duplicate reads should be marked.", category: "required"} + inputBamIndexes: {description: "Th eindexes for the input BAM files.", category: "required"} + outputBamPath: {description: "The location where the ouptut BAM file should be written.", category: "required"} + metricsPath: {description: "The location where the output metrics file should be written.", category: "required"} + read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } # Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs @@ -465,6 +479,18 @@ task MergeVCFs { docker: dockerImage memory: memory } + + parameter_meta { + inputVCFs: {description: "The VCF files to be merged.", category: "required"} + inputVCFsIndexes: {description: "The indexes of the VCF files.", category: "required"} + outputVcfPath: {description: "The location the output VCf file should be written to.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task SamToFastq { diff --git a/scripts b/scripts index 76d1e695..fc603e5d 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 76d1e695812aecd55fdf0221dc08b25d3ac7dde1 +Subproject commit fc603e5d408b89b99297fb5737586c059c5f9df6 From 370cef681e2ca78c2a4e285d0cfdb3844f81d83b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 9 Jan 2020 14:50:26 +0100 Subject: [PATCH 0067/1208] update scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index fc603e5d..76d1e695 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit fc603e5d408b89b99297fb5737586c059c5f9df6 +Subproject commit 76d1e695812aecd55fdf0221dc08b25d3ac7dde1 From b4932de6390ae20f402b147c3b528dfc9ca6adc3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 9 Jan 2020 16:08:36 +0100 Subject: [PATCH 0068/1208] add parameter_meta to multiqc --- CHANGELOG.md | 3 ++- multiqc.wdl | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 49db6bba..04864260 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,9 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Removed unused "verbose" and "quiet" inputs from multiqc. + Added parameter_meta sections to a variety of tasks. -+ Picard's BedToIntervalList outputPath ipnut is now optional (with a default of "regions.interval_list") ++ Picard's BedToIntervalList outputPath input is now optional (with a default of "regions.interval_list") + Update htseq to default image version 0.11.2 + Update biowdl-input-converter in common.wdl to version 0.2.1. + Update TALON section to now include the new annotation file output, and add config file creation to the TALON task. 
diff --git a/multiqc.wdl b/multiqc.wdl index 1c5ecfd4..f30cadea 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -34,8 +34,6 @@ task MultiQC { Boolean megaQCUpload = false # This must be actively enabled in my opinion. The tools default is to upload. File? config # A directory String? clConfig - Boolean verbose = false - Boolean quiet = false Array[Boolean] finished = [] # An array of booleans that can be used to let multiqc wait on stuff. String memory = "4G" @@ -92,4 +90,49 @@ task MultiQC { memory: memory docker: dockerImage } + + parameter_meta { + analysisDirectory: {description: "The directory to run MultiQC on.", category: "required"} + dependencies: {description: "This must be used in order to run multiqc after these tasks.", category: "internal_use_only"} + force: {description: "Equivalent to MultiQC's `--force` flag.", category: "advanced"} + dirs: {description: "Equivalent to MultiQC's `--dirs` flag.", category: "advanced"} + dirsDepth: {description: "Equivalent to MultiQC's `--dirs-depth` option.", category: "advanced"} + fullNames: {description: "Equivalent to MultiQC's `--fullnames` flag.", category: "advanced"} + title: {description: "Equivalent to MultiQC's `--title` option.", category: "advanced"} + comment: {description: "Equivalent to MultiQC's `--comment` option.", category: "advanced"} + fileName: {description: "Equivalent to MultiQC's `--filename` option.", category: "advanced"} + outDir: {description: "Directory in whihc the output should be written.", category: "common"} + template: {description: "Equivalent to MultiQC's `--template` option.", category: "advanced"} + tag: {description: "Equivalent to MultiQC's `--tag` option.", category: "advanced"} + ignore: {description: "Equivalent to MultiQC's `--ignore` option.", category: "advanced"} + ignoreSamples: {description: "Equivalent to MultiQC's `--ignore-samples` option.", category: "advanced"} + ignoreSymlinks: {description: "Equivalent to MultiQC's `--ignore-symlinks` flag.", category: 
"advanced"} + sampleNames: {description: "Equivalent to MultiQC's `--sample-names` option.", category: "advanced"} + fileList: {description: "Equivalent to MultiQC's `--file-list` option.", category: "advanced"} + exclude: {description: "Equivalent to MultiQC's `--exclude` option.", category: "advanced"} + module: {description: "Equivalent to MultiQC's `--module` option.", category: "advanced"} + dataDir: {description: "Equivalent to MultiQC's `--data-dir` flag.", category: "advanced"} + noDataDir: {description: "Equivalent to MultiQC's `--no-data-dir` flag.", category: "advanced"} + dataFormat: {description: "Equivalent to MultiQC's `--data-format` option.", category: "advanced"} + zipDataDir: {description: "Equivalent to MultiQC's `--zip-data-dir` flag.", category: "advanced"} + export: {description: "Equivalent to MultiQC's `--export` flag.", category: "advanced"} + flat: {description: "Equivalent to MultiQC's `--flat` flag.", category: "advanced"} + interactive: {description: "Equivalent to MultiQC's `--interactive` flag.", category: "advanced"} + lint: {description: "Equivalent to MultiQC's `--lint` flag.", category: "advanced"} + pdf: {description: "Equivalent to MultiQC's `--pdf` flag.", category: "advanced"} + megaQCUpload: {description: "Opposite to MultiQC's `--no-megaqc-upload` flag.", category: "advanced"} + config: {description: "Equivalent to MultiQC's `--config` option.", category: "advanced"} + clConfig: {description: "Equivalent to MultiQC's `--cl-config` option.", category: "advanced"} + finished: {description: "An array of booleans that can be used to let multiqc wait on stuff.", category: "internal_use_only"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } + + meta { + WDL_AID: { + exclude: ["finished", "dependencies"] + } + } } From 7b92c9f5ccd81b7ab15f29e0ddcaa4703296cfc7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 10 Jan 2020 09:47:03 +0100 Subject: [PATCH 0069/1208] add parameter_meta to samtools --- samtools.wdl | 90 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 83 insertions(+), 7 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 492cfaf4..73aa9525 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -26,6 +26,14 @@ task BgzipAndIndex { runtime { docker: dockerImage } + + parameter_meta { + inputFile: {description: "The file to be compressed and indexed.", category: "required"} + outputDir: {description: "The directory in which the output will be placed.", category: "required"} + type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task Index { @@ -60,6 +68,14 @@ task Index { runtime { docker: dockerImage } + + parameter_meta { + bamFile: {description: "The BAM file for which an index should be made.", category: "required"} + outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", + category: "common"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task Merge { @@ -87,6 +103,14 @@ task Merge { runtime { docker: dockerImage } + + parameter_meta { + bamFiles: {description: "The BAM files to merge.", category: "required"} + outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} + force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task SortByName { @@ -110,6 +134,13 @@ task SortByName { runtime { docker: dockerImage } + + parameter_meta { + bamFile: {description: "The BAM file to get sorted.", category: "required"} + outputBamPath: {description: "The location the sorted BAM file should be written to.", category: "common"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task Markdup { @@ -133,6 +164,13 @@ task Markdup { runtime { docker: dockerImage } + + parameter_meta { + inputBam: {description: "The BAM file to be processed.", category: "required"} + outputBamPath: {description: "The location of the output BAM file.", category: "required"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task Flagstat { @@ -156,6 +194,13 @@ task Flagstat { runtime { docker: dockerImage } + + parameter_meta { + inputBam: {description: "The BAM file for which statistics should be retrieved.", category: "required"} + outputPath: {description: "The location the ouput should be written to.", category: "required"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task Fastq { @@ -204,13 +249,19 @@ task Fastq { } parameter_meta { - inputBam: "The bam file to process." - outputRead1: "If only outputRead1 is given '-s' flag is assumed. Else '-1'." - includeFilter: "Include reads with ALL of these flags. Corresponds to '-f'" - excludeFilter: "Exclude reads with ONE OR MORE of these flags. Corresponds to '-F'" - excludeSpecificFilter: "Exclude reads with ALL of these flags. Corresponds to '-G'" - appendReadNumber: "Append /1 and /2 to the read name, or don't. Corresponds to '-n/N" - + inputBam: {description: "The bam file to process.", category: "required"} + outputRead1: {description: "The location the reads (first reads for pairs, in case of paired-end sequencing) should be written to.", category: "required"} + outputRead2: {description: "The location the second reads from pairs should be written to.", category: "common"} + outputRead0: {description: "The location the unpaired reads should be written to (in case of paired-end sequenicng).", category: "advanced"} + includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`", category: "advanced"} + excludeFilter: {description: "Exclude reads with ONE OR MORE of these flags. Corresponds to `-F`", category: "advanced"} + excludeSpecificFilter: {description: "Exclude reads with ALL of these flags. Corresponds to `-G`", category: "advanced"} + appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. Corresponds to `-n/N`", category: "advanced"} + outputQuality: {description: "Equivalent to samtools fastq's `-O` flag.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } @@ -240,6 +291,15 @@ task Tabix { runtime { docker: dockerImage } + + parameter_meta { + inputFile: {description: "The file to be indexed.", category: "required"} + outputFilePath: {description: "The location where the file should be written to. The index will appear alongside this link to the file.", + category: "common"} + type: {description: "The type of file (eg. vcf or bed) to be indexed.", category: "common"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task View { @@ -286,4 +346,20 @@ task View { memory: memory docker: dockerImage } + + parameter_meta { + inFile: {description: "A BAM, SAM or CRAM file.", category: "required"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} + outputFileName: {description: "The location the output BAM file should be written.", category: "common"} + uncompressedBamOutput: {description: "Equivalent to samtools view's `-u` flag.", category: "advanced"} + includeFilter: {description: "Equivalent to samtools view's `-f` option.", category: "advanced"} + excludeFilter: {description: "Equivalent to samtools view's `-F` option.", category: "advanced"} + excludeSpecificFilter: {description: "Equivalent to samtools view's `-G` option.", category: "advanced"} + MAPQthreshold: {description: "Equivalent to samtools view's `-q` option.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } From f37b2874cd9f35f9bbb014103f73a9b6a17ebf6a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 10 Jan 2020 11:05:29 +0100 Subject: [PATCH 0070/1208] add parameter_meta to bowtie --- bowtie.wdl | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/bowtie.wdl b/bowtie.wdl index 72a39641..18fd6146 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -80,6 +80,27 @@ task Bowtie { memory: memory docker: dockerImage } + + parameter_meta { + readsUpstream: {description: "The first-/single-end fastq files.", category: "required"} + readsDownstream: {description: "The second-end fastq files.", category: "common"} + outputPath: {description: "The location the output BAM file should be written to.", category: "common"} + indexFiles: {description: "The index files for bowtie.", category: "required"} + seedmms: {description: "Equivalent to bowtie's `--seedmms` option.", category: "advanced"} + seedlen: {description: "Equivalent to bowtie's `--seedlen` option.", category: "advanced"} + k: {description: "Equivalent to bowtie's `-k` option.", category: "advanced"} + best: {description: "Equivalent to bowtie's `--best` flag.", category: "advanced"} + strata: {description: "Equivalent to bowtie's `--strata` flag.", category: "advanced"} + allowContain: {description: "Equivalent to bowtie's `--allow-contain` flag.", category: "advanced"} + samRG: {description: "Equivalent to bowtie's `--sam-RG` option.", category: "advanced"} + + picardXmx: {description: "The maximum memory available to the picard (used for sorting the output). 
Should be lower than `memory` to accommodate JVM overhead and bowtie's memory usage.", + category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } struct BowtieIndex { From 8495de6156e96c2e0c4ecbfe77f2a9456134c666 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 10 Jan 2020 11:16:11 +0100 Subject: [PATCH 0071/1208] add parameter_meta to biowdl input converter and YamltoJson --- biowdl.wdl | 13 +++++++++++++ common.wdl | 7 +++++++ 2 files changed, 20 insertions(+) diff --git a/biowdl.wdl b/biowdl.wdl index 32fd5a73..7aa68b27 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -52,4 +52,17 @@ task InputConverter { runtime { docker: dockerImage } + + parameter_meta { + samplesheet: {description: "The samplesheet to be processed.", category: "required"} + outputFile: {description: "The location the JSON representation of the samplesheet should be written to.", + category: "advanced"} + skipFileCheck: {description: "Whether or not the existance of the files mentioned in the samplesheet should be checked.", + category: "advanced"} + checkFileMd5sums: {description: "Whether or not the MD5 sums of the files mentioned in the samplesheet should be checked.", + category: "advanced"} + old: {description: "Whether or not the old samplesheet format should be used.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } diff --git a/common.wdl b/common.wdl index f8b2cd8b..73325bf4 100644 --- a/common.wdl +++ b/common.wdl @@ -184,6 +184,13 @@ task YamlToJson { runtime { docker: dockerImage } + + parameter_meta { + yaml: {description: "The YAML file to convert.", category: "required"} + outputJson: {description: "The location the output JSON file should be written to.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } struct Reference { From d554c4baf0c6c431ec03aef0d744ae6813cb65d7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 10 Jan 2020 13:50:56 +0100 Subject: [PATCH 0072/1208] add parameter_meta to various gatk tasks --- CHANGELOG.md | 2 + gatk.wdl | 113 ++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 109 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 04864260..d3bd6bd1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Removed the "installDir" input from CombineVariants. ++ Removed the "extraArgs" input from FilterMutectCalls. + Removed unused "verbose" and "quiet" inputs from multiqc. + Added parameter_meta sections to a variety of tasks. 
+ Picard's BedToIntervalList outputPath input is now optional (with a default of "regions.interval_list") diff --git a/gatk.wdl b/gatk.wdl index db9a8333..e0beeb54 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -407,6 +407,29 @@ task MuTect2 { docker: dockerImage memory: memory } + + parameter_meta { + inputBams: {description: "The BAM files on which to perform variant calling.", category: "required"} + inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + outputVcf: {description: "The location to write the output VCF file to.", category: "required"} + tumorSample: {description: "The name of the tumor/case sample.", category: "required"} + normalSample: {description: "The name of the normal/control sample.", category: "common"} + germlineResource: {description: "Equivalent to Mutect2's `--germline-resource` option.", category: "advanced"} + germlineResourceIndex: {description: "The index for the germline resource.", category: "advanced"} + panelOfNormals: {description: "Equivalent to Mutect2's `--panel-of-normals` option.", category: "advanced"} + panelOfNormalsIndex: {description: "The index for the panel of normals.", category: "advanced"} + f1r2TarGz: {description: "Equivalent to Mutect2's `--f1r2-tar-gz` option.", category: "advanced"} + intervals: {description: "Bed files describing the regiosn to operate on.", category: "required"} + outputStats: {description: "The location the output statistics should be written to.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the 
program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task LearnReadOrientationModel { @@ -434,6 +457,15 @@ task LearnReadOrientationModel { docker: dockerImage memory: memory } + + parameter_meta { + f1r2TarGz: {description: "A f1r2TarGz file outputed by mutect2.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task MergeStats { @@ -461,6 +493,15 @@ task MergeStats { docker: dockerImage memory: memory } + + parameter_meta { + stats: {description: "Statistics files to be merged.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task GetPileupSummaries { @@ -496,6 +537,22 @@ task GetPileupSummaries { docker: dockerImage memory: memory } + + parameter_meta { + sampleBam: {description: "A BAM file for which a pileup should be created.", category: "required"} + sampleBamIndex: {description: "The index of the input BAM file.", category: "required"} + variantsForContamination: {description: "A VCF file with common variants.", category: "required"} + variantsForContaminationIndex: {description: "The index for the common variants VCF file.", category: "required"} + sitesForContamination: {description: "A bed file describing regions to operate on.", category: "required"} + sitesForContaminationIndex: {description: "The index for the bed file.", category: "required"} + outputPrefix: {description: "The prefix for the ouput.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task CalculateContamination { @@ -527,6 +584,16 @@ task CalculateContamination { docker: dockerImage memory: memory } + + parameter_meta { + tumorPileups: {description: "The pileup summary of a tumor/case sample.", category: "required"} + normalPileups: {description: "The pileup summary of the normal/control sample.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task FilterMutectCalls { @@ -542,7 +609,6 @@ task FilterMutectCalls { File? artifactPriors Int uniqueAltReadCount = 4 File mutect2Stats - String? extraArgs String memory = "24G" String javaXmx = "12G" @@ -563,8 +629,7 @@ task FilterMutectCalls { ~{"--unique-alt-read-count " + uniqueAltReadCount} \ ~{"-stats " + mutect2Stats} \ --filtering-stats "filtering.stats" \ - --showHidden \ - ~{extraArgs} + --showHidden } output { @@ -577,6 +642,26 @@ task FilterMutectCalls { docker: dockerImage memory: memory } + + parameter_meta { + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + unfilteredVcf: {description: "An unfiltered VCF file as produced by Mutect2.", category: "required"} + unfilteredVcfIndex: {description: "The index of the unfiltered VCF file.", category: "required"} + outputVcf: {description: "The location the filtered VCf file should be written.", category: "required"} + contaminationTable: {description: "Equivalent to FilterMutectCalls' `--contamination-table` option.", category: "advanced"} + mafTumorSegments: {description: "Equivalent to FilterMutectCalls' `--tumor-segmentation` option.", category: "advanced"} + artifactPriors: {description: "Equivalent to FilterMutectCalls' `--ob-priors` option.", category: "advanced"} + uniqueAltReadCount: {description: "Equivalent to FilterMutectCalls' `--unique-alt-read-count` option.", category: "advanced"} + mutect2Stats: {description: "Equivalent to FilterMutectCalls' 
`-stats` option.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task SplitNCigarReads { @@ -636,8 +721,6 @@ task SplitNCigarReads { task CombineVariants { input { - String installDir = "/usr" # .jar location in the docker image - File referenceFasta File referenceFastaFai File referenceFastaDict @@ -668,7 +751,7 @@ task CombineVariants { printf -- "-V:%s %s " "${ids[i]}" "${vars[i]}" done ') - java -Xmx~{javaXmx} -jar ~{installDir}/GenomeAnalysisTK.jar \ + java -Xmx~{javaXmx} -jar /usr/GenomeAnalysisTK.jar \ -T CombineVariants \ -R ~{referenceFasta} \ --genotypemergeoption ~{genotypeMergeOption} \ @@ -686,4 +769,22 @@ task CombineVariants { docker: dockerImage memory: memory } + + parameter_meta { + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + genotypeMergeOption: {description: "Equivalent to CombineVariants' `--genotypemergeoption` option.", category: "advanced"} + filteredRecordsMergeType: {description: "Equivalent to CombineVariants' `--filteredrecordsmergetype` option.", category: "advanced"} + identifiers: {description: "The sample identifiers in the same order as variantVcfs.", category: "required"} + variantVcfs: {description: "The input VCF files in the same order as identifiers.", category: "required"} + variantIndexes: {description: "The indexes of the input VCF 
files.", category: "required"} + outputPath: {description: "The location the output should be written to", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } From 63022ee493c43477bf1404adc28647eb7b29a2ae Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 13 Jan 2020 16:10:34 +0100 Subject: [PATCH 0073/1208] add parameter_meta to manta, strelka and somaticseq --- CHANGELOG.md | 2 + manta.wdl | 19 ++++++- somaticseq.wdl | 134 ++++++++++++++++++++++++++++++++++++++++++------- strelka.wdl | 43 ++++++++++++++++ 4 files changed, 179 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d3bd6bd1..d73879a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Removed unused "threads" input from ModifyStrelka. ++ Removed the "installDir" inputs from the somaticseq tasks. + Removed the "installDir" input from CombineVariants. + Removed the "extraArgs" input from FilterMutectCalls. + Removed unused "verbose" and "quiet" inputs from multiqc. 
diff --git a/manta.wdl b/manta.wdl index 21dd21c3..d0ca75e0 100644 --- a/manta.wdl +++ b/manta.wdl @@ -18,7 +18,6 @@ task Somatic { Int cores = 1 Int memoryGb = 4 String dockerImage = "quay.io/biocontainers/manta:1.4.0--py27_1" - } command { @@ -56,4 +55,22 @@ task Somatic { memory: "~{memoryGb}G" docker: dockerImage } + + parameter_meta { + tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"} + tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} + normalBam: {description: "The normal/control sample's BAM file.", category: "common"} + normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "common"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + runDir: {description: "The directory to use as run/output directory.", category: "common"} + callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} + callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} + exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} + + cores: {description: "The number of cores to use.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } diff --git a/somaticseq.wdl b/somaticseq.wdl index 45015255..55dd4b94 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -2,8 +2,6 @@ version 1.0 task ParallelPaired { input { - String installDir = "/opt/somaticseq" #the location in the docker image - File? 
classifierSNV File? classifierIndel String outputDir @@ -33,7 +31,7 @@ task ParallelPaired { } command { - ~{installDir}/somaticseq_parallel.py \ + /opt/somaticseq/somaticseq_parallel.py \ ~{"--classifier-snv " + classifierSNV} \ ~{"--classifier-indel " + classifierIndel} \ --output-directory ~{outputDir} \ @@ -73,12 +71,40 @@ task ParallelPaired { cpu: threads docker: dockerImage } + + parameter_meta { + classifierSNV: {description: "A somaticseq SNV classifier.", category: "common"} + classifierIndel: {description: "A somaticseq Indel classifier.", category: "common"} + outputDir: {description: "The directory to write the output to.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + inclusionRegion: {description: "A bed file describing regions to include.", category: "common"} + exclusionRegion: {description: "A bed file describing regions to exclude.", category: "common"} + normalBam: {description: "The normal/control sample's BAM file.", category: "required"} + normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "required"} + tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"} + tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} + mutect2VCF: {description: "A VCF as produced by mutect2.", category: "advanced"} + varscanSNV: {description: "An SNV VCF as produced by varscan.", category: "advanced"} + varscanIndel: {description: "An indel VCF as produced by varscan.", category: "advanced"} + jsmVCF: {description: "A VCF as produced by jsm.", category: "advanced"} + somaticsniperVCF: {description: "A VCF as produced by somaticsniper.", category: "advanced"} + vardictVCF: {description: "A VCF as produced by vardict.", category: "advanced"} + museVCF: {description: "A VCF as produced by 
muse.", category: "advanced"} + lofreqSNV: {description: "An SNV VCF as produced by lofreq.", category: "advanced"} + lofreqIndel: {description: "An indel VCF as produced by lofreq.", category: "advanced"} + scalpelVCF: {description: "A VCF as produced by scalpel.", category: "advanced"} + strelkaSNV: {description: "An SNV VCF as produced by strelka.", category: "advanced"} + strelkaIndel: {description: "An indel VCF as produced by somaticsniper.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task ParallelPairedTrain { input { - String installDir = "/opt/somaticseq" #the location in the docker image - File truthSNV File truthIndel String outputDir @@ -108,7 +134,7 @@ task ParallelPairedTrain { } command { - ~{installDir}/somaticseq_parallel.py \ + /opt/somaticseq/somaticseq_parallel.py \ --somaticseq-train \ --truth-snv ~{truthSNV} \ --truth-indel ~{truthIndel} \ @@ -147,12 +173,40 @@ task ParallelPairedTrain { cpu: threads docker: dockerImage } + + parameter_meta { + truthSNV: {description: "A VCF of true SNVs.", category: "required"} + truthIndel: {description: "A VCF of true indels.", category: "required"} + outputDir: {description: "The directory to write the output to.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + inclusionRegion: {description: "A bed file describing regions to include.", category: "common"} + exclusionRegion: {description: "A bed file describing regions to exclude.", category: "common"} + normalBam: {description: "The normal/control sample's BAM file.", category: "required"} + normalBamIndex: {description: "The index for the normal/control sample's 
BAM file.", category: "required"} + tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"} + tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} + mutect2VCF: {description: "A VCF as produced by mutect2.", category: "advanced"} + varscanSNV: {description: "An SNV VCF as produced by varscan.", category: "advanced"} + varscanIndel: {description: "An indel VCF as produced by varscan.", category: "advanced"} + jsmVCF: {description: "A VCF as produced by jsm.", category: "advanced"} + somaticsniperVCF: {description: "A VCF as produced by somaticsniper.", category: "advanced"} + vardictVCF: {description: "A VCF as produced by vardict.", category: "advanced"} + museVCF: {description: "A VCF as produced by muse.", category: "advanced"} + lofreqSNV: {description: "An SNV VCF as produced by lofreq.", category: "advanced"} + lofreqIndel: {description: "An indel VCF as produced by lofreq.", category: "advanced"} + scalpelVCF: {description: "A VCF as produced by scalpel.", category: "advanced"} + strelkaSNV: {description: "An SNV VCF as produced by strelka.", category: "advanced"} + strelkaIndel: {description: "An indel VCF as produced by somaticsniper.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task ParallelSingle { input { - String installDir = "/opt/somaticseq" #the location in the docker image - File? classifierSNV File? 
classifierIndel String outputDir @@ -174,7 +228,7 @@ task ParallelSingle { } command { - ~{installDir}/somaticseq_parallel.py \ + /opt/somaticseq/somaticseq_parallel.py \ ~{"--classifier-snv " + classifierSNV} \ ~{"--classifier-indel " + classifierIndel} \ --output-directory ~{outputDir} \ @@ -207,12 +261,32 @@ task ParallelSingle { cpu: threads docker: dockerImage } + + parameter_meta { + classifierSNV: {description: "A somaticseq SNV classifier.", category: "common"} + classifierIndel: {description: "A somaticseq Indel classifier.", category: "common"} + outputDir: {description: "The directory to write the output to.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + inclusionRegion: {description: "A bed file describing regions to include.", category: "common"} + exclusionRegion: {description: "A bed file describing regions to exclude.", category: "common"} + bam: {description: "The input BAM file.", category: "required"} + bamIndex: {description: "The index for the input BAM file.", category: "required"} + mutect2VCF: {description: "A VCF as produced by mutect2.", category: "advanced"} + varscanVCF: {description: "A VCF as produced by varscan.", category: "advanced"} + vardictVCF: {description: "A VCF as produced by vardict.", category: "advanced"} + lofreqVCF: {description: "A VCF as produced by lofreq.", category: "advanced"} + scalpelVCF: {description: "A VCF as produced by scalpel.", category: "advanced"} + strelkaVCF: {description: "A VCF as produced by strelka.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task ParallelSingleTrain { input { - String installDir = "/opt/somaticseq" #the location in the docker image - File truthSNV File truthIndel String outputDir @@ -234,7 +308,7 @@ task ParallelSingleTrain { } command { - ~{installDir}/somaticseq_parallel.py \ + /opt/somaticseq/somaticseq_parallel.py \ --somaticseq-train \ --truth-snv ~{truthSNV} \ --truth-indel ~{truthIndel} \ @@ -266,23 +340,41 @@ task ParallelSingleTrain { cpu: threads docker: dockerImage } + + parameter_meta { + truthSNV: {description: "A VCF of true SNVs.", category: "required"} + truthIndel: {description: "A VCF of true indels.", category: "required"} + outputDir: {description: "The directory to write the output to.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + inclusionRegion: {description: "A bed file describing regions to include.", category: "common"} + exclusionRegion: {description: "A bed file describing regions to exclude.", category: "common"} + bam: {description: "The input BAM file.", category: "required"} + bamIndex: {description: "The index for the input BAM file.", category: "required"} + mutect2VCF: {description: "A VCF as produced by mutect2.", category: "advanced"} + varscanVCF: {description: "A VCF as produced by varscan.", category: "advanced"} + vardictVCF: {description: "A VCF as produced by vardict.", category: "advanced"} + lofreqVCF: {description: "A VCF as produced by lofreq.", category: "advanced"} + scalpelVCF: {description: "A VCF as produced by scalpel.", category: "advanced"} + strelkaVCF: {description: "A VCF as produced by strelka.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + dockerImage: {description: "The docker image used for 
this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task ModifyStrelka { input { - String installDir = "/opt/somaticseq/vcfModifier" #the location in the docker image - File strelkaVCF String outputVCFName = basename(strelkaVCF, ".gz") - - Int threads = 1 String dockerImage = "lethalfang/somaticseq:3.1.0" } command { set -e - ~{installDir}/modify_Strelka.py \ + /opt/somaticseq/vcfModifier/modify_Strelka.py \ -infile ~{strelkaVCF} \ -outfile "modified_strelka.vcf" @@ -295,7 +387,13 @@ task ModifyStrelka { } runtime { - cpu: threads docker: dockerImage } + + parameter_meta { + strelkaVCF: {description: "A vcf file as produced by strelka.", category: "required"} + outputVCFName: {description: "The location the output VCF file should be written to.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } diff --git a/strelka.wdl b/strelka.wdl index 2c8b5a06..212863ce 100644 --- a/strelka.wdl +++ b/strelka.wdl @@ -44,6 +44,23 @@ task Germline { cpu: cores memory: "~{memoryGb}G" } + + parameter_meta { + runDir: {description: "The directory to use as run/output directory.", category: "common"} + bams: {description: "The input BAM files.", category: "required"} + indexes: {description: "The indexes for the input BAM files.", category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} + callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} + exome: {description: "Whether or not the data is from exome 
sequencing.", category: "common"} + rna: {description: "Whether or not the data is from RNA sequencing.", category: "common"} + + cores: {description: "The number of cores to use.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task Somatic { @@ -96,4 +113,30 @@ task Somatic { cpu: cores memory: "~{memoryGb}G" } + + parameter_meta { + runDir: {description: "The directory to use as run/output directory.", category: "common"} + normalBam: {description: "The normal/control sample's BAM file.", category: "required"} + normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "required"} + tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"} + tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} + callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} + indelCandidatesVcf: {description: "An indel candidates VCF file from manta.", category: "advanced"} + indelCandidatesVcfIndex: {description: "The index for the indel candidates VCF file.", category: "advanced"} + exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} + + cores: {description: "The number of cores to use.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in Gigabytes.", 
category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } + + meta { + WDL_AID: { + exclude: ["doNotDefineThis"] + } + } } \ No newline at end of file From 85717efec10b69255bd99dc49973fcf229215929 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 13 Jan 2020 16:26:09 +0100 Subject: [PATCH 0074/1208] add parameter_meta to vardict --- vardict.wdl | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/vardict.wdl b/vardict.wdl index 85e5fd2b..ed9ee22d 100644 --- a/vardict.wdl +++ b/vardict.wdl @@ -69,4 +69,34 @@ task VarDict { memory: memory docker: dockerImage } + + parameter_meta { + tumorSampleName: {description: "The name of the tumor/case sample.", category: "required"} + tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"} + tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} + normalSampleName: {description: "The name of the normal/control sample.", category: "common"} + normalBam: {description: "The normal/control sample's BAM file.", category: "common"} + normalBamIndex: {description: "The normal/control sample's BAM file.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + bedFile: {description: "A bed file describing the regions to operate on. 
These regions must be below 1e6 bases in size.", category: "required"} + outputVcf: {description: "The location to write the output VCF file to.", category: "required"} + chromosomeColumn: {description: "Equivalent to vardict-java's `-c` option.", category: "advanced"} + startColumn: {description: "Equivalent to vardict-java's `-S` option.", category: "advanced"} + endColumn: {description: "Equivalent to vardict-java's `-E` option.", category: "advanced"} + geneColumn: {description: "Equivalent to vardict-java's `-g` option.", category: "advanced"} + outputCandidateSomaticOnly: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-M` flag.", category: "advanced"} + outputAllVariantsAtSamePosition: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-A` flag.", category: "advanced"} + mappingQuality: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-Q` option.", category: "advanced"} + minimumTotalDepth: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-d` option.", category: "advanced"} + minimumVariantDepth: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-v` option.", category: "advanced"} + minimumAlleleFrequency: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-f` option.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } From f66d064407b06f74c5bb7f9ceeba708e39c6cfce Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 13 Jan 2020 16:39:50 +0100 Subject: [PATCH 0075/1208] add parameter_meta to chunked-scatter --- chunked-scatter.wdl | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 06f62baf..6b320368 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -30,4 +30,14 @@ task ChunkedScatter { memory: "4G" docker: dockerImage } + + parameter_meta { + inputFile: {description: "Either a bed file describing regiosn of intrest or a sequence dictionary.", category: "required"} + prefix: {description: "The prefix for the output files.", category: "advanced"} + chunkSize: {description: "Equivalent to chunked-scatter's `-c` option.", category: "advanced"} + overlap: {description: "Equivalent to chunked-scatter's `-o` option.", category: "advanced"} + minimumBasesPerFile: {description: "Equivalent to chunked-scatter's `-m` option.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } \ No newline at end of file From ebbf979ad043c53ae57a8c491cb9279777efe802 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 14 Jan 2020 14:05:29 +0100 Subject: [PATCH 0076/1208] add parameter_meta to SortVcf --- picard.wdl | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index ea11a270..f1f07723 100644 --- a/picard.wdl +++ b/picard.wdl @@ -574,7 +574,7 @@ task SortVcf { String memory = "24G" String javaXmx = "8G" String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" - } + } command { @@ -596,4 +596,16 @@ task SortVcf { docker: dockerImage memory: memory } + + parameter_meta { + vcfFiles: {description: "The VCF files to merge and sort.", category: "required"} + outputVcfPath: {description: "The location the sorted VCF files should be written to.", category: "required"} + dict: {description: "A sequence dictionary matching the VCF files.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } \ No newline at end of file From 3fc31f09851acb3272e1a120ef21bfea82984411 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 14 Jan 2020 16:38:44 +0100 Subject: [PATCH 0077/1208] Add new centrifuge tasks. 
--- centrifuge.wdl | 325 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 243 insertions(+), 82 deletions(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index ec6652f9..880d4c5b 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -1,124 +1,285 @@ version 1.0 -# Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2018 +# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center # -# Tasks from centrifuge +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + task Build { input { + Boolean disableDifferenceCover = false File conversionTable File taxonomyTree - File inputFasta - String centrifugeIndexBase - String? preCommand - String centrifugeBuildExecutable = "centrifuge-build" - #Boolean? c = false - Boolean largeIndex = false - Boolean noAuto = false - Int? bMax - Int? bMaxDivn - Boolean noDiffCover = false - Boolean noRef = false - Boolean justRef = false - Int? offRate - Int? 
fTabChars - File? nameTable - File? sizeTable - Int? seed + File nameTable + File referenceFile + String indexBasename = "NAME" + String outputPrefix + + Int? offrate + Int? ftabChars Int? kmerCount + File? sizeTable - Int threads = 8 + Int cores = 5 String memory = "20G" + String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he860b03_3" } command { - set -e -o pipefail - ~{preCommand} - ~{"mkdir -p $(dirname " + centrifugeIndexBase + ")"} - ~{centrifugeBuildExecutable} \ - ~{true='--large-index' false='' largeIndex} \ - ~{true='--noauto' false='' noAuto} \ - ~{'--bmax ' + bMax} \ - ~{'--bmaxdivn ' + bMaxDivn} \ - ~{true='--nodc' false='' noDiffCover} \ - ~{true='--noref' false='' noRef} \ - ~{true='--justref' false='' justRef} \ - ~{'--offrate ' + offRate} \ - ~{'--ftabchars ' + fTabChars} \ - ~{'--name-table ' + nameTable } \ - ~{'--size-table ' + sizeTable} \ - ~{'--seed ' + seed} \ - ~{'--kmer-count' + kmerCount} \ - ~{'--threads ' + threads} \ - --conversion-table ~{conversionTable} \ - --taxonomy-tree ~{taxonomyTree} \ - ~{inputFasta} \ - ~{centrifugeIndexBase} + set -e + mkdir -p "$(dirname ~{outputPrefix})" + centrifuge-build \ + ~{"--threads " + cores} \ + ~{true="--nodc" false="" disableDifferenceCover} \ + ~{"--offrate " + offrate} \ + ~{"--ftabchars " + ftabChars} \ + ~{"--kmer-count " + kmerCount} \ + ~{"--size-table " + sizeTable} \ + ~{"--conversion-table " + conversionTable} \ + ~{"--taxonomy-tree " + taxonomyTree} \ + ~{"--name-table " + nameTable} \ + ~{referenceFile} \ + ~{outputPrefix + "/" + indexBasename} + } + + output { + Array[File] outputIndex = glob(outputPrefix + "/" + indexBasename + "*.cf") } runtime { - cpu: threads + cpu: cores memory: memory + docker: dockerImage + } + + parameter_meta { + disableDifferenceCover: { + description: "Disable use of the difference-cover sample.", + category: "required" + } + conversionTable: { + description: "List of UIDs (unique ID) and corresponding taxonomic IDs.", + category: 
"required" + } + taxonomyTree: { + description: "Taxonomic tree (e.g. nodes.dmp).", + category: "required" + } + nameTable: { + description: "Name table (e.g. names.dmp).", + category: "required" + } + referenceFile: { + description: "A comma-separated list of FASTA files containing the reference sequences to be aligned to.", + category: "required" + } + indexBasename: { + description: "The basename of the index files to write.", + category: "required" + } + outputPrefix: { + description: "Output directory path + output file prefix.", + category: "required" + } + offrate: { + description: "The number of rows marked by the indexer.", + category: "common" + } + ftabChars: { + description: "Calculate an initial BW range with respect to this character.", + category: "common" + } + kmerCount: { + description: "Use as kmer-size for counting the distinct number of k-mers in the input sequences.", + category: "common" + } + sizeTable: { + description: "List of taxonomic IDs and lengths of the sequences belonging to the same taxonomic IDs.", + category: "common" + } } } task Classify { input { - String outputDir - Boolean compressOutput = true - String? preCommand + String inputFormat = "fastq" + Boolean phred64 = false + Int minHitLength = 22 String indexPrefix - Array[File]? unpairedReads Array[File]+ read1 - Array[File]? read2 - Boolean? fastaInput - # Variables for handling output + String outputPrefix + String outputName = basename(outputPrefix) - String? metFilePath # If this is specified, the report file is empty - Int? assignments - Int? minHitLen - Int? minTotalLen - Array[String]? hostTaxIds - Array[String]? excludeTaxIds + Array[File]? read2 + Int? trim5 + Int? trim3 + Int? reportMaxDistinct + String? hostTaxIDs + String? 
excludeTaxIDs - Int threads = 4 - String memory = "8G" + Int cores = 4 + String memory = "16G" + String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he860b03_3" } - String outputFilePath = outputDir + "/centrifuge.out" - String reportFilePath = outputDir + "/centrifuge_report.tsv" - String finalOutputPath = if (compressOutput == true) - then outputFilePath + ".gz" - else outputFilePath + Map[String, String] inputFormatOptions = {"fastq": "-q", "fasta": "-f", "qseq": "--qseq", "raw": "-r", "sequences": "-c"} command { - set -e -o pipefail - mkdir -p ~{outputDir} - ~{preCommand} + set -e + mkdir -p "$(dirname ~{outputPrefix})" centrifuge \ - ~{"-p " + threads} \ + ~{inputFormatOptions[inputFormat]} \ + ~{true="--phred64" false="--phred33" phred64} \ + ~{"--min-hitlen " + minHitLength} \ + ~{"--met-file " + outputPrefix + "/" + outputName + "_alignment_metrics.tsv"} \ + ~{"--threads " + cores} \ + ~{"--trim5 " + trim5} \ + ~{"--trim3 " + trim3} \ + ~{"-k " + reportMaxDistinct} \ + ~{"--host-taxids " + hostTaxIDs} \ + ~{"--exclude-taxids " + excludeTaxIDs} \ ~{"-x " + indexPrefix} \ - ~{true="-f" false="" fastaInput} \ - ~{true="-k" false="" defined(assignments)} ~{assignments} \ - ~{true="-1" false="-U" defined(read2)} ~{sep=',' read1} \ - ~{true="-2" false="" defined(read2)} ~{sep=',' read2} \ - ~{true="-U" false="" length(select_first([unpairedReads])) > 0} ~{sep=',' unpairedReads} \ - ~{"--report-file " + reportFilePath} \ - ~{"--min-hitlen " + minHitLen} \ - ~{"--min-totallen " + minTotalLen} \ - ~{"--met-file " + metFilePath} \ - ~{true="--host-taxids " false="" defined(hostTaxIds)} ~{sep=',' hostTaxIds} \ - ~{true="--exclude-taxids " false="" defined(excludeTaxIds)} ~{sep=',' excludeTaxIds} \ - ~{true="| gzip -c >" false="-S" compressOutput} ~{finalOutputPath} + ~{true="-1 " false="-U " defined(read2)} ~{sep="," read1} \ + ~{"-2 "} ~{sep="," read2} \ + ~{"-S " + outputPrefix + "/" + outputName + "_classification.tsv"} \ + ~{"--report-file " + 
outputPrefix + "/" + outputName + "_output_report.tsv"} + } + + output { + File outputMetrics = outputPrefix + "/" + outputName + "_alignment_metrics.tsv" + File outputClassification = outputPrefix + "/" + outputName + "_classification.tsv" + File outputReport = outputPrefix + "/" + outputName + "_output_report.tsv" + } + + runtime { + cpu: cores + memory: memory + docker: dockerImage + } + + parameter_meta { + inputFormat: { + description: "The format of the read file(s).", + category: "required" + } + phred64: { + description: "If set to true, Phred+64 encoding is used.", + category: "required" + } + minHitLength: { + description: "Minimum length of partial hits.", + category: "required" + } + indexPrefix: { + description: "The basename of the index for the reference genomes.", + category: "required" + } + read1: { + description: "List of files containing mate 1s, or unpaired reads.", + category: "required" + } + outputPrefix: { + description: "Output directory path + output file prefix.", + category: "required" + } + outputName: { + description: "The base name of the outputPrefix.", + category: "required" + } + read2: { + description: "List of files containing mate 2s.", + category: "common" + } + trim5: { + description: "Trim bases from 5' (left) end of each read before alignment.", + category: "common" + } + trim3: { + description: "Trim bases from 3' (right) end of each read before alignment.", + category: "common" + } + reportMaxDistinct: { + description: "It searches for at most distinct, primary assignments for each read or pair.", + category: "common" + } + hostTaxIDs: { + description: "A comma-separated list of taxonomic IDs that will be preferred in classification procedure.", + category: "common" + } + excludeTaxIDs: { + description: "A comma-separated list of taxonomic IDs that will be excluded in classification procedure.", + category: "common" + } + } +} + +task Inspect { + input { + String printOption = fasta + String indexBasename + String 
outputPrefix + + Int? across + + Int cores = 1 + String memory = "4G" + String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he860b03_3" + } + + Map[String, String] outputOptions = {"fasta": "", "names": "--names", "summary": "--summary", "conversionTable": "--conversion-table", "taxonomyTree": "--taxonomy-tree", "nameTable": "--name-table", "sizeTable": "--size-table"} + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + centrifuge-inspect \ + ~{outputOptions[printOption]} \ + ~{"--across " + across} \ + ~{indexBasename} \ + > ~{outputPrefix + "/" + printOption} } output { - File classifiedReads = finalOutputPath - File reportFile = reportFilePath + File outputInspect = outputPrefix + "/" + printOption } runtime { - cpu: threads + cpu: cores memory: memory + docker: dockerImage + } + + parameter_meta { + printOption: { + description: "", + category: "required" + } + indexBasename: { + description: "", + category: "required" + } + outputPrefix: { + description: "Output directory path + output file prefix.", + category: "required" + } + across: { + description: "", + category: "common" + } } } From d17d5ca20d0ccb5da4e3ddc196890ffed3581f02 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 14 Jan 2020 16:49:11 +0100 Subject: [PATCH 0078/1208] Update parameter_meta and fix womtool validate error. 
--- centrifuge.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index 880d4c5b..ec04a0b5 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -230,7 +230,7 @@ task Classify { task Inspect { input { - String printOption = fasta + String printOption = "fasta" String indexBasename String outputPrefix @@ -265,11 +265,11 @@ task Inspect { parameter_meta { printOption: { - description: "", + description: "The output option for inspect (fasta, summary, conversionTable, taxonomyTree, nameTable, sizeTable)", category: "required" } indexBasename: { - description: "", + description: "The basename of the index to be inspected.", category: "required" } outputPrefix: { @@ -277,7 +277,7 @@ task Inspect { category: "required" } across: { - description: "", + description: "When printing FASTA output, output a newline character every bases.", category: "common" } } From 4255eca7a9b041e138d92fefee05da9c16df9b70 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 14 Jan 2020 16:53:37 +0100 Subject: [PATCH 0079/1208] Remove trailing spaces after \. 
--- centrifuge.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index ec04a0b5..c88a838e 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -47,7 +47,7 @@ task Build { ~{"--threads " + cores} \ ~{true="--nodc" false="" disableDifferenceCover} \ ~{"--offrate " + offrate} \ - ~{"--ftabchars " + ftabChars} \ + ~{"--ftabchars " + ftabChars} \ ~{"--kmer-count " + kmerCount} \ ~{"--size-table " + sizeTable} \ ~{"--conversion-table " + conversionTable} \ From febf092d32c7f484db694019fddf3b7fff2ff02c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 15 Jan 2020 13:01:44 +0100 Subject: [PATCH 0080/1208] add parameter_meta to BWA --- bwa.wdl | 68 ++++++++++++++++++++++++--------------------------------- 1 file changed, 28 insertions(+), 40 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 05c8716a..b0b1daf7 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -42,6 +42,21 @@ task Mem { memory: memory docker: dockerImage } + + parameter_meta { + read1: {description: "The first or single end fastq file.", category: "required"} + read2: {description: "The second end fastq file.", category: "common"} + bwaIndex: {description: "The BWA index files.", category: "required"} + outputPath: {description: "The location the output BAM file should be written to.", category: "required"} + readgroup: {description: "The readgroup to be assigned to the reads. See BWA mem's `-R` option.", category: "common"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + picardXmx: {description: "The maximum memory available to picard SortSam. Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task Kit { @@ -91,47 +106,20 @@ task Kit { } parameter_meta { - read1: { - description: "The first-end fastq file.", - category: "required" - } - read2: { - description: "The second-end fastq file.", - category: "common" - } - bwaIndex: { - description: "The BWA index, including a .alt file.", - category: "required" - } - outputPrefix: { - description: "The prefix of the output files, including any parent directories.", - category: "required" - } - readgroup: { - description: "A readgroup identifier.", - category: "common" - } - sixtyFour: { - description: "Whether or not the index uses the '.64' suffixes.", - category: "common" - } - threads: { - description: "The number of threads to use for alignment.", - category: "advanced" - } - sortThreads: { - description: "The number of threads to use for sorting.", - category: "advanced" - } - memory: { - description: "The amount of memory this job will use.", - category: "advanced" - } - dockerImage: { - description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + # inputs + read1: {description: "The first-end fastq file.", category: "required"} + read2: {description: "The second-end fastq file.", category: "common"} + bwaIndex: {description: "The BWA index, including a .alt file.", category: "required"} + outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"} + readgroup: {description: "A readgroup identifier.", category: "common"} + sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} + threads: {description: "The number of threads to use for alignment.", category: "advanced"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + # outputs outputBam: "The produced BAM file." outputBamIndex: "The index of the produced BAM file." } From a78426b8549b1c525203bc46f1d51e66be667a7c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 15 Jan 2020 15:22:06 +0100 Subject: [PATCH 0081/1208] update parameter_meta of minimap2, talon and transcriptclean --- CHANGELOG.md | 2 + minimap2.wdl | 128 +++++------------- talon.wdl | 314 ++++++++++++++------------------------------ transcriptclean.wdl | 157 +++++++--------------- 4 files changed, 179 insertions(+), 422 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d73879a7..5a0ed4da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Removed unused "cores" inputs from transcriptclean tasks. ++ Removed unused "cores" inputs from talon tasks. 
+ Removed unused "threads" input from ModifyStrelka. + Removed the "installDir" inputs from the somaticseq tasks. + Removed the "installDir" input from CombineVariants. diff --git a/minimap2.wdl b/minimap2.wdl index d8a454da..aff51dcc 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -59,34 +59,20 @@ task Indexing { } parameter_meta { - useHomopolymerCompressedKmer: { - description: "Use homopolymer-compressed k-mer (preferrable for PacBio).", - category: "advanced" - } - kmerSize: { - description: "K-mer size (no larger than 28).", - category: "advanced" - } - minimizerWindowSize: { - description: "Minimizer window size.", - category: "advanced" - } - outputPrefix: { - description: "Output directory path + output file prefix.", - category: "required" - } - referenceFile: { - description: "Reference fasta file.", - category: "required" - } - splitIndex: { - description: "Split index for every ~NUM input bases.", - category: "advanced" - } - outputIndexFile: { - description: "Indexed reference file.", - category: "required" - } + # input + useHomopolymerCompressedKmer: {description: "Use homopolymer-compressed k-mer (preferrable for PacBio).", category: "advanced"} + kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"} + minimizerWindowSize: {description: "Minimizer window size.", category: "advanced"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + referenceFile: {description: "Reference fasta file.", category: "required"} + splitIndex: {description: "Split index for every ~NUM input bases.", category: "advanced"} + cores: {description: "The number of cores to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + + # output + outputIndexFile: {description: "Indexed reference file."} } } @@ -147,69 +133,27 @@ task Mapping { } parameter_meta { - presetOption: { - description: "This option applies multiple options at the same time.", - category: "common" - } - kmerSize: { - description: "K-mer size (no larger than 28).", - category: "advanced" - } - outputSAM: { - description: "Output in the SAM format.", - category: "common" - } - outputPrefix: { - description: "Output directory path + output file prefix.", - category: "required" - } - maxIntronLength: { - description: "Max intron length (effective with -xsplice; changing -r).", - category: "advanced" - } - maxFragmentLength: { - description: "Max fragment length (effective with -xsr or in the fragment mode).", - category: "advanced" - } - skipSelfAndDualMappings: { - description: "Skip self and dual mappings (for the all-vs-all mode).", - category: "advanced" - } - retainMaxSecondaryAlignments: { - description: "Retain at most INT secondary alignments.", - category: "advanced" - } - matchingScore: { - description: "Matching score.", - category: "advanced" - } - mismatchPenalty: { - description: "Mismatch penalty.", - category: "advanced" - } - howToFindGTAG: { - description: "How to find GT-AG. 
f:transcript strand, b:both strands, n:don't match GT-AG.", - category: "common" - } - addMDtagToSAM: { - description: "Adds a MD tag to the SAM output file.", - category: "common" - } - secondaryAlignment: { - description: "Whether to output secondary alignments.", - category: "advanced" - } - referenceFile: { - description: "Reference fasta file.", - category: "required" - } - queryFile: { - description: "Input fasta file.", - category: "required" - } - outputAlignmentFile: { - description: "Mapping and alignment between collections of DNA sequences file.", - category: "required" - } + presetOption: {description: "This option applies multiple options at the same time.", category: "common"} + kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"} + outputSAM: {description: "Output in the SAM format.", category: "common"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + maxIntronLength: {description: "Max intron length (effective with -xsplice; changing -r).", category: "advanced"} + maxFragmentLength: {description: "Max fragment length (effective with -xsr or in the fragment mode).", category: "advanced"} + skipSelfAndDualMappings: {description: "Skip self and dual mappings (for the all-vs-all mode).", category: "advanced"} + retainMaxSecondaryAlignments: {description: "Retain at most INT secondary alignments.", category: "advanced"} + matchingScore: {description: "Matching score.", category: "advanced"} + mismatchPenalty: {description: "Mismatch penalty.", category: "advanced"} + howToFindGTAG: {description: "How to find GT-AG. 
f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} + addMDtagToSAM: {description: "Adds a MD tag to the SAM output file.", category: "common"} + secondaryAlignment: {description: "Whether to output secondary alignments.", category: "advanced"} + referenceFile: {description: "Reference fasta file.", category: "required"} + queryFile: {description: "Input fasta file.", category: "required"} + cores: {description: "The number of cores to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + + # output + outputAlignmentFile: {description: "Mapping and alignment between collections of DNA sequences file."} } } diff --git a/talon.wdl b/talon.wdl index e2395cb7..0c8f482a 100644 --- a/talon.wdl +++ b/talon.wdl @@ -30,7 +30,6 @@ task CreateAbundanceFileFromDatabase { File? whitelistFile File? 
datasetsFile - Int cores = 1 String memory = "4G" String dockerImage = "biocontainers/talon:v4.4.1_cv1" } @@ -52,40 +51,25 @@ task CreateAbundanceFileFromDatabase { } runtime { - cpu: cores memory: memory docker: dockerImage } parameter_meta { - databaseFile: { - description: "TALON database.", - category: "required" - } - annotationVersion: { - description: "Which annotation version to use.", - category: "required" - } - genomeBuild: { - description: "Genome build to use.", - category: "required" - } - outputPrefix: { - description: "Output directory path + output file prefix.", - category: "required" - } - whitelistFile: { - description: "Whitelist file of transcripts to include in the output.", - category: "advanced" - } - datasetsFile: { - description: "A file indicating which datasets should be included.", - category: "advanced" - } - outputAbundanceFile: { - description: "Abundance for each transcript in the TALON database across datasets.", - category: "required" - } + # inputs + databaseFile: {description: "TALON database.", category: "required"} + annotationVersion: {description: "Which annotation version to use.", category: "required"} + genomeBuild: {description: "Genome build to use.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + whitelistFile: {description: "Whitelist file of transcripts to include in the output.", category: "advanced"} + datasetsFile: {description: "A file indicating which datasets should be included.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + + # outputs + outputAbundanceFile: {description: "Abundance for each transcript in the TALON database across datasets."} + } } @@ -100,7 +84,6 @@ task CreateGtfFromDatabase { File? whitelistFile File? datasetFile - Int cores = 1 String memory = "4G" String dockerImage = "biocontainers/talon:v4.4.1_cv1" } @@ -123,44 +106,26 @@ task CreateGtfFromDatabase { } runtime { - cpu: cores memory: memory docker: dockerImage } parameter_meta { - databaseFile: { - description: "TALON database.", - category: "required" - } - genomeBuild: { - description: "Genome build to use.", - category: "required" - } - annotationVersion: { - description: "Which annotation version to use.", - category: "required" - } - outputPrefix: { - description: "Output directory path + output file prefix.", - category: "required" - } - observedInDataset: { - description: "The output will only include transcripts that were observed at least once.", - category: "advanced" - } - whitelistFile: { - description: "Whitelist file of transcripts to include in the output.", - category: "advanced" - } - datasetFile: { - description: "A file indicating which datasets should be included.", - category: "advanced" - } - outputGTFfile: { - description: "The genes, transcripts, and exons stored a TALON database in GTF format.", - category: "required" - } + # inputs + databaseFile: {description: "TALON database.", category: "required"} + genomeBuild: {description: "Genome build to use.", category: "required"} + annotationVersion: {description: "Which annotation version to use.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + observedInDataset: {description: "The output will only include transcripts that were observed at least once.", category: "advanced"} + whitelistFile: {description: "Whitelist file of transcripts to include in the 
output.", category: "advanced"} + datasetFile: {description: "A file indicating which datasets should be included.", category: "advanced"} + cores: {description: "The number of cores to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + + # outputs + outputGTFfile: {description: "The genes, transcripts, and exons stored a TALON database in GTF format."} } } @@ -172,7 +137,6 @@ task FilterTalonTranscripts { File? pairingsFile - Int cores = 1 String memory = "4G" String dockerImage = "biocontainers/talon:v4.4.1_cv1" } @@ -192,28 +156,18 @@ task FilterTalonTranscripts { } runtime { - cpu: cores memory: memory docker: dockerImage } parameter_meta { - databaseFile: { - description: "TALON database.", - category: "required" - } - annotationVersion: { - description: "Which annotation version to use.", - category: "required" - } - outputPrefix: { - description: "Output directory path + output file prefix.", - category: "required" - } - pairingsFile: { - description: "A file indicating which datasets should be considered together.", - category: "advanced" - } + databaseFile: {description: "TALON database.", category: "required"} + annotationVersion: {description: "Which annotation version to use.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + pairingsFile: {description: "A file indicating which datasets should be considered together.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } @@ -225,7 +179,6 @@ task GetReadAnnotations { File? datasetFile - Int cores = 1 String memory = "4G" String dockerImage = "biocontainers/talon:v4.4.1_cv1" } @@ -245,32 +198,22 @@ task GetReadAnnotations { } runtime { - cpu: cores memory: memory docker: dockerImage } parameter_meta { - databaseFile: { - description: "TALON database.", - category: "required" - } - genomeBuild: { - description: "Genome build to use.", - category: "required" - } - outputPrefix: { - description: "Output directory path + output file prefix.", - category: "required" - } - datasetFile: { - description: "A file indicating which datasets should be included.", - category: "advanced" - } - outputAnnotation: { - description: "Read-specific annotation information from a TALON database.", - category: "required" - } + # inputs + databaseFile: { description: "TALON database.", category: "required"} + genomeBuild: {description: "Genome build to use.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + datasetFile: {description: "A file indicating which datasets should be included.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + + # outputs + outputAnnotation: {description: "Read-specific annotation information from a TALON database."} } } @@ -285,7 +228,6 @@ task InitializeTalonDatabase { Int cutoff3p = 300 String outputPrefix - Int cores = 1 String memory = "10G" String dockerImage = "biocontainers/talon:v4.4.1_cv1" } @@ -309,48 +251,26 @@ task InitializeTalonDatabase { } runtime { - cpu: cores memory: memory docker: dockerImage } parameter_meta { - GTFfile: { - description: "GTF annotation containing genes, transcripts, and edges.", - category: "required" - } - genomeBuild: { - description: "Name of genome build that the GTF file is based on (ie hg38).", - category: "required" - } - annotationVersion: { - description: "Name of supplied annotation (will be used to label data).", - category: "required" - } - minimumLength: { - description: "Minimum required transcript length.", - category: "common" - } - novelIDprefix: { - description: "Prefix for naming novel discoveries in eventual TALON runs.", - category: "common" - } - cutoff5p: { - description: "Maximum allowable distance (bp) at the 5' end during annotation.", - category: "advanced" - } - cutoff3p: { - description: "Maximum allowable distance (bp) at the 3' end during annotation.", - category: "advanced" - } - outputPrefix: { - description: "Output directory path + output file prefix.", - category: "required" - } - outputDatabase: { - description: "TALON database.", - category: "required" - } + # inputs + GTFfile: {description: "GTF annotation containing genes, transcripts, and edges.", category: "required"} + genomeBuild: {description: "Name of genome build that the GTF file is based on (ie hg38).", category: "required"} + annotationVersion: {description: "Name of supplied annotation (will be used to label data).", category: "required"} + minimumLength: { description: "Minimum required transcript length.", category: 
"common"} + novelIDprefix: {description: "Prefix for naming novel discoveries in eventual TALON runs.", category: "common"} + cutoff5p: { description: "Maximum allowable distance (bp) at the 5' end during annotation.", category: "advanced"} + cutoff3p: {description: "Maximum allowable distance (bp) at the 3' end during annotation.", category: "advanced"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + + # outputs + outputDatabase: {description: "TALON database."} } } @@ -358,7 +278,6 @@ task ReformatGtf { input { File GTFfile - Int cores = 1 String memory = "4G" String dockerImage = "biocontainers/talon:v4.4.1_cv1" } @@ -374,16 +293,15 @@ task ReformatGtf { } runtime { - cpu: cores memory: memory docker: dockerImage } parameter_meta { - GTFfile: { - description: "GTF annotation containing genes, transcripts, and edges.", - category: "required" - } + GTFfile: {description: "GTF annotation containing genes, transcripts, and edges.", category: "required"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } @@ -395,7 +313,6 @@ task SummarizeDatasets { File? 
datasetGroupsCSV - Int cores = 1 String memory = "4G" String dockerImage = "biocontainers/talon:v4.4.1_cv1" } @@ -415,32 +332,22 @@ task SummarizeDatasets { } runtime { - cpu: cores memory: memory docker: dockerImage } parameter_meta { - databaseFile: { - description: "TALON database.", - category: "required" - } - setVerbose: { - description: "Print out the counts in terminal.", - category: "advanced" - } - outputPrefix: { - description: "Output directory path + output file prefix.", - category: "required" - } - datasetGroupsCSV: { - description: "File of comma-delimited dataset groups to process together.", - category: "advanced" - } - outputSummaryFile: { - description: "Tab-delimited file of gene and transcript counts for each dataset.", - category: "required" - } + # inputs + databaseFile: {description: "TALON database.", category: "required"} + setVerbose: {description: "Print out the counts in terminal.", category: "advanced"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + datasetGroupsCSV: {description: "File of comma-delimited dataset groups to process together.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + + # outputs + outputSummaryFile: {description: "Tab-delimited file of gene and transcript counts for each dataset."} } } @@ -493,53 +400,24 @@ task Talon { } parameter_meta { - SAMfiles: { - description: "Input SAM files.", - category: "required" - } - organism: { - description: "The name of the organism from which the samples originated.", - category: "required" - } - sequencingPlatform: { - description: "The sequencing platform used to generate long reads.", - category: "required" - } - databaseFile: { - description: "TALON database. 
Created using initialize_talon_database.py.", - category: "required" - } - genomeBuild: { - description: "Genome build (i.e. hg38) to use.", - category: "required" - } - minimumCoverage: { - description: "Minimum alignment coverage in order to use a SAM entry.", - category: "common" - } - minimumIdentity: { - description: "Minimum alignment identity in order to use a SAM entry.", - category: "common" - } - outputPrefix: { - description: "Output directory path + output file prefix.", - category: "required" - } - outputUpdatedDatabase: { - description: "Updated TALON database.", - category: "required" - } - outputLog: { - description: "Log file from TALON run.", - category: "required" - } - outputAnnot: { - description: "Read annotation file from TALON run.", - category: "required" - } - outputConfigFile: { - description: "The TALON configuration file.", - category: "required" - } + # inputs + SAMfiles: {description: "Input SAM files.", category: "required"} + organism: {description: "The name of the organism from which the samples originated.", category: "required"} + sequencingPlatform: {description: "The sequencing platform used to generate long reads.", category: "required"} + databaseFile: {description: "TALON database. Created using initialize_talon_database.py.", category: "required"} + genomeBuild: {description: "Genome build (i.e. hg38) to use.", category: "required"} + minimumCoverage: {description: "Minimum alignment coverage in order to use a SAM entry.", category: "common"} + minimumIdentity: {description: "Minimum alignment identity in order to use a SAM entry.", category: "common" } + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + cores: {description: "The number of cores to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + + # outputs + outputUpdatedDatabase: {description: "Updated TALON database."} + outputLog: {description: "Log file from TALON run."} + outputAnnot: {description: "Read annotation file from TALON run."} + outputConfigFile: {description: "The TALON configuration file."} } } diff --git a/transcriptclean.wdl b/transcriptclean.wdl index b7b913dc..e288e316 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -27,7 +27,6 @@ task GetSJsFromGtf { String outputPrefix Int minIntronSize = 21 - Int cores = 1 String memory = "8G" String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } @@ -47,32 +46,21 @@ task GetSJsFromGtf { } runtime { - cpu: cores memory: memory docker: dockerImage } parameter_meta { - GTFfile: { - description: "Input GTF file", - category: "required" - } - genomeFile: { - description: "Reference genome", - category: "required" - } - minIntronSize: { - description: "Minimum size of intron to consider a junction.", - category: "advanced" - } - outputPrefix: { - description: "Output directory path + output file prefix.", - category: "required" - } - outputSJsFile: { - description: "Extracted splice junctions.", - category: "required" - } + # inputs + GTFfile: {description: "Input GTF file", category: "required"} + genomeFile: {description: "Reference genome", category: "required"} + minIntronSize: {description: "Minimum size of intron to consider a junction.", category: "advanced"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + # outputs + outputSJsFile: {description: "Extracted splice junctions."} } } @@ -81,7 +69,6 @@ task GetTranscriptCleanStats { File transcriptCleanSAMfile String outputPrefix - Int cores = 1 String memory = "4G" String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } @@ -99,24 +86,20 @@ task GetTranscriptCleanStats { } runtime { - cpu: cores memory: memory docker: dockerImage } parameter_meta { - transcriptCleanSAMfile: { - description: "Output SAM file from TranscriptClean", - category: "required" - } - outputPrefix: { - description: "Output directory path + output file prefix.", - category: "required" - } - outputStatsFile: { - description: "Summary stats from TranscriptClean run.", - category: "required" - } + # inputs + transcriptCleanSAMfile: {description: "Output SAM file from TranscriptClean", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + + # outputs + outputStatsFile: {description: "Summary stats from TranscriptClean run."} } } @@ -180,81 +163,31 @@ task TranscriptClean { } parameter_meta { - SAMfile: { - description: "Input SAM file containing transcripts to correct.", - category: "required" - } - referenceGenome: { - description: "Reference genome fasta file.", - category: "required" - } - maxLenIndel: { - description: "Maximum size indel to correct.", - category: "advanced" - } - maxSJoffset: { - description: "Maximum distance from annotated splice junction to correct.", - category: "advanced" - } - outputPrefix: { - description: "Output directory path + output file prefix.", - category: "required" - } - correctMismatches: { - description: "Set this to make TranscriptClean correct mismatches.", - category: "common" - } - correctIndels: { - description: "Set this to make TranscriptClean correct indels.", - category: "common" - } - correctSJs: { - description: "Set this to make TranscriptClean correct splice junctions.", - category: "common" - } - dryRun: { - description: "TranscriptClean will read in the data but don't do any correction.", - category: "advanced" - } - primaryOnly: { - description: "Only output primary mappings of transcripts.", - category: "advanced" - } - canonOnly: { - description: "Only output canonical transcripts and transcript containing annotated noncanonical junctions.", - category: "advanced" - } - bufferSize: { - description: "Number of lines to output to file at once by each thread during run.", - category: "common" - } - deleteTmp: { - description: "The temporary directory generated by TranscriptClean will be removed.", - category: "common" - } - spliceJunctionAnnotation: { - description: "Splice junction file.", - category: "common" - } - variantFile: { - description: "VCF formatted file of variants.", - category: "common" - } - outputTranscriptCleanFasta: { 
- description: "Fasta file containing corrected reads.", - category: "required" - } - outputTranscriptCleanLog: { - description: "Log file of TranscriptClean run.", - category: "required" - } - outputTranscriptCleanSAM: { - description: "SAM file containing corrected aligned reads.", - category: "required" - } - outputTranscriptCleanTElog: { - description: "TE log file of TranscriptClean run.", - category: "required" - } + # inputs + SAMfile: {description: "Input SAM file containing transcripts to correct.", category: "required"} + referenceGenome: {description: "Reference genome fasta file.", category: "required"} + maxLenIndel: {description: "Maximum size indel to correct.", category: "advanced"} + maxSJoffset: {description: "Maximum distance from annotated splice junction to correct.", category: "advanced"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + correctMismatches: {description: "Set this to make TranscriptClean correct mismatches.", category: "common"} + correctIndels: {description: "Set this to make TranscriptClean correct indels.", category: "common"} + correctSJs: {description: "Set this to make TranscriptClean correct splice junctions.", category: "common"} + dryRun: {description: "TranscriptClean will read in the data but don't do any correction.", category: "advanced"} + primaryOnly: {description: "Only output primary mappings of transcripts.", category: "advanced"} + canonOnly: {description: "Only output canonical transcripts and transcript containing annotated noncanonical junctions.", category: "advanced"} + bufferSize: {description: "Number of lines to output to file at once by each thread during run.", category: "common"} + deleteTmp: {description: "The temporary directory generated by TranscriptClean will be removed.", category: "common"} + spliceJunctionAnnotation: {description: "Splice junction file.", category: "common"} + variantFile: {description: "VCF formatted file of variants.", 
category: "common"} + cores: {description: "The number of cores to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + + # outputs + outputTranscriptCleanFasta: {description: "Fasta file containing corrected reads."} + outputTranscriptCleanLog: {description: "Log file of TranscriptClean run."} + outputTranscriptCleanSAM: {description: "SAM file containing corrected aligned reads."} + outputTranscriptCleanTElog: {description: "TE log file of TranscriptClean run."} } } From baeef514937156bda9941067ea9623e78d6673d1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 15 Jan 2020 16:23:32 +0100 Subject: [PATCH 0082/1208] add parameter_meta to CPAT --- CPAT.wdl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/CPAT.wdl b/CPAT.wdl index f9a77bed..73c9d13c 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -36,6 +36,20 @@ task CPAT { runtime { docker: dockerImage } + + parameter_meta { + gene: {description: "Equivalent to CPAT's `--gene` option.", category: "required"} + outFilePath: {description: "Equivalent to CPAT's `--outfile` option.", category: "required"} + hex: {description: "Equivalent to CPAT's `--hex` option.", category: "required"} + logitModel: {description: "Equivalent to CPAT's `--logitModel` option.", category: "required"} + referenceGenome: {description: "Equivalent to CPAT's `--ref` option.", category: "advanced"} + referenceGenomeIndex: {description: "The index of the reference. 
Should be added as input if CPAT should not index the reference genome.", + category: "advanced"} + startCodons: {description: "Equivalent to CPAT's `--start` option.", category: "advanced"} + stopCodons: {description: "Equivalent to CPAT's `--stop` option.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } # There is also make_hexamer_tab.py and make_logitModel.py From 21165c7bbe532f9456fbf29469509fa3d13c03f3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Jan 2020 09:32:48 +0100 Subject: [PATCH 0083/1208] add parameter_meta to gffcompare and gff read --- gffcompare.wdl | 32 ++++++++++++++++++++++++++++++++ gffread.wdl | 13 +++++++++++++ 2 files changed, 45 insertions(+) diff --git a/gffcompare.wdl b/gffcompare.wdl index b60881fa..60d19d5f 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -30,6 +30,7 @@ task GffCompare { # Issue addressed at https://github.com/openwdl/wdl/pull/263 File? noneFile # This is a wdl workaround. Please do not assign! 
} + # This allows for the creation of output directories String dirPrefix = if defined(outputDir) then select_first([outputDir]) + "/" @@ -91,4 +92,35 @@ task GffCompare { runtime { docker: dockerImage } + + parameter_meta { + inputGtfList: {description: "Equivalent to gffcompare's `-i` option.", category: "advanced"} + inputGtfFiles: {description: "The input GTF files.", category: "required"} + referenceAnnotation: {description: "The GTF file to compare with.", category: "required"} + outputDir: {description: "The location the output should be written.", category: "common"} + outPrefix: {description: "The prefix for the output.", category: "advanced"} + genomeSequences: {description: "Equivalent to gffcompare's `-s` option.", category: "advanced"} + maxDistanceFreeEndsTerminalExons: {description: "Equivalent to gffcompare's `-e` option.", category: "advanced"} + maxDistanceGroupingTranscriptStartSites: {description: "Equivalent to gffcompare's `-d` option.", category: "advanced"} + namePrefix: {description: "Equivalent to gffcompare's `-p` option.", category: "advanced"} + C: {description: "Equivalent to gffcompare's `-C` flag.", category: "advanced"} + A: {description: "Equivalent to gffcompare's `-A` flag.", category: "advanced"} + X: {description: "Equivalent to gffcompare's `-X` flag.", category: "advanced"} + K: {description: "Equivalent to gffcompare's `-K` flag.", category: "advanced"} + snCorrection: {description: "Equivalent to gffcompare's `-R` flag.", category: "advanced"} + precisionCorrection: {description: "Equivalent to gffcompare's `-Q` flag.", category: "advanced"} + discardSingleExonTransfragsAndReferenceTranscripts: {description: "Equivalent to gffcompare's `-M` flag.", category: "advanced"} + discardSingleExonReferenceTranscripts: {description: "Equivalent to gffcompare's `-N` flag.", category: "advanced"} + noTmap: {description: "Equivalent to gffcompare's `-T` flag.", category: "advanced"} + verbose: {description: "Equivalent to gffcompare's 
`-V` flag.", category: "advanced"} + debugMode: {description: "Equivalent to gffcompare's `-D` flag.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } + + meta { + WDL_AID: { + exclude: ["noneFile"] + } + } } \ No newline at end of file diff --git a/gffread.wdl b/gffread.wdl index da99781b..43682fbc 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -43,4 +43,17 @@ task GffRead { runtime { docker: dockerImage } + + parameter_meta { + inputGff: {description: "The input GFF file.", category: "required"} + genomicSequence: {description: "The genome.", category: "required"} + genomicIndex: {description: "The genome's index.", category: "advanced"} + exonsFastaPath: {description: "The location the exons fasta should be written to.", category: "advanced"} + CDSFastaPath: {description: "The location the CDS fasta should be written to.", category: "advanced"} + proteinFastaPath: {description: "The location the protein fasta should be written to.", category: "advanced"} + filteredGffPath: {description: "The location the filtered GFF should be written to.", category: "advanced"} + outputGtfFormat: {description: "Equivalent to gffread's `-T` flag.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } \ No newline at end of file From cf5c864dca9e2886609e545b545721e79bfe1e23 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Jan 2020 09:54:07 +0100 Subject: [PATCH 0084/1208] add parameter_meta to hisat2 and star --- hisat2.wdl | 16 ++++++++++++++++ star.wdl | 20 +++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/hisat2.wdl b/hisat2.wdl index 3423e56b..1575f7e3 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -49,4 +49,20 @@ task Hisat2 { cpu: threads + 1 docker: dockerImage } + + parameter_meta { + indexFiles: {description: "The hisat2 index files.", category: "required"} + inputR1: {description: "The first-/single-end FastQ file.", category: "required"} + inputR2: {description: "The second-end FastQ file.", category: "common"} + outputBam: {description: "The location the output BAM file should be written to.", category: "required"} + sample: {description: "The sample id.", category: "required"} + library: {description: "The library id.", category: "required"} + readgroup: {description: "The readgroup id.", category: "required"} + platform: {description: "The platform used for sequencing.", category: "advanced"} + downstreamTranscriptomeAssembly: {description: "Equivalent to hisat2's `--dta` flag.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } \ No newline at end of file diff --git a/star.wdl b/star.wdl index fb788175..bc6ae5d9 100644 --- a/star.wdl +++ b/star.wdl @@ -19,7 +19,7 @@ task Star { String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } - #TODO Needs to be extended for all possible output extensions + #TODO Could be extended for all possible output extensions Map[String, String] samOutputNames = {"BAM SortedByCoordinate": "sortedByCoord.out.bam"} command { @@ -48,6 +48,24 @@ task Star { memory: memory docker: dockerImage } + + parameter_meta { + inputR1: {description: "The first-/single-end FastQ files.", category: "required"} + inputR2: {description: "The second-end FastQ files (in the same order as the first-end files).", category: "common"} + indexFiles: {description: "The star index files.", category: "required"} + outFileNamePrefix: {description: "The prefix for the output files. May include directories.", category: "required"} + outSAMtype: {description: "The type of alignment file to be produced. 
Currently only `BAM SortedByCoordinate` is supported.", category: "advanced"} + readFilesCommand: {description: "Equivalent to star's `--readFilesCommand` option.", category: "advanced"} + outStd: {description: "Equivalent to star's `--outStd` option.", category: "advanced"} + twopassMode: {description: "Equivalent to star's `--twopassMode` option.", category: "advanced"} + outSAMattrRGline: {description: "The readgroup lines for the fastq pairs given (in the same order as the fastq files).", category: "common"} + outSAMunmapped: {description: "Equivalent to star's `--outSAMunmapped` option.", category: "advanced"} + limitBAMsortRAM: {description: "Equivalent to star's `--limitBAMsortRAM` option.", category: "advanced"} + runThreadN: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task MakeStarRGline { From 8f41d5d524732c4ddf4817081143b237e63a04a5 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Jan 2020 12:54:08 +0100 Subject: [PATCH 0085/1208] fix linting error --- talon.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/talon.wdl b/talon.wdl index 0c8f482a..798d1945 100644 --- a/talon.wdl +++ b/talon.wdl @@ -119,7 +119,6 @@ task CreateGtfFromDatabase { observedInDataset: {description: "The output will only include transcripts that were observed at least once.", category: "advanced"} whitelistFile: {description: "Whitelist file of transcripts to include in the output.", category: "advanced"} datasetFile: {description: "A file indicating which datasets should be included.", category: "advanced"} - cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} 
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 49a4e493142bad36fd4a7b8c6643d2b20947387f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Jan 2020 14:06:33 +0100 Subject: [PATCH 0086/1208] update scripts --- .travis.yml | 2 +- scripts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index fec93c74..396b998f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,4 +19,4 @@ before_install: install: - conda install --file requirements-test.txt -script: bash scripts/biowdl_lint.sh skip-wdl-aid +script: bash scripts/biowdl_lint.sh diff --git a/scripts b/scripts index 76d1e695..c7e2da7e 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 76d1e695812aecd55fdf0221dc08b25d3ac7dde1 +Subproject commit c7e2da7e4e5556cc4d20db5f9495edbabdbc2fde From bd92d1f4336a5c2a3ee6a8079591e8d1912b3a8e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Jan 2020 14:12:20 +0100 Subject: [PATCH 0087/1208] update scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index c7e2da7e..a1783b5c 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit c7e2da7e4e5556cc4d20db5f9495edbabdbc2fde +Subproject commit a1783b5c789ebef601a8ec5849c4bbfe7dd3f87d From 0ffaa3a3c386e0c77719c60b61389168fdd7a003 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Jan 2020 15:02:20 +0100 Subject: [PATCH 0088/1208] typos --- gatk.wdl | 6 +++--- picard.wdl | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index e0beeb54..5ca149c3 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -114,7 +114,7 @@ task BaseRecalibrator { inputBamIndex: {description: "The index of the input BAM file.", category: "required"} recalibrationReportPath: {description: "The location to write the BQSR report to.", category: "required"} sequenceGroupInterval: 
{description: "Bed files describing the regions to operate on.", category: "advanced"} - knownIndelsSitesVCFs: {description: "VCf files with known indels.", category: "advanced"} + knownIndelsSitesVCFs: {description: "VCF files with known indels.", category: "advanced"} knownIndelsSitesVCFIndexes: {description: "The indexed for the known variant VCFs.", category: "advanced"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} @@ -275,7 +275,7 @@ task GenotypeGVCFs { gvcfFiles: {description: "The GVCF files to be genotypes.", category: "required"} gvcfFilesIndex: {description: "The index of the input GVCF files.", category: "required"} intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "required"} - outputPath: {description: "The location to write the output VCf file to.", category: "required"} + outputPath: {description: "The location to write the output VCF file to.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", @@ -649,7 +649,7 @@ task FilterMutectCalls { referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} unfilteredVcf: {description: "An unfiltered VCF file as produced by Mutect2.", category: "required"} unfilteredVcfIndex: {description: "The index of the unfiltered VCF file.", category: "required"} - outputVcf: {description: "The location the filtered VCf file should be written.", category: "required"} + outputVcf: {description: "The location the filtered VCF file should be written.", category: "required"} contaminationTable: {description: "Equivalent to FilterMutectCalls' `--contamination-table` option.", category: "advanced"} mafTumorSegments: {description: "Equivalent to 
FilterMutectCalls' `--tumor-segmentation` option.", category: "advanced"} artifactPriors: {description: "Equivalent to FilterMutectCalls' `--ob-priors` option.", category: "advanced"} diff --git a/picard.wdl b/picard.wdl index f1f07723..48ebf2d5 100644 --- a/picard.wdl +++ b/picard.wdl @@ -483,7 +483,7 @@ task MergeVCFs { parameter_meta { inputVCFs: {description: "The VCF files to be merged.", category: "required"} inputVCFsIndexes: {description: "The indexes of the VCF files.", category: "required"} - outputVcfPath: {description: "The location the output VCf file should be written to.", category: "required"} + outputVcfPath: {description: "The location the output VCF file should be written to.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", From 9de764c466c3cb1d68fa170dfb5b51d4c337ccc2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 16 Jan 2020 16:26:03 +0100 Subject: [PATCH 0089/1208] Reformat parameter_meta. --- centrifuge.wdl | 140 ++++++++++--------------------------------------- scripts | 2 +- 2 files changed, 29 insertions(+), 113 deletions(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index c88a838e..8aaed45e 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -68,50 +68,17 @@ task Build { } parameter_meta { - disableDifferenceCover: { - description: "Disable use of the difference-cover sample.", - category: "required" - } - conversionTable: { - description: "List of UIDs (unique ID) and corresponding taxonomic IDs.", - category: "required" - } - taxonomyTree: { - description: "Taxonomic tree (e.g. nodes.dmp).", - category: "required" - } - nameTable: { - description: "Name table (e.g. 
names.dmp).", - category: "required" - } - referenceFile: { - description: "A comma-separated list of FASTA files containing the reference sequences to be aligned to.", - category: "required" - } - indexBasename: { - description: "The basename of the index files to write.", - category: "required" - } - outputPrefix: { - description: "Output directory path + output file prefix.", - category: "required" - } - offrate: { - description: "The number of rows marked by the indexer.", - category: "common" - } - ftabChars: { - description: "Calculate an initial BW range with respect to this character.", - category: "common" - } - kmerCount: { - description: "Use as kmer-size for counting the distinct number of k-mers in the input sequences.", - category: "common" - } - sizeTable: { - description: "List of taxonomic IDs and lengths of the sequences belonging to the same taxonomic IDs.", - category: "common" - } + disableDifferenceCover: {description: "Disable use of the difference-cover sample.", category: "required"} + conversionTable: {description: "List of UIDs (unique ID) and corresponding taxonomic IDs.", category: "required"} + taxonomyTree: {description: "Taxonomic tree (e.g. nodes.dmp).", category: "required"} + nameTable: {description: "Name table (e.g. 
names.dmp).", category: "required"} + referenceFile: {description: "A comma-separated list of FASTA files containing the reference sequences to be aligned to.", category: "required"} + indexBasename: {description: "The basename of the index files to write.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + offrate: {description: "The number of rows marked by the indexer.", category: "common"} + ftabChars: {description: "Calculate an initial BW range with respect to this character.", category: "common"} + kmerCount: {description: "Use as kmer-size for counting the distinct number of k-mers in the input sequences.", category: "common"} + sizeTable: {description: "List of taxonomic IDs and lengths of the sequences belonging to the same taxonomic IDs.", category: "common"} } } @@ -173,58 +140,19 @@ task Classify { } parameter_meta { - inputFormat: { - description: "The format of the read file(s).", - category: "required" - } - phred64: { - description: "If set to true, Phred+64 encoding is used.", - category: "required" - } - minHitLength: { - description: "Minimum length of partial hits.", - category: "required" - } - indexPrefix: { - description: "The basename of the index for the reference genomes.", - category: "required" - } - read1: { - description: "List of files containing mate 1s, or unpaired reads.", - category: "required" - } - outputPrefix: { - description: "Output directory path + output file prefix.", - category: "required" - } - outputName: { - description: "The base name of the outputPrefix.", - category: "required" - } - read2: { - description: "List of files containing mate 2s.", - category: "common" - } - trim5: { - description: "Trim bases from 5' (left) end of each read before alignment.", - category: "common" - } - trim3: { - description: "Trim bases from 3' (right) end of each read before alignment.", - category: "common" - } - reportMaxDistinct: { - description: "It 
searches for at most distinct, primary assignments for each read or pair.", - category: "common" - } - hostTaxIDs: { - description: "A comma-separated list of taxonomic IDs that will be preferred in classification procedure.", - category: "common" - } - excludeTaxIDs: { - description: "A comma-separated list of taxonomic IDs that will be excluded in classification procedure.", - category: "common" - } + inputFormat: {description: "The format of the read file(s).", category: "required"} + phred64: {description: "If set to true, Phred+64 encoding is used.", category: "required"} + minHitLength: {description: "Minimum length of partial hits.", category: "required"} + indexPrefix: {description: "The basename of the index for the reference genomes.", category: "required"} + read1: {description: "List of files containing mate 1s, or unpaired reads.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + outputName: {description: "The base name of the outputPrefix.", category: "required"} + read2: {description: "List of files containing mate 2s.", category: "common"} + trim5: {description: "Trim bases from 5' (left) end of each read before alignment.", category: "common"} + trim3: {description: "Trim bases from 3' (right) end of each read before alignment.", category: "common"} + reportMaxDistinct: {description: "It searches for at most distinct, primary assignments for each read or pair.", category: "common"} + hostTaxIDs: {description: "A comma-separated list of taxonomic IDs that will be preferred in classification procedure.", category: "common"} + excludeTaxIDs: {description: "A comma-separated list of taxonomic IDs that will be excluded in classification procedure.", category: "common"} } } @@ -264,22 +192,10 @@ task Inspect { } parameter_meta { - printOption: { - description: "The output option for inspect (fasta, summary, conversionTable, taxonomyTree, nameTable, sizeTable)", - category: 
"required" - } - indexBasename: { - description: "The basename of the index to be inspected.", - category: "required" - } - outputPrefix: { - description: "Output directory path + output file prefix.", - category: "required" - } - across: { - description: "When printing FASTA output, output a newline character every bases.", - category: "common" - } + printOption: {description: "The output option for inspect (fasta, summary, conversionTable, taxonomyTree, nameTable, sizeTable)", category: "required"} + indexBasename: {description: "The basename of the index to be inspected.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + across: {description: "When printing FASTA output, output a newline character every bases.", category: "common"} } } diff --git a/scripts b/scripts index a1783b5c..fc603e5d 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit a1783b5c789ebef601a8ec5849c4bbfe7dd3f87d +Subproject commit fc603e5d408b89b99297fb5737586c059c5f9df6 From 25c831fefe2e3cbfa5aa531296aed3b372fa8de6 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 16 Jan 2020 16:27:21 +0100 Subject: [PATCH 0090/1208] Update scripts submodule. --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index fc603e5d..a1783b5c 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit fc603e5d408b89b99297fb5737586c059c5f9df6 +Subproject commit a1783b5c789ebef601a8ec5849c4bbfe7dd3f87d From fafd33724e8084943cc3fbc88ae75bee463d2ef6 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 17 Jan 2020 13:11:59 +0100 Subject: [PATCH 0091/1208] Update CHANGELOG. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1becd5ae..28f2e770 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. 
version 2.2.0-dev --------------------------- ++ Update centrifuge tasks. + Removed unused "cores" inputs from transcriptclean tasks. + Removed unused "cores" inputs from talon tasks. + Removed unused "threads" input from ModifyStrelka. From e707ed9544625933bb5e42e1b09f2bcfa2b430b2 Mon Sep 17 00:00:00 2001 From: Jasper Boom Date: Fri, 17 Jan 2020 13:29:55 +0100 Subject: [PATCH 0092/1208] Update centrifuge.wdl Co-Authored-By: DavyCats --- centrifuge.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index 8aaed45e..865bf35f 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -27,7 +27,7 @@ task Build { File taxonomyTree File nameTable File referenceFile - String indexBasename = "NAME" + String indexBasename = "centrifuge_index" String outputPrefix Int? offrate From 948e3ca064bceb5beea027a5af1fc6c01723fac2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 17 Jan 2020 13:30:17 +0100 Subject: [PATCH 0093/1208] Update LINCENSE date. --- centrifuge.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index 8aaed45e..6586c699 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2018 Sequencing Analysis Support Core - Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal From da98e6649613768aeef4aa7d53db37963a1d2a14 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 17 Jan 2020 13:45:52 +0100 Subject: [PATCH 0094/1208] Rename cores to threads. --- centrifuge.wdl | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index b22d3962..5c9ef087 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -35,7 +35,7 @@ task Build { Int? kmerCount File? 
sizeTable - Int cores = 5 + Int threads = 5 String memory = "20G" String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he860b03_3" } @@ -44,7 +44,7 @@ task Build { set -e mkdir -p "$(dirname ~{outputPrefix})" centrifuge-build \ - ~{"--threads " + cores} \ + ~{"--threads " + threads} \ ~{true="--nodc" false="" disableDifferenceCover} \ ~{"--offrate " + offrate} \ ~{"--ftabchars " + ftabChars} \ @@ -62,7 +62,7 @@ task Build { } runtime { - cpu: cores + cpu: threads memory: memory docker: dockerImage } @@ -99,7 +99,7 @@ task Classify { String? hostTaxIDs String? excludeTaxIDs - Int cores = 4 + Int threads = 4 String memory = "16G" String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he860b03_3" } @@ -114,7 +114,7 @@ task Classify { ~{true="--phred64" false="--phred33" phred64} \ ~{"--min-hitlen " + minHitLength} \ ~{"--met-file " + outputPrefix + "/" + outputName + "_alignment_metrics.tsv"} \ - ~{"--threads " + cores} \ + ~{"--threads " + threads} \ ~{"--trim5 " + trim5} \ ~{"--trim3 " + trim3} \ ~{"-k " + reportMaxDistinct} \ @@ -164,7 +164,6 @@ task Inspect { Int? across - Int cores = 1 String memory = "4G" String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he860b03_3" } @@ -186,7 +185,6 @@ task Inspect { } runtime { - cpu: cores memory: memory docker: dockerImage } From c9065ae10248aa1963e634e4d5d8b28925acf4cd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 17 Jan 2020 13:50:45 +0100 Subject: [PATCH 0095/1208] reorder gatk.wdl --- gatk.wdl | 547 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 274 insertions(+), 273 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 5ca149c3..a270794a 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -132,6 +132,47 @@ task BaseRecalibrator { } } +task CalculateContamination { + input { + File tumorPileups + File? 
normalPileups + + String memory = "24G" + String javaXmx = "12G" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + } + + command { + set -e + gatk --java-options -Xmx~{javaXmx} \ + CalculateContamination \ + -I ~{tumorPileups} \ + ~{"-matched " + normalPileups} \ + -O "contamination.table" \ + --tumor-segmentation "segments.table" + } + + output { + File contaminationTable = "contamination.table" + File mafTumorSegments = "segments.table" + } + + runtime { + docker: dockerImage + memory: memory + } + + parameter_meta { + tumorPileups: {description: "The pileup summary of a tumor/case sample.", category: "required"} + normalPileups: {description: "The pileup summary of the normal/control sample.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task CombineGVCFs { input { Array[File]+ gvcfFiles @@ -187,6 +228,144 @@ task CombineGVCFs { } } +task CombineVariants { + input { + File referenceFasta + File referenceFastaFai + File referenceFastaDict + String genotypeMergeOption = "UNIQUIFY" + String filteredRecordsMergeType = "KEEP_IF_ANY_UNFILTERED" + Array[String]+ identifiers + Array[File]+ variantVcfs # follow "identifiers" array order + Array[File]+ variantIndexes + String outputPath + + String memory = "24G" + String javaXmx = "12G" + String dockerImage = "broadinstitute/gatk3:3.8-1" + } + + command <<< + set -e + mkdir -p "$(dirname ~{outputPath})" + + # build "-V: " arguments according to IDs and VCFs to merge + # Make sure commands are run in bash + V_args=$(bash -c ' + set -eu + ids=(~{sep=" " identifiers}) + vars=(~{sep=" " variantVcfs}) + for (( i = 0; i < ${#ids[@]}; ++i )) + do + printf -- "-V:%s %s " "${ids[i]}" "${vars[i]}" + done + ') + java -Xmx~{javaXmx} -jar /usr/GenomeAnalysisTK.jar \ + -T CombineVariants \ + -R ~{referenceFasta} \ + --genotypemergeoption ~{genotypeMergeOption} \ + --filteredrecordsmergetype ~{filteredRecordsMergeType} \ + --out ~{outputPath} \ + $V_args + >>> + + output { + File combinedVcf = outputPath + File combinedVcfIndex = outputPath + ".tbi" + } + + runtime { + docker: dockerImage + memory: memory + } + + parameter_meta { + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + genotypeMergeOption: {description: "Equivalent to CombineVariants' `--genotypemergeoption` option.", category: "advanced"} + filteredRecordsMergeType: {description: "Equivalent to 
CombineVariants' `--filteredrecordsmergetype` option.", category: "advanced"} + identifiers: {description: "The sample identifiers in the same order as variantVcfs.", category: "required"} + variantVcfs: {description: "The input VCF files in the same order as identifiers.", category: "required"} + variantIndexes: {description: "The indexes of the input VCF files.", category: "required"} + outputPath: {description: "The location the output should be written to", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task FilterMutectCalls { + input { + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File unfilteredVcf + File unfilteredVcfIndex + String outputVcf + File? contaminationTable + File? mafTumorSegments + File? 
artifactPriors + Int uniqueAltReadCount = 4 + File mutect2Stats + + String memory = "24G" + String javaXmx = "12G" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + } + + command { + set -e + mkdir -p "$(dirname ~{outputVcf})" + gatk --java-options -Xmx~{javaXmx} \ + FilterMutectCalls \ + -R ~{referenceFasta} \ + -V ~{unfilteredVcf} \ + -O ~{outputVcf} \ + ~{"--contamination-table " + contaminationTable} \ + ~{"--tumor-segmentation " + mafTumorSegments} \ + ~{"--ob-priors " + artifactPriors} \ + ~{"--unique-alt-read-count " + uniqueAltReadCount} \ + ~{"-stats " + mutect2Stats} \ + --filtering-stats "filtering.stats" \ + --showHidden + } + + output { + File filteredVcf = outputVcf + File filteredVcfIndex = outputVcf + ".tbi" + File filteringStats = "filtering.stats" + } + + runtime { + docker: dockerImage + memory: memory + } + + parameter_meta { + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + unfilteredVcf: {description: "An unfiltered VCF file as produced by Mutect2.", category: "required"} + unfilteredVcfIndex: {description: "The index of the unfiltered VCF file.", category: "required"} + outputVcf: {description: "The location the filtered VCF file should be written.", category: "required"} + contaminationTable: {description: "Equivalent to FilterMutectCalls' `--contamination-table` option.", category: "advanced"} + mafTumorSegments: {description: "Equivalent to FilterMutectCalls' `--tumor-segmentation` option.", category: "advanced"} + artifactPriors: {description: "Equivalent to FilterMutectCalls' `--ob-priors` option.", category: "advanced"} + uniqueAltReadCount: {description: "Equivalent to FilterMutectCalls' `--unique-alt-read-count` option.", 
category: "advanced"} + mutect2Stats: {description: "Equivalent to FilterMutectCalls' `-stats` option.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + # Combine multiple recalibration tables from scattered BaseRecalibrator runs task GatherBqsrReports { input { @@ -292,6 +471,57 @@ task GenotypeGVCFs { } } +task GetPileupSummaries { + input { + File sampleBam + File sampleBamIndex + File variantsForContamination + File variantsForContaminationIndex + File sitesForContamination + File sitesForContaminationIndex + String outputPrefix + + String memory = "24G" + String javaXmx = "12G" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + } + + command { + set -e + gatk --java-options -Xmx~{javaXmx} \ + GetPileupSummaries \ + -I ~{sampleBam} \ + -V ~{variantsForContamination} \ + -L ~{sitesForContamination} \ + -O ~{outputPrefix + "-pileups.table"} + } + + output { + File pileups = outputPrefix + "-pileups.table" + } + + runtime { + docker: dockerImage + memory: memory + } + + parameter_meta { + sampleBam: {description: "A BAM file for which a pileup should be created.", category: "required"} + sampleBamIndex: {description: "The index of the input BAM file.", category: "required"} + variantsForContamination: {description: "A VCF file with common variants.", category: "required"} + variantsForContaminationIndex: {description: "The index for the common variants VCF file.", category: "required"} + sitesForContamination: {description: "A bed file describing regions to operate on.", category: "required"} + sitesForContaminationIndex: {description: "The index for 
the bed file.", category: "required"} + outputPrefix: {description: "The prefix for the ouput.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + # Call variants on a single sample with HaplotypeCaller to produce a GVCF task HaplotypeCallerGvcf { input { @@ -357,87 +587,13 @@ task HaplotypeCallerGvcf { } } -task MuTect2 { + +task LearnReadOrientationModel { input { - Array[File]+ inputBams - Array[File]+ inputBamsIndex - File referenceFasta - File referenceFastaDict - File referenceFastaFai - String outputVcf - String tumorSample - String? normalSample - File? germlineResource - File? germlineResourceIndex - File? panelOfNormals - File? 
panelOfNormalsIndex - String f1r2TarGz = "f1r2.tar.gz" - Array[File]+ intervals - String outputStats = outputVcf + ".stats" + Array[File]+ f1r2TarGz - String memory = "16G" - String javaXmx = "4G" - String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" - } - - command { - set -e - mkdir -p "$(dirname ~{outputVcf})" - gatk --java-options -Xmx~{javaXmx} \ - Mutect2 \ - -R ~{referenceFasta} \ - -I ~{sep=" -I " inputBams} \ - -tumor ~{tumorSample} \ - ~{"-normal " + normalSample} \ - ~{"--germline-resource " + germlineResource} \ - ~{"--panel-of-normals " + panelOfNormals} \ - ~{"--f1r2-tar-gz " + f1r2TarGz} \ - -O ~{outputVcf} \ - -L ~{sep=" -L " intervals} - } - - output { - File vcfFile = outputVcf - File vcfFileIndex = outputVcf + ".tbi" - File f1r2File = f1r2TarGz - File stats = outputStats - } - - runtime { - docker: dockerImage - memory: memory - } - - parameter_meta { - inputBams: {description: "The BAM files on which to perform variant calling.", category: "required"} - inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - outputVcf: {description: "The location to write the output VCF file to.", category: "required"} - tumorSample: {description: "The name of the tumor/case sample.", category: "required"} - normalSample: {description: "The name of the normal/control sample.", category: "common"} - germlineResource: {description: "Equivalent to Mutect2's `--germline-resource` option.", category: "advanced"} - germlineResourceIndex: {description: "The index for the germline resource.", category: "advanced"} - panelOfNormals: {description: "Equivalent to Mutect2's 
`--panel-of-normals` option.", category: "advanced"} - panelOfNormalsIndex: {description: "The index for the panel of normals.", category: "advanced"} - f1r2TarGz: {description: "Equivalent to Mutect2's `--f1r2-tar-gz` option.", category: "advanced"} - intervals: {description: "Bed files describing the regiosn to operate on.", category: "required"} - outputStats: {description: "The location the output statistics should be written to.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} - -task LearnReadOrientationModel { - input { - Array[File]+ f1r2TarGz - - String memory = "24G" - String javaXmx = "12G" + String memory = "24G" + String javaXmx = "12G" String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" } @@ -504,114 +660,26 @@ task MergeStats { } } -task GetPileupSummaries { - input { - File sampleBam - File sampleBamIndex - File variantsForContamination - File variantsForContaminationIndex - File sitesForContamination - File sitesForContaminationIndex - String outputPrefix - - String memory = "24G" - String javaXmx = "12G" - String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" - } - - command { - set -e - gatk --java-options -Xmx~{javaXmx} \ - GetPileupSummaries \ - -I ~{sampleBam} \ - -V ~{variantsForContamination} \ - -L ~{sitesForContamination} \ - -O ~{outputPrefix + "-pileups.table"} - } - - output { - File pileups = outputPrefix + "-pileups.table" - } - - runtime { - docker: dockerImage - memory: memory - } - - parameter_meta { - sampleBam: {description: "A BAM file for which a pileup should be created.", category: "required"} - sampleBamIndex: 
{description: "The index of the input BAM file.", category: "required"} - variantsForContamination: {description: "A VCF file with common variants.", category: "required"} - variantsForContaminationIndex: {description: "The index for the common variants VCF file.", category: "required"} - sitesForContamination: {description: "A bed file describing regions to operate on.", category: "required"} - sitesForContaminationIndex: {description: "The index for the bed file.", category: "required"} - outputPrefix: {description: "The prefix for the ouput.", category: "required"} - - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} - -task CalculateContamination { - input { - File tumorPileups - File? normalPileups - - String memory = "24G" - String javaXmx = "12G" - String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" - } - - command { - set -e - gatk --java-options -Xmx~{javaXmx} \ - CalculateContamination \ - -I ~{tumorPileups} \ - ~{"-matched " + normalPileups} \ - -O "contamination.table" \ - --tumor-segmentation "segments.table" - } - - output { - File contaminationTable = "contamination.table" - File mafTumorSegments = "segments.table" - } - - runtime { - docker: dockerImage - memory: memory - } - - parameter_meta { - tumorPileups: {description: "The pileup summary of a tumor/case sample.", category: "required"} - normalPileups: {description: "The pileup summary of the normal/control sample.", category: "common"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} - -task FilterMutectCalls { +task MuTect2 { input { + Array[File]+ inputBams + Array[File]+ inputBamsIndex File referenceFasta - File referenceFastaFai File referenceFastaDict - File unfilteredVcf - File unfilteredVcfIndex + File referenceFastaFai String outputVcf - File? contaminationTable - File? mafTumorSegments - File? artifactPriors - Int uniqueAltReadCount = 4 - File mutect2Stats + String tumorSample + String? normalSample + File? germlineResource + File? germlineResourceIndex + File? panelOfNormals + File? panelOfNormalsIndex + String f1r2TarGz = "f1r2.tar.gz" + Array[File]+ intervals + String outputStats = outputVcf + ".stats" - String memory = "24G" - String javaXmx = "12G" + String memory = "16G" + String javaXmx = "4G" String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" } @@ -619,23 +687,23 @@ task FilterMutectCalls { set -e mkdir -p "$(dirname ~{outputVcf})" gatk --java-options -Xmx~{javaXmx} \ - FilterMutectCalls \ + Mutect2 \ -R ~{referenceFasta} \ - -V ~{unfilteredVcf} \ + -I ~{sep=" -I " inputBams} \ + -tumor ~{tumorSample} \ + ~{"-normal " + normalSample} \ + ~{"--germline-resource " + germlineResource} \ + ~{"--panel-of-normals " + panelOfNormals} \ + ~{"--f1r2-tar-gz " + f1r2TarGz} \ -O ~{outputVcf} \ - ~{"--contamination-table " + contaminationTable} \ - ~{"--tumor-segmentation " + mafTumorSegments} \ - ~{"--ob-priors " + artifactPriors} \ - ~{"--unique-alt-read-count " + uniqueAltReadCount} \ - ~{"-stats " + mutect2Stats} \ - --filtering-stats "filtering.stats" \ - --showHidden + -L ~{sep=" -L " intervals} } output { - File filteredVcf = outputVcf - File filteredVcfIndex = outputVcf + ".tbi" - File filteringStats = "filtering.stats" + File vcfFile = outputVcf + File 
vcfFileIndex = outputVcf + ".tbi" + File f1r2File = f1r2TarGz + File stats = outputStats } runtime { @@ -644,18 +712,21 @@ task FilterMutectCalls { } parameter_meta { + inputBams: {description: "The BAM files on which to perform variant calling.", category: "required"} + inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - unfilteredVcf: {description: "An unfiltered VCF file as produced by Mutect2.", category: "required"} - unfilteredVcfIndex: {description: "The index of the unfiltered VCF file.", category: "required"} - outputVcf: {description: "The location the filtered VCF file should be written.", category: "required"} - contaminationTable: {description: "Equivalent to FilterMutectCalls' `--contamination-table` option.", category: "advanced"} - mafTumorSegments: {description: "Equivalent to FilterMutectCalls' `--tumor-segmentation` option.", category: "advanced"} - artifactPriors: {description: "Equivalent to FilterMutectCalls' `--ob-priors` option.", category: "advanced"} - uniqueAltReadCount: {description: "Equivalent to FilterMutectCalls' `--unique-alt-read-count` option.", category: "advanced"} - mutect2Stats: {description: "Equivalent to FilterMutectCalls' `-stats` option.", category: "advanced"} - + outputVcf: {description: "The location to write the output VCF file to.", category: "required"} + tumorSample: {description: "The name of the tumor/case sample.", category: "required"} + normalSample: {description: "The name of the normal/control sample.", category: "common"} + germlineResource: {description: "Equivalent to Mutect2's `--germline-resource` option.", category: "advanced"} + 
germlineResourceIndex: {description: "The index for the germline resource.", category: "advanced"} + panelOfNormals: {description: "Equivalent to Mutect2's `--panel-of-normals` option.", category: "advanced"} + panelOfNormalsIndex: {description: "The index for the panel of normals.", category: "advanced"} + f1r2TarGz: {description: "Equivalent to Mutect2's `--f1r2-tar-gz` option.", category: "advanced"} + intervals: {description: "Bed files describing the regiosn to operate on.", category: "required"} + outputStats: {description: "The location the output statistics should be written to.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} @@ -718,73 +789,3 @@ task SplitNCigarReads { category: "advanced"} } } - -task CombineVariants { - input { - File referenceFasta - File referenceFastaFai - File referenceFastaDict - String genotypeMergeOption = "UNIQUIFY" - String filteredRecordsMergeType = "KEEP_IF_ANY_UNFILTERED" - Array[String]+ identifiers - Array[File]+ variantVcfs # follow "identifiers" array order - Array[File]+ variantIndexes - String outputPath - - String memory = "24G" - String javaXmx = "12G" - String dockerImage = "broadinstitute/gatk3:3.8-1" - } - - command <<< - set -e - mkdir -p "$(dirname ~{outputPath})" - - # build "-V: " arguments according to IDs and VCFs to merge - # Make sure commands are run in bash - V_args=$(bash -c ' - set -eu - ids=(~{sep=" " identifiers}) - vars=(~{sep=" " variantVcfs}) - for (( i = 0; i < ${#ids[@]}; ++i )) - do - printf -- "-V:%s %s " "${ids[i]}" "${vars[i]}" - done - ') - java -Xmx~{javaXmx} -jar /usr/GenomeAnalysisTK.jar \ - -T CombineVariants \ - -R ~{referenceFasta} \ - --genotypemergeoption ~{genotypeMergeOption} \ - --filteredrecordsmergetype ~{filteredRecordsMergeType} \ - --out ~{outputPath} \ - $V_args - 
>>> - - output { - File combinedVcf = outputPath - File combinedVcfIndex = outputPath + ".tbi" - } - - runtime { - docker: dockerImage - memory: memory - } - - parameter_meta { - referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - genotypeMergeOption: {description: "Equivalent to CombineVariants' `--genotypemergeoption` option.", category: "advanced"} - filteredRecordsMergeType: {description: "Equivalent to CombineVariants' `--filteredrecordsmergetype` option.", category: "advanced"} - identifiers: {description: "The sample identifiers in the same order as variantVcfs.", category: "required"} - variantVcfs: {description: "The input VCF files in the same order as identifiers.", category: "required"} - variantIndexes: {description: "The indexes of the input VCF files.", category: "required"} - outputPath: {description: "The location the output should be written to", category: "required"} - - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} From 3d75296517fc6f398dcfa5f241a6606d3de36d84 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 17 Jan 2020 13:51:59 +0100 Subject: [PATCH 0096/1208] Fix CI error. 
--- centrifuge.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index 5c9ef087..5110b872 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -134,7 +134,7 @@ task Classify { } runtime { - cpu: cores + cpu: threads memory: memory docker: dockerImage } From ed6e29113198e9117672615e80524d90ab95cdb2 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 20 Jan 2020 16:44:49 +0100 Subject: [PATCH 0097/1208] add AnnotateIntervals, CollectReadCounts, CreateReadCountPanelOfNormals and PreprocessIntervals --- gatk.wdl | 232 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 232 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index a270794a..f6d42e82 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1,5 +1,83 @@ version 1.0 +# Copyright (c) 2018 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task AnnotateIntervals { + input { + File referenceFasta + File referenceFastaDict + File referenceFastaFai + String annotatedIntervalsPath = "intervals.annotated.tsv" + File intervals + String intervalMergingRule = "OVERLAPPING_ONLY" + File? mappabilityTrack + File? segmentalDuplicationTrack + Int featureQueryLookahead = 1000000 + + String memory = "120" + String javaXmx = "2G" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + } + + command { + set -e + mkdir -p "$(dirname ~{annotatedIntervalsPath}" + gatk --java-options -Xmx~{javaXmx} \ + AnnotateIntervals \ + -R ~{referenceFasta} \ + -L ~{intervals} \ + ~{"--mappability-track " + mappabilityTrack} \ + ~{"--segmental-duplication-track " + segmentalDuplicationTrack} \ + --feature-query-lookahead ~{featureQueryLookahead} \ + --interval-merging-rule ~{intervalMergingRule} \ + -O ~{annotatedIntervalsPath} + } + + output { + File annotatedIntervals = annotatedIntervalsPath + } + + runtime { + docker: dockerImage + memory: memory + } + + parameter_meta { + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + annotatedIntervalsPath: {description: "The location the output should be written to.", category: "advanced"} + intervals: {description: "An interval list describinig the intervals to annotate.", category: "required"} + intervalMergingRule: {description: "Equivalent to gatk AnnotateIntervals' `--interval-merging-rule` option.", category: "advanced"} + mappabilityTrack: {description: "Equivalent to gatk AnnotateIntervals' `--mappability-track` option.", category: "common"} + segmentalDuplicationTrack: {description: "Equivalent to gatk AnnotateIntervals' `--segmenta-duplicarion-track` option.", category: "common"} + featureQueryLookahead: 
{description: "Equivalent to gatk AnnotateIntervals' `--feature-query-lookahead` option", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + # Apply Base Quality Score Recalibration (BQSR) model task ApplyBQSR { input { @@ -173,6 +251,61 @@ task CalculateContamination { } } +task CollectReadCounts { + input { + String countsPath = "readcounts.hdf5" + File intervals + File inputBam + File inputBamIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai + String intervalMergingRule = "OVERLAPPING_ONLY" + + String memory = "35G" + String javaXmx = "7G" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + } + + command { + set -e + mkdir -p "$(dirname ~{countsPath})" + gatk --java-options -Xmx~{javaXmx} \ + CollectReadCounts \ + -L ~{intervals} \ + -I ~{inputBam} \ + -R ~{referenceFasta} \ + --format HDF5 \ + --interval-merging-rule ~{intervalMergingRule} \ + -O ~{countsPath} + } + + output { + File counts = countsPath + } + + runtime { + docker: dockerImage + memory: memory + } + + parameter_meta { + countsPath: {description: "The location the output should be written to.", category: "required"} + intervals: {description: "The intervals to collect counts for.", category: "required"} + inputBam: {description: "The BAM file to determine the coverage for.", category: "required"} + inputBamIndex: {description: "The input BAM file's index.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta 
file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + intervalMergingRule: {description: "Equivalent to gatk CollectReadCounts' `--interval-merging-rule` option.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task CombineGVCFs { input { Array[File]+ gvcfFiles @@ -298,6 +431,49 @@ task CombineVariants { } } +task CreateReadCountPanelOfNormals { + input { + String PONpath = "PON.hdf5" + Array[File]+ readCountsFiles + File? annotatedIntervals + + String memory = "21G" + String javaXmx = "7G" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + } + + command { + set -e + mkdir -p ~{PONpath} + gatk --java-options -Xmx~{javaXmx} \ + CreateReadCountPanelOfNormals \ + -I ~{sep=" -I " readCountsFiles} \ + ~{"--annotated-intervals " + annotatedIntervals} \ + -O ~{PONpath} + } + + output { + File PON = PONpath + } + + runtime { + docker: dockerImage + memory: memory + } + + parameter_meta { + PONpath: {description: "The location the PON should be written to.", category: "common"} + readCountsFiles: {description: "The read counts files as generated by CollectReadCounts.", category: "advanced"} + annotatedIntervals: {description: "An annotation set of intervals as generated by AnnotateIntervals. If provided, explicit GC correction will be performed.", + category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task FilterMutectCalls { input { File referenceFasta @@ -735,6 +911,62 @@ task MuTect2 { } } +task PreprocessIntervals { + input { + File referenceFasta + File referenceFastaDict + File referenceFastaFai + File? intervals + String outputIntervalList = "bins.interval_list" + Int binLength = if defined(intervals) then 0 else 1000 + Int padding = if defined(intervals) then 250 else 0 + String intervalMergingRule = "OVERLAPPING_ONLY" + + String memory = "10G" + String javaXmx = "2G" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputIntervalList})" + gatk --java-options -Xmx~{javaXmx} \ + PreprocessIntervals \ + -R ~{referenceFasta} \ + --sequence-dictinary ~{referenceFastaDict} \ + --bin-length ~{binLength} \ + --padding ~{padding} + ~{"-L " + intervals} \ + --interval-merging-rule ~{intervalMergingRule} \ + -O ~{outputIntervalList} + } + + output { + File intervalList = outputIntervalList + } + + runtime { + docker: dockerImage + memory: memory + } + + parameter_meta { + referenceFasta: {description: "The reference fasta file..", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + intervals: {description: "Bed files describing the regiosn to operate on.", category: "common"} + outputIntervalList: {description: "The location the output should be written to.", category: "advanced"} + binLength: {description: "The size of the bins to be created. 
Should be 0 for targeted/exome sequencing.", category: "advanced"} + padding: {description: "The padding to be added to the bins. Should be 0 if contiguos binning is used, eg with WGS.", category: "advanced"} + intervalMergingRule: {description: "Equivalent to gatk PreprocessIntervals' `--interval-merging-rule` option.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task SplitNCigarReads { input { File inputBam From 9fefd1f2ea43cf4a5e4be1b26858804e053364d1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 21 Jan 2020 10:50:31 +0100 Subject: [PATCH 0098/1208] add changes to changelog --- CHANGELOG.md | 5 +++++ scripts | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 28f2e770..da6b7887 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,11 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Add bedtools.Complement, bedtools.Merge, and add a task to combine multiple + bed files called bedtools.MergeBedFiles. This task combines bedtools merge + and sort. ++ Change `g` parameter on bedtools.Sort to `genome`. ++ Add `ploidity` and `excludeIntervalList` to gatk.HaplotypeCallerGvcf. + Update centrifuge tasks. + Removed unused "cores" inputs from transcriptclean tasks. + Removed unused "cores" inputs from talon tasks. 
diff --git a/scripts b/scripts index a1783b5c..fc603e5d 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit a1783b5c789ebef601a8ec5849c4bbfe7dd3f87d +Subproject commit fc603e5d408b89b99297fb5737586c059c5f9df6 From 0a2b69924c1a524c8255d3d8a7c6731b8371ded1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 21 Jan 2020 11:21:04 +0100 Subject: [PATCH 0099/1208] update submodules --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index fc603e5d..a1783b5c 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit fc603e5d408b89b99297fb5737586c059c5f9df6 +Subproject commit a1783b5c789ebef601a8ec5849c4bbfe7dd3f87d From 7d4af040d12212f75230f9ef4069f1b9cd278da9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 21 Jan 2020 13:42:37 +0100 Subject: [PATCH 0100/1208] fix indentation --- bedtools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bedtools.wdl b/bedtools.wdl index d775a4b3..f02e8b82 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -82,9 +82,9 @@ task Merge { parameter_meta { inputBed: {description: "The bed to merge", - category: "required"} + category: "required"} outputBed: {description: "The path to write the output to", - category: "advanced"} + category: "advanced"} dockerImage: { description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced" From e95ba4ab87501bd145836b3012b5a9b96cadf628 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 21 Jan 2020 13:44:19 +0100 Subject: [PATCH 0101/1208] Make bedFiles obligatory Co-Authored-By: DavyCats --- bedtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bedtools.wdl b/bedtools.wdl index d775a4b3..1b862132 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -95,7 +95,7 @@ task Merge { # Use cat, bedtools sort and bedtools merge to merge bedfiles in a single task. 
task MergeBedFiles { input { - Array[File] bedFiles + Array[File]+ bedFiles String outputBed = "merged.bed" String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } From 23dd04d183ce1b8ae855cc8246aa2b4634543871 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 22 Jan 2020 17:05:10 +0100 Subject: [PATCH 0102/1208] add various CNV calling tasks --- gatk.wdl | 313 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 311 insertions(+), 2 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index f6d42e82..f54934a0 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -251,6 +251,96 @@ task CalculateContamination { } } +task CallCopyRatioSegments { + input { + String outputPrefix + File copyRatioSegments + + String memory = "21G" + String javaXmx = "6G" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + } + + command { + set -e + mkdir -p "$(~{outputPrefix})" + gatk --java-options -Xmx~{javaXmx} \ + CallCopyRatioSegments \ + -I ~{copyRatioSegments} \ + -O ~{outputPrefix}.called.seg + } + + output { + File calledSegments = outputPrefix + ".called.seg" + File calledSegmentsIgv = outputPrefix + ".called.igv.seg" + } + + runtime { + docker: dockerImage + memory: memory + } + + parameter_meta { + outputPrefix: {description: "The prefix for the output files.", category: "required"} + copyRatioSegments: {description: "The copy ratios file generated by gatk ModelSegments.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task CollectAllelicCounts { + input { + String allelicCountsPath = "allelic_counts.tsv" + File commonVariantSites + File inputBam + File inputBamIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai + String memory = "90G" + String javaXmx = "30G" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + } + + command { + set -e + mkdir -p "$(dirname ~{allelicCountsPath})" + gatk --java-options -Xmx~{javaXmx} \ + CollectAllelicCounts \ + -L ~{commonVariantSites} \ + -I ~{inputBam} \ + -R ~{referenceFasta} \ + -O ~{allelicCountsPath} + } + + output { + File allelicCounts = allelicCountsPath + } + + runtime { + docker: dockerImage + memory: memory + } + + parameter_meta { + allelicCountsPath: {description: "The path the output should be written to.", category: "advanced"} + commonVariantSites: {description: "Interval list of common vairat sies (to retrieve the allelic counts for).", category: "required"} + inputBam: {description: "The BAM file to generate counts for.", category: "required"} + inputBamIndex: {description: "The index of the input BAM file.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task CollectReadCounts { input { String countsPath = "readcounts.hdf5" @@ -290,7 +380,7 @@ task CollectReadCounts { } parameter_meta { - countsPath: {description: "The location the output should be written to.", category: "required"} + countsPath: {description: "The location the output should be written to.", category: "advanced"} intervals: {description: "The intervals to collect counts for.", category: "required"} inputBam: {description: "The BAM file to determine the coverage for.", category: "required"} inputBamIndex: {description: "The input BAM file's index.", category: "required"} @@ -463,7 +553,7 @@ task CreateReadCountPanelOfNormals { parameter_meta { PONpath: {description: "The location the PON should be written to.", category: "common"} - readCountsFiles: {description: "The read counts files as generated by CollectReadCounts.", category: "advanced"} + readCountsFiles: {description: "The read counts files as generated by CollectReadCounts.", category: "required"} annotatedIntervals: {description: "An annotation set of intervals as generated by AnnotateIntervals. If provided, explicit GC correction will be performed.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} @@ -474,6 +564,54 @@ task CreateReadCountPanelOfNormals { } } +task DenoiseReadCounts { + input { + File? PON + File? 
annotatedIntervals + File readCounts + String outputPrefix + + String memory = "39G" + String javaXmx = "13G" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + gatk --java-options -Xmx~{javaXmx} \ + DenoiseReadCounts \ + -I ~{readCounts} \ + ~{"--count-panel-of-normals " + PON} \ + ~{"--annotated-intervals " + annotatedIntervals} \ + --standardized-copy-ratios ~{outputPrefix}.standardizedCR.tsv \ + --denoised-copy-ratios ~{outputPrefix}.denoisedCR.tsv + } + + output { + File standardizedCopyRatios = outputPrefix + ".standardizedCR.tsv" + File denoisedCopyRatios = outputPrefix + ".denoisedCR.tsv" + } + + runtime { + docker: dockerImage + memory: memory + } + + parameter_meta { + PON: {description: "A panel of normals as generated by CreateReadCountPanelOfNormals.", category: "advanced"} + annotatedIntervals: {description: "An annotated set of intervals as generated by AnnotateIntervals. Will be ignored if PON is provided.", + category: "advanced"} + readCounts: {description: "The read counts file as generated by CollectReadCounts.", category: "required"} + outputPrefix: {description: "The prefix for the output files.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task FilterMutectCalls { input { File referenceFasta @@ -836,6 +974,73 @@ task MergeStats { } } +task ModelSegments { + input { + String outputDir = "." + String outputPrefix + File denoisedCopyRatios + File allelicCounts + File? 
normalAllelicCounts + Int minimumTotalAlleleCountCase = if defined(normalAllelicCounts) + then 0 + else 30 + Int maximumNumberOfSmoothingIterations = 10 + + String memory = "64G" + String javaXmx = "10G" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + } + + command { + set -e + mkdir -p ~{outputDir} + gatk --java-options -Xmx~{javaXmx} \ + ModelSegments \ + --denoised-copy-ratios ~{denoisedCopyRatios} \ + --allelic-counts ~{allelicCounts} \ + ~{"--normal-allelic-counts " + normalAllelicCounts} \ + --minimum-total-allele-count-case ~{minimumTotalAlleleCountCase} + --maximum-number-of-smoothing-iterations ~{maximumNumberOfSmoothingIterations} + --output ~{outputDir} \ + --output-prefix ~{outputPrefix} + } + + output { + File hetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.tsv" + File normalHetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.normal.tsv" + File copyRatioSegments = outputDir + "/" + outputPrefix + ".cr.seg" + File copyRatioCBS = outputDir + "/" + outputPrefix + ".cr.igv.seg" + File alleleFractionCBS = outputDir + "/" + outputPrefix + ".af.igv.seg" + File unsmoothedModeledSegments = outputDir + "/" + outputPrefix + ".modelBegin.seg" + File unsmoothedCopyRatioParameters = outputDir + "/" + outputPrefix + ".modelBegin.cr.param" + File unsmoothedAlleleFractionParameters = outputDir + "/" + outputPrefix + ".modelBegin.af.param" + File modeledSegments = outputDir + "/" + outputPrefix + ".modelFinal.seg" + File copyRatioParameters = outputDir + "/" + outputPrefix + ".modelFinal.cr.param" + File alleleFractionParameters = outputDir + "/" + outputPrefix + ".modelFinal.af.param" + } + + runtime { + docker: dockerImage + memory: memory + } + + parameter_meta { + outputDir: {description: "The directory to write the ouput to.", category: "common"} + outputPrefix: {description: "The prefix of the output files. 
Should not include directories.", category: "required"} + denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"} + allelicCounts: {description: "The allelicCounts as generate by CollectAllelicCounts.", category: "required" } + normalAllelicCounts: {description: "The allelicCounts as generate by CollectAllelicCounts for a matched normal.", category: "common"} + minimumTotalAlleleCountCase: {description: "Equivalent to gatk ModelSeqments' `--minimum-total-allele-count-case` option.", category: "advanced"} + maximumNumberOfSmoothingIterations: {description: "Equivalent to gatk ModelSeqments' `--maximum-number-of-smoothing-iterations` option.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task MuTect2 { input { Array[File]+ inputBams @@ -911,6 +1116,110 @@ task MuTect2 { } } +task PlotDenoisedCopyRatios { + input { + File referenceFastaDict + String outputDir = "." 
+ String outputPrefix + File standardizedCopyRatios + File denoisedCopyRatios + + String memory = "21G" + String javaXmx = "7G" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + } + + command { + set -e + mkdir -p ~{outputDir} + gatk --java-options -XmX~{javaXmx} \ + PlotDenoisedCopyRatios \ + --standardized-copy-ratios ~{standardizedCopyRatios} \ + --denoised-copy-ratios ~{denoisedCopyRatios} \ + --sequence-dictionary ~{referenceFastaDict} \ + --output ~{outputDir} \ + --output-prefix ~{outputPrefix} + } + + output { + File denoisedCopyRatiosPlot = outputDir + "/" + outputPrefix + ".denoised.png" + File denoisedCopyRatiosLimitedPlot = outputDir + "/" + outputPrefix + ".denoisedLimit4.png" + File standardizedMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".standardizedMAD.txt" + File denoisedMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".denoisedMAD.txt" + File deltaMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".deltaMAD.txt" + File deltaScaledMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".scaledDeltaMAD.txt" + } + + runtime { + docker: dockerImage + memory: memory + } + + parameter_meta { + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file used for the analyses.", category: "required"} + outputDir: {description: "The directory to write the ouput to.", category: "common"} + outputPrefix: {description: "The prefix of the output files. Should not include directories.", category: "required"} + denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"} + standardizedCopyRatios: {description: "The standardized copy ratios as generated by DenoiseReadCounts.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task PlotModeledSegments { + input { + File referenceFastaDict + String outputDir = "." + String outputPrefix + File denoisedCopyRatios + File segments + File allelicCounts + + String memory = "21G" + String javaXmx = "7G" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + } + + command { + set -e + mkdir -p ~{outputDir} + gatk --java-option -Xmx~{javaXmx} \ + PlotModeledSegments \ + --denoised-copy-ratios ~{denoisedCopyRatios} \ + --allelic-counts ~{allelicCounts} \ + --segments ~{segments} \ + --sequence-dictionary ~{referenceFastaDict} \ + --output ~{outputDir} \ + --output-prefix ~{outputPrefix} + } + + output { + File modeledSegmentsPlot = outputDir + "/" + outputPrefix + ".modeled.png" + } + + runtime { + docker: dockerImage + memory: memory + } + + parameter_meta { + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file used for the analyses.", category: "required"} + outputDir: {description: "The directory to write the ouput to.", category: "common"} + outputPrefix: {description: "The prefix of the output files. Should not include directories.", category: "required"} + denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"} + segments: {description: "The modeled segments as generated by ModelSegments.", category: "required"} + allelicCounts: {description: "The hetrozygous allelic counts as generated by ModelSegments.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task PreprocessIntervals { input { File referenceFasta From 005a9d6e7d947dabcd15e67c5f20f9acce3fa019 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jan 2020 10:46:40 +0100 Subject: [PATCH 0103/1208] set pipefail to prevent errors in bed files --- bedtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/bedtools.wdl b/bedtools.wdl index 407250fe..50f9f92e 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -102,6 +102,7 @@ task MergeBedFiles { # A sorted bed is needed for bedtools merge command { + set -e -o pipefail cat ~{sep=" " bedFiles} | bedtools sort | bedtools merge > ~{outputBed} } From 2f091247428716a36da8bacd840d71cafcc5dfb7 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jan 2020 10:47:53 +0100 Subject: [PATCH 0104/1208] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index da6b7887..7e0aca59 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Add `-o pipefail` to bedtools.MergeBedFiles to prevent errors in BED files + from going unnoticed. + Add bedtools.Complement, bedtools.Merge, and add a task to combine multiple bed files called bedtools.MergeBedFiles. This task combines bedtools merge and sort. 
From 127c962b30f61590b14a673632f08c16feedf5e3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 23 Jan 2020 11:17:04 +0100 Subject: [PATCH 0105/1208] fix missing backslash --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index f54934a0..12ef0a4e 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1244,7 +1244,7 @@ task PreprocessIntervals { -R ~{referenceFasta} \ --sequence-dictinary ~{referenceFastaDict} \ --bin-length ~{binLength} \ - --padding ~{padding} + --padding ~{padding} \ ~{"-L " + intervals} \ --interval-merging-rule ~{intervalMergingRule} \ -O ~{outputIntervalList} From 96a896c956584d98da46b483b4685a8457996a03 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jan 2020 11:48:33 +0100 Subject: [PATCH 0106/1208] add intersect bed task --- bedtools.wdl | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/bedtools.wdl b/bedtools.wdl index 50f9f92e..6ae3b3c7 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -164,3 +164,49 @@ task Sort { docker: dockerImage } } + +task Intersect { + input { + File regionsA + File regionsB + # Giving a faidx file will set the sorted option. + File? faidx + String outputBed = "intersect.bed" + String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + } + Boolean sorted = defined(faidx) + + command { + set -e + ~{"cut -f1,2 " + faidx} ~{true="> sorted.genome" false ="" sorted} + bedtools intersect \ + -a ~{regionsA} \ + -b ~{regionsB} \ + ~{true="--sorted" false="" sorted} \ + ~{true="-g sorted.genome" false="" sorted} \ + > ~{outputBed} + } + + output { + File intersectedBed = outputBed + } + + runtime { + docker: dockerImage + } + + parameter_meta { + faidx: {description: "The fasta index (.fai) file that is used to create the genome file required for sorted output. 
Implies sorted option.", + category: "common"} + regionsA: {description: "Region file a to intersect", + category: "required"} + regionsB: {description: "Region file b to intersect", + category: "required"} + outputBed: {description: "The path to write the output to", + category: "advanced"} + dockerImage: { + description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced" + } + } +} From 1f0f1236f11764b6b3af3d1ce2d65cb76ffec643 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jan 2020 13:30:03 +0100 Subject: [PATCH 0107/1208] fix typo --- bedtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bedtools.wdl b/bedtools.wdl index 6ae3b3c7..4f39e2a8 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -182,7 +182,7 @@ task Intersect { bedtools intersect \ -a ~{regionsA} \ -b ~{regionsB} \ - ~{true="--sorted" false="" sorted} \ + ~{true="-sorted" false="" sorted} \ ~{true="-g sorted.genome" false="" sorted} \ > ~{outputBed} } From bc15e3b51f8f6ad0cf8e7c2e3365a21976f000b4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jan 2020 13:30:23 +0100 Subject: [PATCH 0108/1208] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e0aca59..becbbb12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Add bedtools.Intersect + Add `-o pipefail` to bedtools.MergeBedFiles to prevent errors in BED files from going unnoticed. 
+ Add bedtools.Complement, bedtools.Merge, and add a task to combine multiple From 551ab97f0dfd22ca9ed12f1727b30c86a3ee6569 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 23 Jan 2020 14:03:37 +0100 Subject: [PATCH 0109/1208] typos --- gatk.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 12ef0a4e..b7b95faf 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -32,7 +32,7 @@ task AnnotateIntervals { File? segmentalDuplicationTrack Int featureQueryLookahead = 1000000 - String memory = "120" + String memory = "10G" String javaXmx = "2G" String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -1242,7 +1242,7 @@ task PreprocessIntervals { gatk --java-options -Xmx~{javaXmx} \ PreprocessIntervals \ -R ~{referenceFasta} \ - --sequence-dictinary ~{referenceFastaDict} \ + --sequence-dictionary ~{referenceFastaDict} \ --bin-length ~{binLength} \ --padding ~{padding} \ ~{"-L " + intervals} \ From d648213b9588e91ff74119316ee9aafc8d22b512 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 23 Jan 2020 14:18:49 +0100 Subject: [PATCH 0110/1208] typo --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index b7b95faf..a40696d6 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -39,7 +39,7 @@ task AnnotateIntervals { command { set -e - mkdir -p "$(dirname ~{annotatedIntervalsPath}" + mkdir -p "$(dirname ~{annotatedIntervalsPath})" gatk --java-options -Xmx~{javaXmx} \ AnnotateIntervals \ -R ~{referenceFasta} \ From d9490c2811e5e5cb548c453738bb554359e7249f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jan 2020 14:22:11 +0100 Subject: [PATCH 0111/1208] add text to file task --- common.wdl | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/common.wdl b/common.wdl index 73325bf4..92fd586c 100644 --- a/common.wdl +++ b/common.wdl @@ -158,6 +158,28 @@ task StringArrayMd5 { } } +task TextToFile { + + input { + String text + String outputFile = "out.txt" + String 
dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" + } + + command <<< + echo $'~{text}' > ~{outputFile} + >>> + + output { + File out = outputFile + } + + runtime { + memory: "1G" + docker: dockerImage + } +} + task YamlToJson { input { File yaml From 5488321950256075ae785860000e9737237d9036 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 23 Jan 2020 14:32:18 +0100 Subject: [PATCH 0112/1208] fix mkdir in CreateReadCountPanelOfNormals --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index a40696d6..e33d8caa 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -534,7 +534,7 @@ task CreateReadCountPanelOfNormals { command { set -e - mkdir -p ~{PONpath} + mkdir -p "$(dirname ~{PONpath})" gatk --java-options -Xmx~{javaXmx} \ CreateReadCountPanelOfNormals \ -I ~{sep=" -I " readCountsFiles} \ From 0551737d4afac06a751c15390ec96de160313f33 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jan 2020 15:50:08 +0100 Subject: [PATCH 0113/1208] update changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index becbbb12..bbe58dce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,8 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- -+ Add bedtools.Intersect ++ Add common.TextToFile task. ++ Add bedtools.Intersect. + Add `-o pipefail` to bedtools.MergeBedFiles to prevent errors in BED files from going unnoticed. 
+ Add bedtools.Complement, bedtools.Merge, and add a task to combine multiple From 7d99177c7b2ca336e449c538a9768dce01f3b153 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 23 Jan 2020 16:14:35 +0100 Subject: [PATCH 0114/1208] use broad's gatk container --- gatk.wdl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index e33d8caa..4b7fbbdf 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -34,7 +34,7 @@ task AnnotateIntervals { String memory = "10G" String javaXmx = "2G" - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "broadinstitute/gatk:4.1.4.0" } command { @@ -258,7 +258,7 @@ task CallCopyRatioSegments { String memory = "21G" String javaXmx = "6G" - String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + String dockerImage = "broadinstitute/gatk:4.1.4.0" } command { @@ -302,7 +302,7 @@ task CollectAllelicCounts { File referenceFastaFai String memory = "90G" String javaXmx = "30G" - String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + String dockerImage = "broadinstitute/gatk:4.1.4.0" } command { @@ -354,7 +354,7 @@ task CollectReadCounts { String memory = "35G" String javaXmx = "7G" - String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + String dockerImage = "broadinstitute/gatk:4.1.4.0" } command { @@ -529,7 +529,7 @@ task CreateReadCountPanelOfNormals { String memory = "21G" String javaXmx = "7G" - String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + String dockerImage = "broadinstitute/gatk:4.1.4.0" } command { @@ -573,7 +573,7 @@ task DenoiseReadCounts { String memory = "39G" String javaXmx = "13G" - String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + String dockerImage = "broadinstitute/gatk:4.1.4.0" } command { @@ -988,7 +988,7 @@ task ModelSegments { String memory = "64G" String javaXmx = "10G" - String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + String dockerImage = "broadinstitute/gatk:4.1.4.0" } 
command { @@ -1126,7 +1126,7 @@ task PlotDenoisedCopyRatios { String memory = "21G" String javaXmx = "7G" - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "broadinstitute/gatk:4.1.4.0" } command { @@ -1180,7 +1180,7 @@ task PlotModeledSegments { String memory = "21G" String javaXmx = "7G" - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "broadinstitute/gatk:4.1.4.0" } command { @@ -1233,7 +1233,7 @@ task PreprocessIntervals { String memory = "10G" String javaXmx = "2G" - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "broadinstitute/gatk:4.1.4.0" } command { From 8bed940cc18c415fd93f5108bed3f0f7414eabda Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jan 2020 16:29:24 +0100 Subject: [PATCH 0115/1208] update parameter meta --- common.wdl | 6 ++++++ gatk.wdl | 4 +++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/common.wdl b/common.wdl index 92fd586c..87dcce13 100644 --- a/common.wdl +++ b/common.wdl @@ -174,6 +174,12 @@ task TextToFile { File out = outputFile } + parameter_meta { + text: {description: "The text to print", category: "required"} + outputFile: {description: "The name of the output file", category: "common"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } runtime { memory: "1G" docker: dockerImage diff --git a/gatk.wdl b/gatk.wdl index b44620ae..0b4c71c7 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -342,8 +342,10 @@ task HaplotypeCallerGvcf { parameter_meta { inputBams: {description: "The BAM files on which to perform variant calling.", category: "required"} inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} - intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "required"} + intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"} + excludeIntervalList: {description: "Bed files or interval lists describing the regions to NOT operate on.", category: "common"} gvcfPath: {description: "The location to write the output GVCF to.", category: "required"} + ploidy: {description: "The ploidy with which the variants should be called.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", From 1ab404fed817f04af52bbebc2a254c5b506b5bb3 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 23 Jan 2020 17:11:53 +0100 Subject: [PATCH 0116/1208] Centrifuge should use -U instead of -1 with single end data. --- centrifuge.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index 5110b872..b9eb7624 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -91,8 +91,8 @@ task Classify { Array[File]+ read1 String outputPrefix String outputName = basename(outputPrefix) + Array[File] read2 = [] - Array[File]? read2 Int? trim5 Int? trim3 Int? 
reportMaxDistinct @@ -121,8 +121,8 @@ task Classify { ~{"--host-taxids " + hostTaxIDs} \ ~{"--exclude-taxids " + excludeTaxIDs} \ ~{"-x " + indexPrefix} \ - ~{true="-1 " false="-U " defined(read2)} ~{sep="," read1} \ - ~{"-2 "} ~{sep="," read2} \ + ~{true="-1" false="-U" length(read2) > 0} ~{sep="," read1} \ + ~{true="-2" false="" length(read2) > 0} ~{sep="," read2} \ ~{"-S " + outputPrefix + "/" + outputName + "_classification.tsv"} \ ~{"--report-file " + outputPrefix + "/" + outputName + "_output_report.tsv"} } From 410b06708120a304003fe768412fca59f00fffed Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 23 Jan 2020 17:12:36 +0100 Subject: [PATCH 0117/1208] Update CHANGELOG. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index da6b7887..58e31684 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Centrifuge: Fix -1/-U options for single end data. + Add bedtools.Complement, bedtools.Merge, and add a task to combine multiple bed files called bedtools.MergeBedFiles. This task combines bedtools merge and sort. 
From 3c3252d0068572e3ae29a2ca1e5912ae288282d9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 24 Jan 2020 11:03:55 +0100 Subject: [PATCH 0118/1208] typo --- gatk.wdl | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 4b7fbbdf..9a48ddda 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -34,7 +34,7 @@ task AnnotateIntervals { String memory = "10G" String javaXmx = "2G" - String dockerImage = "broadinstitute/gatk:4.1.4.0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { @@ -258,7 +258,7 @@ task CallCopyRatioSegments { String memory = "21G" String javaXmx = "6G" - String dockerImage = "broadinstitute/gatk:4.1.4.0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { @@ -302,7 +302,7 @@ task CollectAllelicCounts { File referenceFastaFai String memory = "90G" String javaXmx = "30G" - String dockerImage = "broadinstitute/gatk:4.1.4.0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { @@ -354,7 +354,7 @@ task CollectReadCounts { String memory = "35G" String javaXmx = "7G" - String dockerImage = "broadinstitute/gatk:4.1.4.0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { @@ -465,7 +465,7 @@ task CombineVariants { String memory = "24G" String javaXmx = "12G" - String dockerImage = "broadinstitute/gatk3:3.8-1" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command <<< @@ -529,7 +529,7 @@ task CreateReadCountPanelOfNormals { String memory = "21G" String javaXmx = "7G" - String dockerImage = "broadinstitute/gatk:4.1.4.0" + String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer causes a spark related error for some reason... 
} command { @@ -573,7 +573,7 @@ task DenoiseReadCounts { String memory = "39G" String javaXmx = "13G" - String dockerImage = "broadinstitute/gatk:4.1.4.0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { @@ -944,7 +944,7 @@ task MergeStats { String memory = "28G" String javaXmx = "14G" - String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { @@ -988,7 +988,7 @@ task ModelSegments { String memory = "64G" String javaXmx = "10G" - String dockerImage = "broadinstitute/gatk:4.1.4.0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { @@ -1061,7 +1061,7 @@ task MuTect2 { String memory = "16G" String javaXmx = "4G" - String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { @@ -1126,13 +1126,13 @@ task PlotDenoisedCopyRatios { String memory = "21G" String javaXmx = "7G" - String dockerImage = "broadinstitute/gatk:4.1.4.0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { set -e mkdir -p ~{outputDir} - gatk --java-options -XmX~{javaXmx} \ + gatk --java-options -Xmx~{javaXmx} \ PlotDenoisedCopyRatios \ --standardized-copy-ratios ~{standardizedCopyRatios} \ --denoised-copy-ratios ~{denoisedCopyRatios} \ @@ -1180,7 +1180,7 @@ task PlotModeledSegments { String memory = "21G" String javaXmx = "7G" - String dockerImage = "broadinstitute/gatk:4.1.4.0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { @@ -1233,7 +1233,7 @@ task PreprocessIntervals { String memory = "10G" String javaXmx = "2G" - String dockerImage = "broadinstitute/gatk:4.1.4.0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { From 3ec3131512d26c3564c7fd45122f7636e685caf3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 24 Jan 2020 14:46:38 +0100 Subject: [PATCH 0119/1208] fix various issues --- gatk.wdl | 16 ++++++++-------- 
1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 9a48ddda..d6b77ffb 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -263,7 +263,7 @@ task CallCopyRatioSegments { command { set -e - mkdir -p "$(~{outputPrefix})" + mkdir -p "$(dirname ~{outputPrefix})" gatk --java-options -Xmx~{javaXmx} \ CallCopyRatioSegments \ -I ~{copyRatioSegments} \ @@ -999,15 +999,15 @@ task ModelSegments { --denoised-copy-ratios ~{denoisedCopyRatios} \ --allelic-counts ~{allelicCounts} \ ~{"--normal-allelic-counts " + normalAllelicCounts} \ - --minimum-total-allele-count-case ~{minimumTotalAlleleCountCase} - --maximum-number-of-smoothing-iterations ~{maximumNumberOfSmoothingIterations} + --minimum-total-allele-count-case ~{minimumTotalAlleleCountCase} \ + --maximum-number-of-smoothing-iterations ~{maximumNumberOfSmoothingIterations} \ --output ~{outputDir} \ --output-prefix ~{outputPrefix} } output { File hetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.tsv" - File normalHetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.normal.tsv" + File? normalHetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.normal.tsv" File copyRatioSegments = outputDir + "/" + outputPrefix + ".cr.seg" File copyRatioCBS = outputDir + "/" + outputPrefix + ".cr.igv.seg" File alleleFractionCBS = outputDir + "/" + outputPrefix + ".af.igv.seg" @@ -1124,9 +1124,9 @@ task PlotDenoisedCopyRatios { File standardizedCopyRatios File denoisedCopyRatios - String memory = "21G" + String memory = "32G" String javaXmx = "7G" - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer doesn't seem to contain R. } command { @@ -1180,13 +1180,13 @@ task PlotModeledSegments { String memory = "21G" String javaXmx = "7G" - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer doesn't seem to contain R. 
} command { set -e mkdir -p ~{outputDir} - gatk --java-option -Xmx~{javaXmx} \ + gatk --java-options -Xmx~{javaXmx} \ PlotModeledSegments \ --denoised-copy-ratios ~{denoisedCopyRatios} \ --allelic-counts ~{allelicCounts} \ From 85ceead3180616dbcbdbf738261444234b648391 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 27 Jan 2020 13:03:45 +0100 Subject: [PATCH 0120/1208] update changelog --- CHANGELOG.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 28f2e770..8043dfe0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,17 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Added GATK CNV calling tasks: + + AnnotateIntervals + + CallCopyRatioSegments + + CollectAllelicCounts + + CollectReadCounts + + CreateReadCountPanelOfNormals + + DenoiseReadCounts + + ModelSegments + + PlotDenoisedCopyRatios + + PlotModeledSegments + + PreprocessIntervals + Update centrifuge tasks. + Removed unused "cores" inputs from transcriptclean tasks. + Removed unused "cores" inputs from talon tasks. From 95a409715cd70908cfd44240a7ffaaa15ee71b94 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 27 Jan 2020 14:01:40 +0100 Subject: [PATCH 0121/1208] make memory configurable in fastqc --- fastqc.wdl | 84 +++++++++++++----------------------------------------- 1 file changed, 20 insertions(+), 64 deletions(-) diff --git a/fastqc.wdl b/fastqc.wdl index 1e835c4e..31c2b80d 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -18,6 +18,7 @@ task Fastqc { String? dir Int threads = 1 + String memory = "4G" String dockerImage = "quay.io/biocontainers/fastqc:0.11.7--4" Array[File]? NoneArray File? 
NoneFile @@ -61,74 +62,29 @@ task Fastqc { runtime { cpu: threads + memory: memory docker: dockerImage } parameter_meta { - seqFile: { - description: "A fastq file.", - category: "required" - } - outdirPath: { - description: "The path to write the output to", - catgory: "required" - } - casava: { - description: "Equivalent to fastqc's --casava flag.", - category: "advanced" - } - nano: { - description: "Equivalent to fastqc's --nano flag.", - category: "advanced" - } - noFilter: { - description: "Equivalent to fastqc's --nofilter flag.", - category: "advanced" - } - extract: { - description: "Equivalent to fastqc's --extract flag.", - category: "advanced" - } - nogroup: { - description: "Equivalent to fastqc's --nogroup flag.", - category: "advanced" - } - minLength: { - description: "Equivalent to fastqc's --min_length option.", - category: "advanced" - } - format: { - description: "Equivalent to fastqc's --format option.", - category: "advanced" - } - contaminants: { - description: "Equivalent to fastqc's --contaminants option.", - category: "advanced" - } - adapters: { - description: "Equivalent to fastqc's --adapters option.", - category: "advanced" - } - limits: { - description: "Equivalent to fastqc's --limits option.", - category: "advanced" - } - kmers: { - description: "Equivalent to fastqc's --kmers option.", - category: "advanced" - } - dir: { - description: "Equivalent to fastqc's --dir option.", - category: "advanced" - } - threads: { - description: "The number of cores to use.", - category: "advanced" - } - dockerImage: { - description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + seqFile: {description: "A fastq file.", category: "required"} + outdirPath: {description: "The path to write the output to", catgory: "required"} + casava: {description: "Equivalent to fastqc's --casava flag.", category: "advanced"} + nano: {description: "Equivalent to fastqc's --nano flag.", category: "advanced"} + noFilter: {description: "Equivalent to fastqc's --nofilter flag.", category: "advanced"} + extract: {description: "Equivalent to fastqc's --extract flag.", category: "advanced"} + nogroup: {description: "Equivalent to fastqc's --nogroup flag.", category: "advanced"} + minLength: {description: "Equivalent to fastqc's --min_length option.", category: "advanced"} + format: {description: "Equivalent to fastqc's --format option.", category: "advanced"} + contaminants: {description: "Equivalent to fastqc's --contaminants option.", category: "advanced"} + adapters: {description: "Equivalent to fastqc's --adapters option.", category: "advanced"} + limits: {description: "Equivalent to fastqc's --limits option.", category: "advanced"} + kmers: {description: "Equivalent to fastqc's --kmers option.", category: "advanced"} + dir: {description: "Equivalent to fastqc's --dir option.", category: "advanced"} + threads: {description: "The number of cores to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } meta { From 9fe2ec8633374f705fdd93f8f74a93057341f1fd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 27 Jan 2020 14:02:23 +0100 Subject: [PATCH 0122/1208] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 26641097..18a90306 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Add `memory` input to fastqc task. + Add common.TextToFile task. + Add bedtools.Intersect. + Add `-o pipefail` to bedtools.MergeBedFiles to prevent errors in BED files From bf970477ad988391e1a85b177e95cc42b99b4c86 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 27 Jan 2020 14:05:17 +0100 Subject: [PATCH 0123/1208] increase default memory for BWA mem --- bwa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index b0b1daf7..655cd288 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -9,7 +9,7 @@ task Mem { String? readgroup Int threads = 2 - String memory = "16G" + String memory = "32G" String picardXmx = "4G" # A mulled container is needed to have both picard and bwa in one container. # This container contains: picard (2.18.7), bwa (0.7.17-r1188) From 0c7e1b4ab724d9a6a24d72356217fa7e7e7aa652 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 27 Jan 2020 14:05:59 +0100 Subject: [PATCH 0124/1208] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 26641097..5f3b4012 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Increase default memory of BWA mem to 32G (was 16G). + Add common.TextToFile task. + Add bedtools.Intersect. 
+ Add `-o pipefail` to bedtools.MergeBedFiles to prevent errors in BED files From 57b7c80c4ce3734c91005ab904a0ceb660135b43 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 27 Jan 2020 16:48:03 +0100 Subject: [PATCH 0125/1208] Fix issue where centrifuge would fail on incorrect paths. --- centrifuge.wdl | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index b9eb7624..5f24365c 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -90,7 +90,6 @@ task Classify { String indexPrefix Array[File]+ read1 String outputPrefix - String outputName = basename(outputPrefix) Array[File] read2 = [] Int? trim5 @@ -113,7 +112,7 @@ task Classify { ~{inputFormatOptions[inputFormat]} \ ~{true="--phred64" false="--phred33" phred64} \ ~{"--min-hitlen " + minHitLength} \ - ~{"--met-file " + outputPrefix + "/" + outputName + "_alignment_metrics.tsv"} \ + ~{"--met-file " + outputPrefix + "_alignment_metrics.tsv"} \ ~{"--threads " + threads} \ ~{"--trim5 " + trim5} \ ~{"--trim3 " + trim3} \ @@ -123,14 +122,14 @@ task Classify { ~{"-x " + indexPrefix} \ ~{true="-1" false="-U" length(read2) > 0} ~{sep="," read1} \ ~{true="-2" false="" length(read2) > 0} ~{sep="," read2} \ - ~{"-S " + outputPrefix + "/" + outputName + "_classification.tsv"} \ - ~{"--report-file " + outputPrefix + "/" + outputName + "_output_report.tsv"} + ~{"-S " + outputPrefix + "_classification.tsv"} \ + ~{"--report-file " + outputPrefix + "_output_report.tsv"} } output { - File outputMetrics = outputPrefix + "/" + outputName + "_alignment_metrics.tsv" - File outputClassification = outputPrefix + "/" + outputName + "_classification.tsv" - File outputReport = outputPrefix + "/" + outputName + "_output_report.tsv" + File outputMetrics = outputPrefix + "_alignment_metrics.tsv" + File outputClassification = outputPrefix + "_classification.tsv" + File outputReport = outputPrefix + "_output_report.tsv" } runtime { @@ -146,7 +145,6 @@ task Classify { indexPrefix: 
{description: "The basename of the index for the reference genomes.", category: "required"} read1: {description: "List of files containing mate 1s, or unpaired reads.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - outputName: {description: "The base name of the outputPrefix.", category: "required"} read2: {description: "List of files containing mate 2s.", category: "common"} trim5: {description: "Trim bases from 5' (left) end of each read before alignment.", category: "common"} trim3: {description: "Trim bases from 3' (right) end of each read before alignment.", category: "common"} From c548516d28cf725affee9fa91d449a888cd96c9b Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 27 Jan 2020 16:49:52 +0100 Subject: [PATCH 0126/1208] Update CHANGELOG. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b186098c..3f5a5d28 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Centrifuge: Fix issue where centrifuge would fail on incorrect paths. 
+ Added GATK CNV calling tasks: + AnnotateIntervals + CallCopyRatioSegments From 8dd64d7f1fe96b4d9c97e6b789943a01d5ba6168 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 28 Jan 2020 11:10:42 +0100 Subject: [PATCH 0127/1208] fix bug where text to file does not work with other shells than bash --- common.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common.wdl b/common.wdl index 87dcce13..389b3511 100644 --- a/common.wdl +++ b/common.wdl @@ -167,7 +167,7 @@ task TextToFile { } command <<< - echo $'~{text}' > ~{outputFile} + echo ~{text} > ~{outputFile} >>> output { From 5183e34ff18baa59cd1997b26ad22a2240a9b3c4 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 28 Jan 2020 14:45:56 +0100 Subject: [PATCH 0128/1208] Fix issue where Centrifuge could not locate index files. --- centrifuge.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index 5f24365c..32ddff30 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -87,7 +87,7 @@ task Classify { String inputFormat = "fastq" Boolean phred64 = false Int minHitLength = 22 - String indexPrefix + Array[File]+ indexFiles Array[File]+ read1 String outputPrefix Array[File] read2 = [] @@ -119,7 +119,7 @@ task Classify { ~{"-k " + reportMaxDistinct} \ ~{"--host-taxids " + hostTaxIDs} \ ~{"--exclude-taxids " + excludeTaxIDs} \ - ~{"-x " + indexPrefix} \ + ~{"-x " + sub(indexFiles[0], "\.[0-9]\.cf", "")} \ ~{true="-1" false="-U" length(read2) > 0} ~{sep="," read1} \ ~{true="-2" false="" length(read2) > 0} ~{sep="," read2} \ ~{"-S " + outputPrefix + "_classification.tsv"} \ @@ -142,7 +142,7 @@ task Classify { inputFormat: {description: "The format of the read file(s).", category: "required"} phred64: {description: "If set to true, Phred+64 encoding is used.", category: "required"} minHitLength: {description: "Minimum length of partial hits.", category: "required"} - indexPrefix: {description: "The basename of the index for the reference 
genomes.", category: "required"} + indexFiles: {description: "The files of the index for the reference genomes.", category: "required"} read1: {description: "List of files containing mate 1s, or unpaired reads.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} read2: {description: "List of files containing mate 2s.", category: "common"} From c3bd11df8bc16b09a480fe2a0cc242ce099fb437 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 28 Jan 2020 14:46:33 +0100 Subject: [PATCH 0129/1208] Update CHANGELOG. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 83e8aba1..bf399465 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Centrifuge: Fix issue where Centrifuge could not locate index files. + Increase default memory of BWA mem to 32G (was 16G). + Add `memory` input to fastqc task. + Centrifuge: Fix issue where centrifuge would fail on incorrect paths. From e1ed820e7f54eee3b26a0413a6c93eca95384f09 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 28 Jan 2020 15:46:33 +0100 Subject: [PATCH 0130/1208] add index for commonvariantsites --- gatk.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index eff98bf8..059c225e 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -295,6 +295,7 @@ task CollectAllelicCounts { input { String allelicCountsPath = "allelic_counts.tsv" File commonVariantSites + File? commonVariantSitesIndex File inputBam File inputBamIndex File referenceFasta @@ -1129,6 +1130,7 @@ task PlotDenoisedCopyRatios { String outputPrefix File standardizedCopyRatios File denoisedCopyRatios + Int? 
minimumContigLength String memory = "32G" String javaXmx = "7G" @@ -1143,6 +1145,7 @@ task PlotDenoisedCopyRatios { --standardized-copy-ratios ~{standardizedCopyRatios} \ --denoised-copy-ratios ~{denoisedCopyRatios} \ --sequence-dictionary ~{referenceFastaDict} \ + ~{"--minimum-contig-length " + minimumContigLength} \ --output ~{outputDir} \ --output-prefix ~{outputPrefix} } @@ -1183,6 +1186,7 @@ task PlotModeledSegments { File denoisedCopyRatios File segments File allelicCounts + Int? minimumContigLength String memory = "21G" String javaXmx = "7G" @@ -1198,6 +1202,7 @@ task PlotModeledSegments { --allelic-counts ~{allelicCounts} \ --segments ~{segments} \ --sequence-dictionary ~{referenceFastaDict} \ + ~{"--minimum-contig-length " + minimumContigLength} \ --output ~{outputDir} \ --output-prefix ~{outputPrefix} } From b9391264e094ab0e38530e3921bd410cb29f06a2 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 28 Jan 2020 15:49:01 +0100 Subject: [PATCH 0131/1208] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 83e8aba1..f4ce7c0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Add `minimumContigLength` input to PlotDenoisedCopyRatios and PlotModeledSegments. ++ Add `commonVariantSitesIndex` input to CollectAllelicCounts + Increase default memory of BWA mem to 32G (was 16G). + Add `memory` input to fastqc task. + Centrifuge: Fix issue where centrifuge would fail on incorrect paths. 
From 801e82aa3b50a09db95acdfdc8ba6795b7629b48 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 28 Jan 2020 16:03:52 +0100 Subject: [PATCH 0132/1208] add missing parameter_meta --- gatk.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 059c225e..eb050f9a 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -328,7 +328,8 @@ task CollectAllelicCounts { parameter_meta { allelicCountsPath: {description: "The path the output should be written to.", category: "advanced"} - commonVariantSites: {description: "Interval list of common vairat sies (to retrieve the allelic counts for).", category: "required"} + commonVariantSites: {description: "Interval list or vcf of common variant sites (to retrieve the allelic counts for).", category: "required"} + commonVariantSitesIndex: {description: "The index for commonVariantSites.", category: "common"} inputBam: {description: "The BAM file to generate counts for.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file.", category: "required"} @@ -1170,6 +1171,7 @@ task PlotDenoisedCopyRatios { outputPrefix: {description: "The prefix of the output files. Should not include directories.", category: "required"} denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"} standardizedCopyRatios: {description: "The standardized copy ratios as generated by DenoiseReadCounts.", category: "required"} + minimumContigLength: {description: "The minimum length for a contig to be included in the plots.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} @@ -1223,6 +1225,7 @@ task PlotModeledSegments { denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"} segments: {description: "The modeled segments as generated by ModelSegments.", category: "required"} allelicCounts: {description: "The hetrozygous allelic counts as generated by ModelSegments.", category: "required"} + minimumContigLength: {description: "The minimum length for a contig to be included in the plots.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} From 2105f0169134562581479451a65366d195991fb3 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 29 Jan 2020 11:46:36 +0100 Subject: [PATCH 0133/1208] Update parameter_meta and fix centrifuge index locating. --- centrifuge.wdl | 26 ++++++++++++++++++++++++-- minimap2.wdl | 6 ++---- talon.wdl | 32 ++++++++++++++++---------------- 3 files changed, 42 insertions(+), 22 deletions(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index 32ddff30..0d05fee4 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -68,6 +68,7 @@ task Build { } parameter_meta { + # inputs disableDifferenceCover: {description: "Disable use of the difference-cover sample.", category: "required"} conversionTable: {description: "List of UIDs (unique ID) and corresponding taxonomic IDs.", category: "required"} taxonomyTree: {description: "Taxonomic tree (e.g. 
nodes.dmp).", category: "required"} @@ -79,6 +80,12 @@ task Build { ftabChars: {description: "Calculate an initial BW range with respect to this character.", category: "common"} kmerCount: {description: "Use as kmer-size for counting the distinct number of k-mers in the input sequences.", category: "common"} sizeTable: {description: "List of taxonomic IDs and lengths of the sequences belonging to the same taxonomic IDs.", category: "common"} + threads: {description: "The number of threads to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputIndex: {description: "Generated Centrifuge index."} } } @@ -139,6 +146,7 @@ task Classify { } parameter_meta { + # inputs inputFormat: {description: "The format of the read file(s).", category: "required"} phred64: {description: "If set to true, Phred+64 encoding is used.", category: "required"} minHitLength: {description: "Minimum length of partial hits.", category: "required"} @@ -151,13 +159,21 @@ task Classify { reportMaxDistinct: {description: "It searches for at most distinct, primary assignments for each read or pair.", category: "common"} hostTaxIDs: {description: "A comma-separated list of taxonomic IDs that will be preferred in classification procedure.", category: "common"} excludeTaxIDs: {description: "A comma-separated list of taxonomic IDs that will be excluded in classification procedure.", category: "common"} + threads: {description: "The number of threads to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputMetrics: {description: "File with Centrifuge metrics."} + outputClassification: {description: "File with the classification results."} + outputReport: {description: "File with a classification summary."} } } task Inspect { input { String printOption = "fasta" - String indexBasename + Array[File]+ indexFiles String outputPrefix Int? across @@ -174,7 +190,7 @@ task Inspect { centrifuge-inspect \ ~{outputOptions[printOption]} \ ~{"--across " + across} \ - ~{indexBasename} \ + ~{sub(indexFiles[0], "\.[0-9]\.cf", "")} \ > ~{outputPrefix + "/" + printOption} } @@ -188,10 +204,16 @@ task Inspect { } parameter_meta { + # inputs printOption: {description: "The output option for inspect (fasta, summary, conversionTable, taxonomyTree, nameTable, sizeTable)", category: "required"} indexBasename: {description: "The basename of the index to be inspected.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} across: {description: "When printing FASTA output, output a newline character every bases.", category: "common"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputInspect: {description: "Output file according to output option."} } } diff --git a/minimap2.wdl b/minimap2.wdl index aff51dcc..32c0666a 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -68,8 +68,7 @@ task Indexing { splitIndex: {description: "Split index for every ~NUM input bases.", category: "advanced"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # output outputIndexFile: {description: "Indexed reference file."} @@ -150,8 +149,7 @@ task Mapping { queryFile: {description: "Input fasta file.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # output outputAlignmentFile: {description: "Mapping and alignment between collections of DNA sequences file."} diff --git a/talon.wdl b/talon.wdl index c861e56d..05018962 100644 --- a/talon.wdl +++ b/talon.wdl @@ -64,8 +64,7 @@ task CreateAbundanceFileFromDatabase { whitelistFile: {description: "Whitelist file of transcripts to include in the output.", category: "advanced"} datasetsFile: {description: "A file indicating which datasets should be included.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs outputAbundanceFile: {description: "Abundance for each transcript in the TALON database across datasets."} @@ -120,8 +119,7 @@ task CreateGtfFromDatabase { whitelistFile: {description: "Whitelist file of transcripts to include in the output.", category: "advanced"} datasetFile: {description: "A file indicating which datasets should be included.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs outputGTFfile: {description: "The genes, transcripts, and exons stored a TALON database in GTF format."} @@ -160,13 +158,16 @@ task FilterTalonTranscripts { } parameter_meta { + # inputs databaseFile: {description: "TALON database.", category: "required"} annotationVersion: {description: "Which annotation version to use.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} pairingsFile: {description: "A file indicating which datasets should be considered together.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputTranscriptWhitelist: {description: "A transcript whitelist produced from the TALON database."} } } @@ -208,8 +209,7 @@ task GetReadAnnotations { outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} datasetFile: {description: "A file indicating which datasets should be included.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs outputAnnotation: {description: "Read-specific annotation information from a TALON database."} @@ -265,8 +265,7 @@ task InitializeTalonDatabase { cutoff3p: {description: "Maximum allowable distance (bp) at the 3' end during annotation.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs outputDatabase: {description: "TALON database."} @@ -297,10 +296,13 @@ task ReformatGtf { } parameter_meta { + # inputs GTFfile: {description: "GTF annotation containing genes, transcripts, and edges.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputReformattedGTF: {description: "Reformatted GTF file."} } } @@ -342,8 +344,7 @@ task SummarizeDatasets { outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} datasetGroupsCSV: {description: "File of comma-delimited dataset groups to process together.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs outputSummaryFile: {description: "Tab-delimited file of gene and transcript counts for each dataset."} @@ -413,8 +414,7 @@ task Talon { outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs outputUpdatedDatabase: {description: "Updated TALON database."} From 790444da9724ac4c415ff3fbaf698d878805be1a Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 29 Jan 2020 11:50:25 +0100 Subject: [PATCH 0134/1208] Update CHANGELOG. 
--- CHANGELOG.md | 2 ++ centrifuge.wdl | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab2b0150..9bea5ba9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Update parameter_meta for TALON, Centrifuge and Minimap2. ++ Centrifuge: Fix issue where Centrifuge Inspect did not get the correct index files location. + Add `minimumContigLength` input to PlotDenoisedCopyRatios and PlotModeledSegments. + Add `commonVariantSitesIndex` input to CollectAllelicCounts + Centrifuge: Fix issue where Centrifuge could not locate index files. diff --git a/centrifuge.wdl b/centrifuge.wdl index 0d05fee4..fc6509b3 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -206,7 +206,7 @@ task Inspect { parameter_meta { # inputs printOption: {description: "The output option for inspect (fasta, summary, conversionTable, taxonomyTree, nameTable, sizeTable)", category: "required"} - indexBasename: {description: "The basename of the index to be inspected.", category: "required"} + indexFiles: {description: "The files of the index for the reference genomes.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} across: {description: "When printing FASTA output, output a newline character every bases.", category: "common"} memory: {description: "The amount of memory available to the job.", category: "advanced"} From fcde250747a4ced692f327dee53f5f952fa3525b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 30 Jan 2020 10:36:33 +0100 Subject: [PATCH 0135/1208] adapt haplotypecaller --- gatk.wdl | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index eff98bf8..1aef5105 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -837,20 +837,21 @@ task GetPileupSummaries { } # Call variants on a single sample with 
HaplotypeCaller to produce a GVCF -task HaplotypeCallerGvcf { +task HaplotypeCaller { input { Array[File]+ inputBams Array[File]+ inputBamsIndex Array[File]+? intervalList Array[File]+? excludeIntervalList - String gvcfPath + String outputPath File referenceFasta File referenceFastaIndex File referenceFastaDict - Float contamination = 0.0 + Float? contamination File? dbsnpVCF File? dbsnpVCFIndex Int? ploidy + Boolean gvcf = false String memory = "12G" String javaXmx = "4G" @@ -859,23 +860,23 @@ task HaplotypeCallerGvcf { command { set -e - mkdir -p "$(dirname ~{gvcfPath})" + mkdir -p "$(dirname ~{outputPath})" gatk --java-options -Xmx~{javaXmx} \ HaplotypeCaller \ -R ~{referenceFasta} \ - -O ~{gvcfPath} \ + -O ~{outputPath} \ -I ~{sep=" -I " inputBams} \ ~{"--sample-ploidy " + ploidy} \ ~{true="-L" false="" defined(intervalList)} ~{sep=' -L ' intervalList} \ ~{true="-XL" false="" defined(excludeIntervalList)} ~{sep=' -XL ' excludeIntervalList} \ ~{true="-D" false="" defined(dbsnpVCF)} ~{dbsnpVCF} \ - -contamination ~{contamination} \ - -ERC GVCF + ~{"--contamination-fraction-per-sample-file " + contamination} \ + ~{true="-ERC GVCF" false="" gvcf} } output { - File outputGVCF = gvcfPath - File outputGVCFIndex = gvcfPath + ".tbi" + File outputVCF = outputPath + File outputVCFIndex = outputPath + ".tbi" } runtime { From b260dad4a0c7cb2783726975f6d2fdbf30289264 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 30 Jan 2020 10:37:45 +0100 Subject: [PATCH 0136/1208] Fix Centrifuge tests, where sometimes the index files could still not be located. --- CHANGELOG.md | 1 + centrifuge.wdl | 14 ++++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bea5ba9..54c398e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. 
version 2.2.0-dev --------------------------- ++ Centrifuge: Fix Centrifuge tests, where sometimes the index files could still not be located. + Update parameter_meta for TALON, Centrifuge and Minimap2. + Centrifuge: Fix issue where Centrifuge Inspect did not get the correct index files location. + Add `minimumContigLength` input to PlotDenoisedCopyRatios and PlotModeledSegments. diff --git a/centrifuge.wdl b/centrifuge.wdl index fc6509b3..2869d414 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -112,9 +112,12 @@ task Classify { Map[String, String] inputFormatOptions = {"fastq": "-q", "fasta": "-f", "qseq": "--qseq", "raw": "-r", "sequences": "-c"} - command { + command <<< set -e mkdir -p "$(dirname ~{outputPrefix})" + indexPath=~{sub(indexFiles[0], "\.[0-9]\.cf", "")} + indexBasename="$(basename ~{sub(indexFiles[0], "\.[0-9]\.cf", "")})" + mv ${indexPath}* $PWD/ centrifuge \ ~{inputFormatOptions[inputFormat]} \ ~{true="--phred64" false="--phred33" phred64} \ @@ -126,12 +129,12 @@ task Classify { ~{"-k " + reportMaxDistinct} \ ~{"--host-taxids " + hostTaxIDs} \ ~{"--exclude-taxids " + excludeTaxIDs} \ - ~{"-x " + sub(indexFiles[0], "\.[0-9]\.cf", "")} \ + -x $PWD/${indexBasename} \ ~{true="-1" false="-U" length(read2) > 0} ~{sep="," read1} \ ~{true="-2" false="" length(read2) > 0} ~{sep="," read2} \ ~{"-S " + outputPrefix + "_classification.tsv"} \ ~{"--report-file " + outputPrefix + "_output_report.tsv"} - } + >>> output { File outputMetrics = outputPrefix + "_alignment_metrics.tsv" @@ -187,10 +190,13 @@ task Inspect { command { set -e mkdir -p "$(dirname ~{outputPrefix})" + indexPath=~{sub(indexFiles[0], "\.[0-9]\.cf", "")} + indexBasename="$(basename ~{sub(indexFiles[0], "\.[0-9]\.cf", "")})" + mv ${indexPath}* $PWD/ centrifuge-inspect \ ~{outputOptions[printOption]} \ ~{"--across " + across} \ - ~{sub(indexFiles[0], "\.[0-9]\.cf", "")} \ + $PWD/${indexBasename} \ > ~{outputPrefix + "/" + printOption} } From 457a3437b74fc3b594c853f25c5f1cf1d338c8a4 Mon Sep 
17 00:00:00 2001 From: JasperBoom Date: Thu, 30 Jan 2020 10:43:35 +0100 Subject: [PATCH 0137/1208] Fix command section for Inspect. --- centrifuge.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index 2869d414..3f01d0e4 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -187,7 +187,7 @@ task Inspect { Map[String, String] outputOptions = {"fasta": "", "names": "--names", "summary": "--summary", "conversionTable": "--conversion-table", "taxonomyTree": "--taxonomy-tree", "nameTable": "--name-table", "sizeTable": "--size-table"} - command { + command <<< set -e mkdir -p "$(dirname ~{outputPrefix})" indexPath=~{sub(indexFiles[0], "\.[0-9]\.cf", "")} @@ -198,7 +198,7 @@ task Inspect { ~{"--across " + across} \ $PWD/${indexBasename} \ > ~{outputPrefix + "/" + printOption} - } + >>> output { File outputInspect = outputPrefix + "/" + printOption From 20c4c2174c4889f7b0cb7d08972198dbc49ddb24 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 30 Jan 2020 11:34:46 +0100 Subject: [PATCH 0138/1208] change input --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 78a44e27..cfbc6a6a 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -891,7 +891,7 @@ task HaplotypeCaller { inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"} excludeIntervalList: {description: "Bed files or interval lists describing the regions to NOT operate on.", category: "common"} - gvcfPath: {description: "The location to write the output GVCF to.", category: "required"} + outputPath: {description: "The location to write the output to.", category: "required"} ploidy: {description: "The ploidy with which the variants should be called.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", 
category: "required"} From 7ba4c125163a377e0ce1bd2a0a29e98d17f3b978 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 30 Jan 2020 13:47:07 +0100 Subject: [PATCH 0139/1208] Replace moving with hardlinking. --- centrifuge.wdl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index 3f01d0e4..909de67b 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -115,9 +115,11 @@ task Classify { command <<< set -e mkdir -p "$(dirname ~{outputPrefix})" - indexPath=~{sub(indexFiles[0], "\.[0-9]\.cf", "")} indexBasename="$(basename ~{sub(indexFiles[0], "\.[0-9]\.cf", "")})" - mv ${indexPath}* $PWD/ + for file in ~{sep=" " indexFiles} + do + ln ${file} $PWD/"$(basename ${file})" + done centrifuge \ ~{inputFormatOptions[inputFormat]} \ ~{true="--phred64" false="--phred33" phred64} \ @@ -190,9 +192,11 @@ task Inspect { command <<< set -e mkdir -p "$(dirname ~{outputPrefix})" - indexPath=~{sub(indexFiles[0], "\.[0-9]\.cf", "")} indexBasename="$(basename ~{sub(indexFiles[0], "\.[0-9]\.cf", "")})" - mv ${indexPath}* $PWD/ + for file in ~{sep=" " indexFiles} + do + ln ${file} $PWD/"$(basename ${file})" + done centrifuge-inspect \ ~{outputOptions[printOption]} \ ~{"--across " + across} \ From 55477bb767a1269eff7c637ae54b2fe3806af17c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 30 Jan 2020 15:48:20 +0100 Subject: [PATCH 0140/1208] add umi-tools --- umi-tools.wdl | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 umi-tools.wdl diff --git a/umi-tools.wdl b/umi-tools.wdl new file mode 100644 index 00000000..6bc4056a --- /dev/null +++ b/umi-tools.wdl @@ -0,0 +1,114 @@ +version 1.0 + +# Copyright (c) 2017 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without 
restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Extract { + input { + File read1 + File? read2 + String bcPattern + String? bcPattern2 + Boolean threePrime = false + String read1Output = "umi_extracted_R1.fastq.gz" + String? read2Output = "umi_extracted_R2.fastq.gz" + + String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" + } + + command { + umi_tools extract \ + --stdin ~{read1} \ + ~{"--read2-in " + read2} \ + --bc-pattern ~{bcPattern} \ + ~{"bc-pattern2 " + bcPattern2} \ + ~{true="--3prime" false="" threePrime} \ + --stdout ~{read1Output} \ + ~{"--read2-out " + read2Output} + } + + output { + File extractedRead1 = read1Output + File? extractedRead2 = read2Output + } + + runtime { + docker: dockerImage + } + + parameter_meta { + read1: {description: "The first/single-end fastq file.", category: "required"} + read2: {description: "The second-end fastq file.", category: "common"} + bcPattern: {description: "The pattern to be used for UMI extraction. 
See the umi_tools docs for more information.", category: "required"} + bcPattern2: {description: "The pattern to be used for UMI extraction in the second-end reads. See the umi_tools docs for more information.", category: "advanced"} + threePrime: {description: "Whether or not the UMI's are at the reads' 3' end. If false the UMIs are extracted from the 5' end.", category: "advanced"} + read1Output: {description: "The location to write the first/single-end output fastq file to.", category: "advanced"} + read2Output: {description: "The location to write the second-end output fastq file to.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task Dedup { + input { + File inputBam + File inputBamIndex + String outputBamPath + String statsPrefix = "stats" + Boolean paired = true + + # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9) + String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" + } + + String outputBamIndex = sub(outputBamPath, "\.bam$", ".bai") + + command { + set -e + umi_tools dedup \ + --stdin ~{inputBam} \ + --stdout ~{outputBamPath} \ + --output-stats ~{statsPrefix} \ + ~{true="--paired" false="" paired} + samtools index ~{outputBamPath} ~{outputBamIndex} + } + + output { + File deduppedBam = outputBamPath + File deduppedBamIndex = outputBamIndex + File editDistance = statsPrefix + "_edit_distance.tsv" + File umiStats = statsPrefix + "_per_umi.tsv" + File positionStats = statsPrefix + "_per_umi_per_position.tsv" + } + + runtime { + docker: dockerImage + } + + parameter_meta { + inputBam: {description: "The input BAM file.", categrory: "required"} + inputBamIndex: {description: "The index for the ipnut BAM file.", cateogry: "required"} + outputBamPath: {description: "The location to 
write the output BAM file to.", category: "required"} + statsPrefix: {description: "The prefix for the stats files.", category: "advanced"} + paired: {description: "Whether or not the data is paired.", category: "common"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} \ No newline at end of file From 4a7474a455fe5603d60bd0d7a78c6d1be3845aa8 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 31 Jan 2020 09:14:09 +0100 Subject: [PATCH 0141/1208] GenotypeGVCFs only works on a single file --- gatk.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index cfbc6a6a..39f4911a 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -725,8 +725,8 @@ task GatherBqsrReports { task GenotypeGVCFs { input { - Array[File]+ gvcfFiles - Array[File]+ gvcfFilesIndex + File gvcfFile + File gvcfFileIndex Array[File]+ intervals String outputPath File referenceFasta @@ -751,7 +751,7 @@ task GenotypeGVCFs { -G StandardAnnotation \ --only-output-calls-starting-in-intervals \ -new-qual \ - -V ~{sep=' -V ' gvcfFiles} \ + -V ~{gvcfFile} \ -L ~{sep=' -L ' intervals} } From 387ecd36e5cc49d7d124336524407f3c38fa9a12 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 31 Jan 2020 09:28:16 +0100 Subject: [PATCH 0142/1208] cleanup genotypeGVCFs --- gatk.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 39f4911a..3b73117f 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -732,6 +732,7 @@ task GenotypeGVCFs { File referenceFasta File referenceFastaDict File referenceFastaFai + Array[String] annotationGroups = ["StandardAnnotation"] File? dbsnpVCF File? 
dbsnpVCFIndex @@ -747,10 +748,9 @@ task GenotypeGVCFs { GenotypeGVCFs \ -R ~{referenceFasta} \ -O ~{outputPath} \ - ~{true="-D" false="" defined(dbsnpVCF)} ~{dbsnpVCF} \ - -G StandardAnnotation \ + ~{"-D " + dbsnpVCF} \ + ~{true="-G" false="" length(annotationGroups) > 0} ~{sep=" -G " annotationGroups} \ --only-output-calls-starting-in-intervals \ - -new-qual \ -V ~{gvcfFile} \ -L ~{sep=' -L ' intervals} } From 19a1614bb3b571824a5717168c1238e47a6c878a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 31 Jan 2020 09:32:52 +0100 Subject: [PATCH 0143/1208] better dbsnp flag --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 3b73117f..1311cef3 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -871,7 +871,7 @@ task HaplotypeCaller { ~{"--sample-ploidy " + ploidy} \ ~{true="-L" false="" defined(intervalList)} ~{sep=' -L ' intervalList} \ ~{true="-XL" false="" defined(excludeIntervalList)} ~{sep=' -XL ' excludeIntervalList} \ - ~{true="-D" false="" defined(dbsnpVCF)} ~{dbsnpVCF} \ + ~{"-D" + dbsnpVCF} \ ~{"--contamination-fraction-per-sample-file " + contamination} \ ~{true="-ERC GVCF" false="" gvcf} } From b853b581ed042ce0bf438e4a746fe7ea8ddc3daf Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 31 Jan 2020 09:36:45 +0100 Subject: [PATCH 0144/1208] fix parameter meta --- gatk.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 1311cef3..8785b3e7 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -767,8 +767,8 @@ task GenotypeGVCFs { } parameter_meta { - gvcfFiles: {description: "The GVCF files to be genotypes.", category: "required"} - gvcfFilesIndex: {description: "The index of the input GVCF files.", category: "required"} + gvcfFile: {description: "The GVCF file to be genotyped.", category: "required"} + gvcfFileIndex: {description: "The index of the input GVCF file.", category: "required"} intervals: {description: "Bed files or interval lists describing the regions to 
operate on.", category: "required"} outputPath: {description: "The location to write the output VCF file to.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", From ca18e05566c911a92b81471527ae74788abd857f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 31 Jan 2020 10:02:42 +0100 Subject: [PATCH 0145/1208] Add Centrifuge specific Krona task. --- centrifuge.wdl | 109 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 82 insertions(+), 27 deletions(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index 909de67b..e2c43c82 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -296,45 +296,100 @@ task DownloadTaxonomy { task Kreport { input { - String? preCommand - File centrifugeOut - Boolean inputIsCompressed - String outputDir - String suffix = "kreport" - String prefix = "centrifuge" - String indexPrefix - Boolean? onlyUnique ## removed in 1.0.4 - Boolean? showZeros - Boolean? isCountTable - Int? minScore - Int? minLength - - Int cores = 1 + File centrifugeClassification + String outputPrefix + Array[File]+ indexFiles + Boolean noLCA = false + Boolean showZeros = false + Boolean isCountTable = false + + Int? minimumScore + Int? minimumLength + String memory = "4G" + String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he860b03_3" } - String kreportFilePath = outputDir + "/" + prefix + "." 
+ suffix - command { - set -e -o pipefail - ~{preCommand} + command <<< + set -e + mkdir -p "$(dirname ~{outputPrefix})" + indexBasename="$(basename ~{sub(indexFiles[0], "\.[0-9]\.cf", "")})" + for file in ~{sep=" " indexFiles} + do + ln ${file} $PWD/"$(basename ${file})" + done centrifuge-kreport \ - -x ~{indexPrefix} \ - ~{true="--only-unique" false="" onlyUnique} \ + -x $PWD/${indexBasename} \ + ~{true="--no-lca" false="" noLCA} \ ~{true="--show-zeros" false="" showZeros} \ ~{true="--is-count-table" false="" isCountTable} \ - ~{"--min-score " + minScore} \ - ~{"--min-length " + minLength} \ - ~{true="<(zcat" false="" inputIsCompressed} ~{centrifugeOut}\ - ~{true=")" false="" inputIsCompressed} \ - > ~{kreportFilePath} + ~{"--min-score " + minimumScore} \ + ~{"--min-length " + minimumLength} \ + ~{centrifugeClassification} \ + > ~{outputPrefix + "_kreport.tsv"} + >>> + + output { + File outputKreport = outputPrefix + "_kreport.tsv" + } + + runtime { + memory: memory + docker: dockerImage + } + + parameter_meta { + # inputs + centrifugeClassification: {description: "File with Centrifuge classification results.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + indexFiles: {description: "The files of the index for the reference genomes.", category: "required"} + noLCA: {description: "Do not report the LCA of multiple assignments, but report count fractions at the taxa.", category: "advanced"} + showZeros: {description: "Show clades that have zero reads.", category: "advanced"} + isCountTable: {description: "The format of the file is taxIDCOUNT.", category: "advanced"} + minimumScore: {description: "Require a minimum score for reads to be counted.", category: "advanced"} + minimumLength: {description: "Require a minimum alignment length to the read.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: 
"The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputKreport: {description: "File with kraken style report."} + } +} + +task KTimportTaxonomy { + input { + File inputFile + String outputPrefix + + String memory = "4G" + String dockerImage = "quay.io/biocontainers/krona:2.7.1--pl526_1" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + cat ~{inputFile} | cut -f 1,3 > kronaInput.krona + ktImportTaxonomy kronaInput.krona + cp taxonomy.krona.html ~{outputPrefix + "_krona.html"} } output { - File kreport = kreportFilePath + File outputKronaPlot = outputPrefix + "_krona.html" } runtime { - cpu: cores memory: memory + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "File with Centrifuge classification results.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputKronaPlot: {description: "Krona taxonomy plot html file."} } } From be34af2f01fd212ea17178233ae6dbe5d8dd8427 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 31 Jan 2020 11:22:42 +0100 Subject: [PATCH 0146/1208] add genomicsDBImport --- gatk.wdl | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index 8785b3e7..29944a0b 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -723,6 +723,39 @@ task GatherBqsrReports { } } +task GenomicsDBImport { + input { + Array[File] gvcfFiles + Array[File] gvcfFilesIndex + Array[File]+ intervals + String genomicsDBWorkspacePath = "genomics_db" + String? 
tmpDir + String memory = "12G" + String javaXmx = "4G" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + } + + command { + set -e + mkdir -p "$(dirname ~{genomicsDBWorkspacePath})" + gatk --java-options -Xmx~{javaXmx} \ + GenomicsDBImport \ + -V ~{sep=" -V " gvcfFiles} \ + --genomicsdb-workspace-path ~{genomicsDBWorkspacePath} \ + ~{"--tmp-dir " + tmpDir} \ + -L ~{sep=" -L " intervals} + } + + output { + Array[File] genomicsDbFiles = glob(genomicsDBWorkspacePath + "/*") + } + + runtime { + docker: dockerImage + memory: memory + } +} + task GenotypeGVCFs { input { File gvcfFile From 75903be39e095907ad15b1a6f548615e7e1f4914 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 31 Jan 2020 15:04:26 +0100 Subject: [PATCH 0147/1208] fix missing directory and read2output umitools --- umi-tools.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 6bc4056a..d1842c8a 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -41,7 +41,7 @@ task Extract { ~{"bc-pattern2 " + bcPattern2} \ ~{true="--3prime" false="" threePrime} \ --stdout ~{read1Output} \ - ~{"--read2-out " + read2Output} + ~{if defined(read2) then "--read2-out " + read2Output else ""} } output { @@ -82,6 +82,7 @@ task Dedup { command { set -e + mkdir -p "$(dirname ~{outputBamPath})" umi_tools dedup \ --stdin ~{inputBam} \ --stdout ~{outputBamPath} \ From 1a51a35465660e545e0730d09e6b6ba325e1f97e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 3 Feb 2020 07:42:28 +0100 Subject: [PATCH 0148/1208] tar the genomicsdbworkspace path --- gatk.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 29944a0b..aac44d95 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -729,6 +729,7 @@ task GenomicsDBImport { Array[File] gvcfFilesIndex Array[File]+ intervals String genomicsDBWorkspacePath = "genomics_db" + String genomicsDBTarFile = "genomics_db.tar.gz" String? 
tmpDir String memory = "12G" String javaXmx = "4G" @@ -744,10 +745,11 @@ task GenomicsDBImport { --genomicsdb-workspace-path ~{genomicsDBWorkspacePath} \ ~{"--tmp-dir " + tmpDir} \ -L ~{sep=" -L " intervals} + bash -c 'tar -cvzf ~{genomicsDBTarFile} ~{genomicsDBWorkspacePath}/*' } output { - Array[File] genomicsDbFiles = glob(genomicsDBWorkspacePath + "/*") + Array[File] genomicsDbTarArchive = genomicsDBTarFile } runtime { From 93859b9e75a26aee27d9f8ef717f3f8a4b6d997c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 3 Feb 2020 07:49:28 +0100 Subject: [PATCH 0149/1208] add parameter_meta --- gatk.wdl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index aac44d95..fcc100df 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -756,6 +756,21 @@ task GenomicsDBImport { docker: dockerImage memory: memory } + + parameter_meta { + gvcfFiles: {description: "The gvcfFiles to be merged.", category: "required"} + gvcfFilesIndex: {description: "Indexes for the gvcfFiles.", category: "required"} + intervals: {description: "intervals over which to operate.", category: "required"} + genomicsDBWorkspacePath: {description: "Where the genomicsDB files should be stored", category: "advanced"} + genomicsDBTarFile: {description: "Where the .tar file containing the genomicsDB should be stored", category: "advanced"} + tmpDir: {description: "Alternate temporary directory in case there is not enough space. Must be mounted when using containers", + category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } task GenotypeGVCFs { From 48ac569f6277ea927bf2c8c091e8b5dac5180c8d Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 3 Feb 2020 09:32:58 +0100 Subject: [PATCH 0150/1208] Update Krona container. --- centrifuge.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index e2c43c82..a3e7aeaf 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -37,7 +37,7 @@ task Build { Int threads = 5 String memory = "20G" - String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he860b03_3" + String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } command { @@ -107,7 +107,7 @@ task Classify { Int threads = 4 String memory = "16G" - String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he860b03_3" + String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } Map[String, String] inputFormatOptions = {"fastq": "-q", "fasta": "-f", "qseq": "--qseq", "raw": "-r", "sequences": "-c"} @@ -184,7 +184,7 @@ task Inspect { Int? across String memory = "4G" - String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he860b03_3" + String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } Map[String, String] outputOptions = {"fasta": "", "names": "--names", "summary": "--summary", "conversionTable": "--conversion-table", "taxonomyTree": "--taxonomy-tree", "nameTable": "--name-table", "sizeTable": "--size-table"} @@ -307,7 +307,7 @@ task Kreport { Int? 
minimumLength String memory = "4G" - String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he860b03_3" + String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } command <<< @@ -362,7 +362,7 @@ task KTimportTaxonomy { String outputPrefix String memory = "4G" - String dockerImage = "quay.io/biocontainers/krona:2.7.1--pl526_1" + String dockerImage = "biocontainers/krona:v2.7.1_cv1" } command { From ac2824708cfa631a9d7e4cd979b11601b0dd072b Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 3 Feb 2020 09:49:50 +0100 Subject: [PATCH 0151/1208] Update CHANGELOG. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 54c398e0..78408aa2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Centrifuge: Add Krona task specific to Centrifuge. + Centrifuge: Fix Centrifuge tests, where sometimes the index files could still not be located. + Update parameter_meta for TALON, Centrifuge and Minimap2. + Centrifuge: Fix issue where Centrifuge Inspect did not get the correct index files location. 
From 9af750f5339de155eebf516e16c35b6715e34579 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 3 Feb 2020 11:17:15 +0100 Subject: [PATCH 0152/1208] update parameter_meta --- gatk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index fcc100df..7a695953 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -826,6 +826,7 @@ task GenotypeGVCFs { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + annotationGroups: {description: "Which annotation groups will be used for the annotation", category: "advanced"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} @@ -943,6 +944,7 @@ task HaplotypeCaller { excludeIntervalList: {description: "Bed files or interval lists describing the regions to NOT operate on.", category: "common"} outputPath: {description: "The location to write the output to.", category: "required"} ploidy: {description: "The ploidy with which the variants should be called.", category: "common"} + gvcf: {description: "Whether the output should be a gvcf", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", From 39694df0a5b98c8ec100143f2dfe72e5c3ca3c14 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 3 Feb 2020 11:22:23 +0100 Subject: [PATCH 0153/1208] update changelog --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 54c398e0..cbf43a86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,13 @@ that users understand how the changes affect the new version. 
version 2.2.0-dev --------------------------- ++ Add `GenomicsDBImport` task for GATK. ++ Add `annotationGroups` input to `GenotypeGVCFs` to allow setting multiple + annotation groups. The `StandardAnnotation` group is still used as default. ++ GenotypeGVCFs, only allow one input GVCF file, as the tool also only allows + one input file. ++ Rename HaplotypeCallerGVCF to HaplotypeCaller. Add `gvcf` option to set + whether output should be a GVCF. + Centrifuge: Fix Centrifuge tests, where sometimes the index files could still not be located. + Update parameter_meta for TALON, Centrifuge and Minimap2. + Centrifuge: Fix issue where Centrifuge Inspect did not get the correct index files location. From 1084b28219600d45b3a3849d4e420a02b2d60212 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 3 Feb 2020 11:23:43 +0100 Subject: [PATCH 0154/1208] correct mistake --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 7a695953..ff17dadc 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -749,7 +749,7 @@ task GenomicsDBImport { } output { - Array[File] genomicsDbTarArchive = genomicsDBTarFile + File genomicsDbTarArchive = genomicsDBTarFile } runtime { From 7ca9cdc354c16eb490c4d83883f081cab4dee745 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 4 Feb 2020 10:45:54 +0100 Subject: [PATCH 0155/1208] increase memory of umitools dedup --- umi-tools.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/umi-tools.wdl b/umi-tools.wdl index d1842c8a..12ef9b45 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -74,6 +74,8 @@ task Dedup { String statsPrefix = "stats" Boolean paired = true + String memory = "10G" + # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" } @@ -101,6 +103,7 @@ task Dedup { runtime { docker: dockerImage + memory: memory } parameter_meta { From 
e55983c0b8eed9cbf345551de3f66dae5288857a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 4 Feb 2020 14:36:16 +0100 Subject: [PATCH 0156/1208] increase umi dedup memory --- umi-tools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 12ef9b45..bca28da8 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -74,7 +74,7 @@ task Dedup { String statsPrefix = "stats" Boolean paired = true - String memory = "10G" + String memory = "20G" # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" From c3d68e115af4c77649ea8cfe3689953c6d9a5560 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 5 Feb 2020 08:40:55 +0100 Subject: [PATCH 0157/1208] removed structs --- clever.wdl | 5 +++-- delly.wdl | 11 +++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/clever.wdl b/clever.wdl index 6863e6f8..3fbd49d7 100644 --- a/clever.wdl +++ b/clever.wdl @@ -4,7 +4,8 @@ import "common.wdl" import "bwa.wdl" task Prediction { input { - IndexedBamFile bamFile + File bamFile + File bamIndex BwaIndex bwaIndex String outputPath Int threads = 10 @@ -20,7 +21,7 @@ task Prediction { --use_mapq \ --sorted \ -f \ - ~{bamFile.file} \ + ~{bamFile} \ ~{bwaIndex.fastaFile} \ ~{outputPath} >>> diff --git a/delly.wdl b/delly.wdl index f30e6f48..bda6aa8f 100644 --- a/delly.wdl +++ b/delly.wdl @@ -4,8 +4,11 @@ import "common.wdl" task CallSV { input { - IndexedBamFile bamFile - Reference reference + File bamFile + File bamIndex + File referenceFasta + File referenceFastaFai + #Reference reference String outputPath Int mem = 15 } @@ -16,8 +19,8 @@ task CallSV { mkdir -p $(dirname ~{outputPath}) delly call \ -o ~{outputPath} \ - -g ~{reference.fasta} \ - ~{bamFile.file} + -g ~{referenceFasta} \ + ~{bamFile} >>> output { From adf32288956dc0e6fb69c081f4133c028f048653 Mon Sep 17 
00:00:00 2001 From: cagaser Date: Wed, 5 Feb 2020 09:49:44 +0100 Subject: [PATCH 0158/1208] modify BAM files naming --- clever.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clever.wdl b/clever.wdl index 3fbd49d7..69ea572c 100644 --- a/clever.wdl +++ b/clever.wdl @@ -40,8 +40,8 @@ task Prediction { task Mateclever { input { - File fiteredBamFile - File indexedFiteredBamFile + File fiteredBam + File indexedFiteredBam BwaIndex bwaIndex File predictions String outputPath From 0a9e63b8b496d3a5c20098adcd3e136212f6fe9c Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 5 Feb 2020 10:06:48 +0100 Subject: [PATCH 0159/1208] remove structs from manta task --- manta.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/manta.wdl b/manta.wdl index 38bc1962..76c91a63 100644 --- a/manta.wdl +++ b/manta.wdl @@ -60,8 +60,8 @@ task Somatic { task Germline { input { - IndexedBamFile normalBam - Reference reference + File bamFile + File referenceFasta String runDir File? callRegions File? 
callRegionsIndex @@ -75,8 +75,8 @@ task Germline { command { set -e configManta.py \ - ~{"--normalBam " + normalBam.file} \ - --referenceFasta ~{reference.fasta} \ + ~{"--bamFile " + bamFile} \ + --referenceFasta ~{referenceFasta} \ ~{"--callRegions " + callRegions} \ --runDir ~{runDir} \ ~{true="--exome" false="" exome} From f68f0c8c6a4c066eb8a10631d99cedfeadc531f2 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 5 Feb 2020 10:12:31 +0100 Subject: [PATCH 0160/1208] fixed BAM naming in mateclever command section --- clever.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clever.wdl b/clever.wdl index 69ea572c..0c22e9f2 100644 --- a/clever.wdl +++ b/clever.wdl @@ -55,7 +55,7 @@ task Mateclever { command <<< set -e mkdir -p $(dirname ~{outputPath}) - echo ~{outputPath} ~{fiteredBamFile} ~{predictions} none > predictions.list + echo ~{outputPath} ~{fiteredBam} ~{predictions} none > predictions.list mateclever \ -T ~{threads} \ -k \ From 8069a59d82e047bcf08e04a440d9dc00bf393063 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 5 Feb 2020 13:33:09 +0100 Subject: [PATCH 0161/1208] changed --bamFile back to --normalBam in Manta tasks --- manta.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/manta.wdl b/manta.wdl index 76c91a63..3f9e001f 100644 --- a/manta.wdl +++ b/manta.wdl @@ -75,7 +75,7 @@ task Germline { command { set -e configManta.py \ - ~{"--bamFile " + bamFile} \ + ~{"--normalBam " + bamFile} \ --referenceFasta ~{referenceFasta} \ ~{"--callRegions " + callRegions} \ --runDir ~{runDir} \ From 6833651cbc671dbf264c3adf699b81f43880e438 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 5 Feb 2020 13:44:56 +0100 Subject: [PATCH 0162/1208] add reference index in Manta --- manta.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/manta.wdl b/manta.wdl index 3f9e001f..0bfc6553 100644 --- a/manta.wdl +++ b/manta.wdl @@ -62,6 +62,7 @@ task Germline { input { File bamFile File referenceFasta + File referenceFastaFai String runDir 
File? callRegions File? callRegionsIndex From a21e51b21a87c027e0fb0b2096b7191dc95640e7 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 5 Feb 2020 14:51:46 +0100 Subject: [PATCH 0163/1208] add bamindex in manta --- manta.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/manta.wdl b/manta.wdl index 0bfc6553..88118cf7 100644 --- a/manta.wdl +++ b/manta.wdl @@ -61,6 +61,7 @@ task Somatic { task Germline { input { File bamFile + File bamIndex File referenceFasta File referenceFastaFai String runDir From e6d73d17b644a74a4a95964edd872c2f61209dfd Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 5 Feb 2020 15:07:36 +0100 Subject: [PATCH 0164/1208] removed all manta output files except for diploidSV.vcf --- manta.wdl | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/manta.wdl b/manta.wdl index 88118cf7..50b334b4 100644 --- a/manta.wdl +++ b/manta.wdl @@ -90,18 +90,21 @@ task Germline { } output { - IndexedVcfFile candidateSmallIndels = object { - file: runDir + "/results/variants/candidateSmallIndels.vcf.gz", - index: runDir + "/results/variants/candidateSmallIndels.vcf.gz.tbi" - } - IndexedVcfFile candidateSV = object { - file: runDir + "/results/variants/candidateSV.vcf.gz", - index: runDir + "/results/variants/candidateSV.vcf.gz.tbi" - } - IndexedVcfFile diploidSV = object { - file: runDir + "/results/variants/diploidSV.vcf.gz", - index: runDir + "/results/variants/diploidSV.vcf.gz.tbi" - } + + File mantaVCF = runDir + "/results/variants/diploidSV.vcf.gz" + # File mantaVCFindex = runDir + "/results/variants/diploidSV.vcf.gz.tbi" + # IndexedVcfFile candidateSmallIndels = object { + # file: runDir + "/results/variants/candidateSmallIndels.vcf.gz", + # index: runDir + "/results/variants/candidateSmallIndels.vcf.gz.tbi" + # } + # IndexedVcfFile candidateSV = object { + # file: runDir + "/results/variants/candidateSV.vcf.gz", + # index: runDir + "/results/variants/candidateSV.vcf.gz.tbi" + # } + # IndexedVcfFile diploidSV = 
object { + # file: runDir + "/results/variants/diploidSV.vcf.gz", + # index: runDir + "/results/variants/diploidSV.vcf.gz.tbi" + # } } runtime { From 651acbe311c5b0fd8e700374b3df7fdfa10d8f45 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 6 Feb 2020 10:45:33 +0100 Subject: [PATCH 0165/1208] update scripts submodule --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index a1783b5c..15f31196 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit a1783b5c789ebef601a8ec5849c4bbfe7dd3f87d +Subproject commit 15f311965d4f50907ced1bd8babec864cbbf5c14 From fecdeb7c86a1875ec9f81653cf3bdbe08418d2cd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 6 Feb 2020 10:49:36 +0100 Subject: [PATCH 0166/1208] add memory paramter_meta for umitools dedup --- umi-tools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/umi-tools.wdl b/umi-tools.wdl index bca28da8..e684ef5a 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -112,6 +112,7 @@ task Dedup { outputBamPath: {description: "The location to write the output BAM file to.", category: "required"} statsPrefix: {description: "The prefix for the stats files.", category: "advanced"} paired: {description: "Whether or not the data is paired.", category: "common"} + memory: {description: "The amount of memory required for the task.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From 8abdaf2aff949695d5f90d23c8b70b3857aec984 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 6 Feb 2020 11:07:51 +0100 Subject: [PATCH 0167/1208] fix lint script --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 15f31196..ff036a83 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 15f311965d4f50907ced1bd8babec864cbbf5c14 +Subproject commit ff036a83f20a6b20fe39c7b738c2b2e38897515b From 483882f9dcf778196dcbaa32078316bc55a3a1e0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 6 Feb 2020 11:12:29 +0100 Subject: [PATCH 0168/1208] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3fda7d62..394d5994 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Add tasks for umi-tools dedup and extract. + Add `GenomicsDBImport` task for GATK. + Add `annotationGroups` input to `GenotypeGVCFs` to allow setting multiple annotation groups. The `StandardAnnotation` group is still used as default. From f25df1aeb3fb07a166e3a5b3029537da28c25b3a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 6 Feb 2020 14:14:44 +0100 Subject: [PATCH 0169/1208] scatterregions always outputs ordered scatters --- biopet/biopet.wdl | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index f91f93ea..47606820 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -254,7 +254,7 @@ task ScatterRegions { # linking does not work. 
String outputDirPath = "scatters" - command { + command <<< set -e -o pipefail mkdir -p ~{outputDirPath} biopet-scatterregions -Xmx~{javaXmx} \ @@ -264,10 +264,29 @@ task ScatterRegions { ~{"-L " + regions} \ ~{"--bamFile " + bamFile} \ ~{true="--notSplitContigs" false="" notSplitContigs} - } + + # Glob messes with order of scatters (10 comes before 1), which causes + # problems at gatherGvcfs + # Therefore we reorder the scatters with python. + # Copy all the scatter files to the CWD so the output matches paths in + # the cwd. + for file in ~{outputDirPath}/* + do cp $file . + done + python << CODE + import os + scatters = os.listdir("~{outputDirPath}") + splitext = [ x.split(".") for x in scatters] + splitnum = [x.split("-") + [y] for x,y in splitext] + ordered = sorted(splitnum, key=lambda x: int(x[1])) + merged = ["{}-{}.{}".format(x[0],x[1],x[2]) for x in ordered] + for x in merged: + print(x) + CODE + >>> output { - Array[File] scatters = glob(outputDirPath + "/scatter-*.bed") + Array[File] scatters = read_lines(stdout()) } runtime { From f377d14ec569f7a44008dae9988f44efd2d55f9f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 6 Feb 2020 14:31:49 +0100 Subject: [PATCH 0170/1208] add pedigree file --- gatk.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index ff17dadc..7e0f5c09 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -903,6 +903,7 @@ task HaplotypeCaller { Float? contamination File? dbsnpVCF File? dbsnpVCFIndex + File? pedigree Int? 
ploidy Boolean gvcf = false @@ -922,7 +923,8 @@ task HaplotypeCaller { ~{"--sample-ploidy " + ploidy} \ ~{true="-L" false="" defined(intervalList)} ~{sep=' -L ' intervalList} \ ~{true="-XL" false="" defined(excludeIntervalList)} ~{sep=' -XL ' excludeIntervalList} \ - ~{"-D" + dbsnpVCF} \ + ~{"-D " + dbsnpVCF} \ + ~{"--pedigree " + pedigree} \ ~{"--contamination-fraction-per-sample-file " + contamination} \ ~{true="-ERC GVCF" false="" gvcf} } From 9fa422f4f30c97fdb8ddd8a7738e2a8e57f4cb7b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 6 Feb 2020 14:35:33 +0100 Subject: [PATCH 0171/1208] add pedigree to GenotypeGVCF --- gatk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index 7e0f5c09..e5943a41 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -785,6 +785,7 @@ task GenotypeGVCFs { Array[String] annotationGroups = ["StandardAnnotation"] File? dbsnpVCF File? dbsnpVCFIndex + File? pedigree String memory = "18G" String javaXmx = "6G" @@ -799,6 +800,7 @@ task GenotypeGVCFs { -R ~{referenceFasta} \ -O ~{outputPath} \ ~{"-D " + dbsnpVCF} \ + ~{"--pedigree " + pedigree} \ ~{true="-G" false="" length(annotationGroups) > 0} ~{sep=" -G " annotationGroups} \ --only-output-calls-starting-in-intervals \ -V ~{gvcfFile} \ From 2ba6443aa3aa02abca3b16de615baf97a606f6a8 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 6 Feb 2020 14:42:17 +0100 Subject: [PATCH 0172/1208] add parameter_meta --- gatk.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index e5943a41..78d7a19f 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -831,7 +831,7 @@ task GenotypeGVCFs { annotationGroups: {description: "Which annotation groups will be used for the annotation", category: "advanced"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} - + pedigree: {description: "Pedigree file for determining the population \"founders\"", category: "common"} 
memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} @@ -957,7 +957,7 @@ task HaplotypeCaller { contamination: {description: "Equivalent to HaplotypeCaller's `-contamination` option.", category: "advanced"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} - + pedigree: {description: "Pedigree file for determining the population \"founders\"", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} From d66c4748fa308da34b2759400d4b856d0126a174 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 6 Feb 2020 15:54:28 +0100 Subject: [PATCH 0173/1208] removed output structs from manta --- manta.wdl | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/manta.wdl b/manta.wdl index 50b334b4..79f43d23 100644 --- a/manta.wdl +++ b/manta.wdl @@ -92,19 +92,7 @@ task Germline { output { File mantaVCF = runDir + "/results/variants/diploidSV.vcf.gz" - # File mantaVCFindex = runDir + "/results/variants/diploidSV.vcf.gz.tbi" - # IndexedVcfFile candidateSmallIndels = object { - # file: runDir + "/results/variants/candidateSmallIndels.vcf.gz", - # index: runDir + "/results/variants/candidateSmallIndels.vcf.gz.tbi" - # } - # IndexedVcfFile candidateSV = object { - # file: runDir + "/results/variants/candidateSV.vcf.gz", - # index: runDir + "/results/variants/candidateSV.vcf.gz.tbi" - # } - # IndexedVcfFile diploidSV = object { - # file: runDir + "/results/variants/diploidSV.vcf.gz", - # index: runDir + "/results/variants/diploidSV.vcf.gz.tbi" - # } + File mantaVCFindex = runDir + 
"/results/variants/diploidSV.vcf.gz.tbi" } runtime { From 9a3eb2b01928657a97814aeff7a16b7b612e1e10 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 6 Feb 2020 15:55:04 +0100 Subject: [PATCH 0174/1208] add FilterShortReadsBam --- samtools.wdl | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 481b486c..3489e9ec 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -266,3 +266,32 @@ task View { docker: dockerImage } } + +task FilterShortReadsBam { + input { + File bamFile + String outputPathBam + String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + } + + command <<< + set -e + mkdir -p $(dirname ~{outputPathBam}) + samtools view -h ~{bamFile} | \ + awk 'length($10) > 30 || $1 ~/^@/' | \ + samtools view -bS -> ~{outputPathBam} + samtools index ~{outputPathBam} + + >>> + + output { + File filteredBam = outputPathBam + File filteredBamIndex = outputPathBam+".bai" + } + + runtime { + docker: dockerImage + } +} + + From 4e27d421c27dbb70df5b76ea19649b4ecf7005d9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 6 Feb 2020 16:08:31 +0100 Subject: [PATCH 0175/1208] update changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 394d5994..f621caa9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Add pedigree input for HaplotypeCaller and GenotypeGVCFs. ++ Combined biopet.ScatterRegions and biopet.ReorderedGlobbedScatters into one. + biopet.ScatterRegions now always returns correctly ordered scatters. + Add tasks for umi-tools dedup and extract. + Add `GenomicsDBImport` task for GATK. 
+ Add `annotationGroups` input to `GenotypeGVCFs` to allow setting multiple From a2da269c430437ded6320741b1ad843b35fbe67f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 6 Feb 2020 16:41:28 +0100 Subject: [PATCH 0176/1208] simplify the scatter stuff --- biopet/biopet.wdl | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index 47606820..8efb91be 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -268,18 +268,13 @@ task ScatterRegions { # Glob messes with order of scatters (10 comes before 1), which causes # problems at gatherGvcfs # Therefore we reorder the scatters with python. - # Copy all the scatter files to the CWD so the output matches paths in - # the cwd. - for file in ~{outputDirPath}/* - do cp $file . - done python << CODE import os scatters = os.listdir("~{outputDirPath}") splitext = [ x.split(".") for x in scatters] splitnum = [x.split("-") + [y] for x,y in splitext] ordered = sorted(splitnum, key=lambda x: int(x[1])) - merged = ["{}-{}.{}".format(x[0],x[1],x[2]) for x in ordered] + merged = ["~{outputDirPath}/{}-{}.{}".format(x[0],x[1],x[2]) for x in ordered] for x in merged: print(x) CODE From ceb49be2bb684823b13998f807ee39b91dc29a58 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 7 Feb 2020 10:41:12 +0100 Subject: [PATCH 0177/1208] make dockerimages configurable --- bcftools.wdl | 3 ++- clever.wdl | 6 ++++-- delly.wdl | 4 ++-- manta.wdl | 4 ++-- picard.wdl | 3 ++- survivor.wdl | 3 ++- 6 files changed, 14 insertions(+), 9 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index de7730bb..211bdc98 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -4,6 +4,7 @@ task Bcf2Vcf { input { File bcf String outputPath + String dockerImage = "quay.io/biocontainers/bcftools:1.9--ha228f0b_3" } command <<< @@ -17,6 +18,6 @@ task Bcf2Vcf { } runtime { - docker: "quay.io/biocontainers/bcftools:1.9--ha228f0b_3" + docker: dockerImage } } diff --git a/clever.wdl b/clever.wdl index 
0c22e9f2..6128981f 100644 --- a/clever.wdl +++ b/clever.wdl @@ -10,6 +10,7 @@ task Prediction { String outputPath Int threads = 10 Int mem = 15 + String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } @@ -33,7 +34,7 @@ task Prediction { runtime { cpu: threads memory: mem - docker: "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" + docker: dockerImage } } @@ -50,6 +51,7 @@ task Mateclever { Int cleverMaxDelLength = 100000 Int maxLengthDiff= 30 Int maxOffset = 150 + String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } command <<< @@ -75,6 +77,6 @@ task Mateclever { runtime { cpu: threads memory: mem - docker: "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" + docker: dockerImage } } diff --git a/delly.wdl b/delly.wdl index bda6aa8f..7765903b 100644 --- a/delly.wdl +++ b/delly.wdl @@ -8,9 +8,9 @@ task CallSV { File bamIndex File referenceFasta File referenceFastaFai - #Reference reference String outputPath Int mem = 15 + String dockerImage = "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" } @@ -28,7 +28,7 @@ task CallSV { } runtime { - docker: "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" + docker: dockerImage memory: mem } diff --git a/manta.wdl b/manta.wdl index 79f43d23..bf2f27d0 100644 --- a/manta.wdl +++ b/manta.wdl @@ -60,6 +60,7 @@ task Somatic { task Germline { input { + String dockerImage = "quay.io/biocontainers/manta:1.4.0--py27_1" File bamFile File bamIndex File referenceFasta @@ -71,7 +72,6 @@ task Germline { Int cores = 1 Int memory = 4 - String dockerTag = "1.4.0--py27_1" } command { @@ -98,7 +98,7 @@ task Germline { runtime { cpu: cores memory: memory - docker: "quay.io/biocontainers/manta:" + dockerTag + docker: dockerImage } } diff --git a/picard.wdl b/picard.wdl index 896a10cd..db7a0efa 100644 --- a/picard.wdl +++ b/picard.wdl @@ -470,6 +470,7 @@ task SortVcf { task RenameSample { input { + String dockerImage = "quay.io/biocontainers/picard:2.19.0--0" File inputVcf String 
outputPath String newSampleName @@ -492,7 +493,7 @@ task RenameSample { } runtime { - docker: "quay.io/biocontainers/picard:2.19.0--0" + docker: dockerImage memory: ceil(memory * memoryMultiplier) } } diff --git a/survivor.wdl b/survivor.wdl index f4fdc4b1..8bfb0e4f 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -4,6 +4,7 @@ import "common.wdl" task Merge { input{ + String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" Array[File] filePaths Int breakpointDistance = 1000 Int suppVecs = 2 @@ -36,7 +37,7 @@ task Merge { } runtime { - docker: "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" + docker: dockerImage memory: memory } } From 30a692a5c88386997501484c9d04a8a8984b9b89 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 7 Feb 2020 14:53:06 +0100 Subject: [PATCH 0178/1208] change outputBam to ouputBAM under output parameter --- samtools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 3489e9ec..83fec54d 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -223,7 +223,7 @@ task View { File inFile File? referenceFasta String outputFileName = "view.bam" - Boolean? includeHeader + Boolean? includeHeader Boolean? outputBam Boolean? uncompressedBamOutput Int? 
includeFilter @@ -256,7 +256,7 @@ task View { } output { - File outputBam = outputFileName + File outputBAM = outputFileName File outputBamIndex = outputIndexPath } From 2d90e2efef14d344efb0c02fe494f531b634792d Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 7 Feb 2020 15:22:36 +0100 Subject: [PATCH 0179/1208] fix writings according to bioWDL guidelines --- bcftools.wdl | 4 ++-- clever.wdl | 9 ++++----- delly.wdl | 6 ++---- manta.wdl | 1 - samtools.wdl | 4 ++-- survivor.wdl | 4 ++-- 6 files changed, 12 insertions(+), 16 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 211bdc98..0b7e9a22 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -7,11 +7,11 @@ task Bcf2Vcf { String dockerImage = "quay.io/biocontainers/bcftools:1.9--ha228f0b_3" } - command <<< + command { set -e mkdir -p $(dirname ~{outputPath}) bcftools view ~{bcf} -O v -o ~{outputPath} - >>> + } output { File OutputVcf = "~{outputPath}" diff --git a/clever.wdl b/clever.wdl index 6128981f..4d548bb4 100644 --- a/clever.wdl +++ b/clever.wdl @@ -13,8 +13,7 @@ task Prediction { String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } - - command <<< + command { set -e mkdir -p $(dirname ~{outputPath}) clever \ @@ -25,7 +24,7 @@ task Prediction { ~{bamFile} \ ~{bwaIndex.fastaFile} \ ~{outputPath} - >>> + } output { File predictions = "~{outputPath}/predictions.vcf" @@ -54,7 +53,7 @@ task Mateclever { String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } - command <<< + command { set -e mkdir -p $(dirname ~{outputPath}) echo ~{outputPath} ~{fiteredBam} ~{predictions} none > predictions.list @@ -68,7 +67,7 @@ task Mateclever { ~{bwaIndex.fastaFile} \ predictions.list \ ~{outputPath} - >>> + } output { File matecleverVcf = "~{outputPath}/deletions.vcf" diff --git a/delly.wdl b/delly.wdl index 7765903b..9b697eb1 100644 --- a/delly.wdl +++ b/delly.wdl @@ -13,15 +13,14 @@ task CallSV { String dockerImage = "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" } 
- - command <<< + command { set -e mkdir -p $(dirname ~{outputPath}) delly call \ -o ~{outputPath} \ -g ~{referenceFasta} \ ~{bamFile} - >>> + } output { File dellyBcf = "~{outputPath}" @@ -31,5 +30,4 @@ task CallSV { docker: dockerImage memory: mem } - } diff --git a/manta.wdl b/manta.wdl index bf2f27d0..47f7ace1 100644 --- a/manta.wdl +++ b/manta.wdl @@ -90,7 +90,6 @@ task Germline { } output { - File mantaVCF = runDir + "/results/variants/diploidSV.vcf.gz" File mantaVCFindex = runDir + "/results/variants/diploidSV.vcf.gz.tbi" } diff --git a/samtools.wdl b/samtools.wdl index 83fec54d..db2461eb 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -274,7 +274,7 @@ task FilterShortReadsBam { String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } - command <<< + command { set -e mkdir -p $(dirname ~{outputPathBam}) samtools view -h ~{bamFile} | \ @@ -282,7 +282,7 @@ task FilterShortReadsBam { samtools view -bS -> ~{outputPathBam} samtools index ~{outputPathBam} - >>> + } output { File filteredBam = outputPathBam diff --git a/survivor.wdl b/survivor.wdl index 8bfb0e4f..e4984928 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -17,7 +17,7 @@ task Merge { Int memory = 128 } - command <<< + command { set -e mkdir -p $(dirname ~{outputPath}) echo '~{sep="\n" filePaths}' > fileList @@ -30,7 +30,7 @@ task Merge { ~{distanceBySvSize} \ ~{minSize} \ ~{outputPath} - >>> + } output { File mergedVcf = "~{outputPath}" From cfab4a4f7cb2e16cf7fd86d37e69698fd48180e6 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Fri, 7 Feb 2020 15:27:33 +0100 Subject: [PATCH 0180/1208] Update clever.wdl Co-Authored-By: Jasper Boom --- clever.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clever.wdl b/clever.wdl index 4d548bb4..dee92a02 100644 --- a/clever.wdl +++ b/clever.wdl @@ -9,7 +9,7 @@ task Prediction { BwaIndex bwaIndex String outputPath Int threads = 10 - Int mem = 15 + String memory = "15G" String dockerImage 
= "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } From 094f4a7a6aaae02e653987d179a4dcf0a7a45610 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 7 Feb 2020 15:34:25 +0100 Subject: [PATCH 0181/1208] fix some sytax styles --- bcftools.wdl | 4 ++-- clever.wdl | 12 +++++------- delly.wdl | 4 ++-- picard.wdl | 4 ++-- samtools.wdl | 4 ++-- survivor.wdl | 4 ++-- 6 files changed, 15 insertions(+), 17 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 0b7e9a22..47f587ec 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -9,12 +9,12 @@ task Bcf2Vcf { command { set -e - mkdir -p $(dirname ~{outputPath}) + mkdir -p "$(dirname ~{outputPath})" bcftools view ~{bcf} -O v -o ~{outputPath} } output { - File OutputVcf = "~{outputPath}" + File OutputVcf = outputPath } runtime { diff --git a/clever.wdl b/clever.wdl index 4d548bb4..e1ba8afe 100644 --- a/clever.wdl +++ b/clever.wdl @@ -1,7 +1,5 @@ version 1.0 -import "common.wdl" -import "bwa.wdl" task Prediction { input { File bamFile @@ -27,7 +25,7 @@ task Prediction { } output { - File predictions = "~{outputPath}/predictions.vcf" + File predictions = outputPath + "/predictions.vcf" } runtime { @@ -46,7 +44,7 @@ task Mateclever { File predictions String outputPath Int threads = 10 - Int mem = 15 + String memory = 15 Int cleverMaxDelLength = 100000 Int maxLengthDiff= 30 Int maxOffset = 150 @@ -55,7 +53,7 @@ task Mateclever { command { set -e - mkdir -p $(dirname ~{outputPath}) + mkdir -p "$(dirname ~{outputPath})" echo ~{outputPath} ~{fiteredBam} ~{predictions} none > predictions.list mateclever \ -T ~{threads} \ @@ -70,12 +68,12 @@ task Mateclever { } output { - File matecleverVcf = "~{outputPath}/deletions.vcf" + File matecleverVcf = outputPath + "/deletions.vcf" } runtime { cpu: threads - memory: mem + memory: memory docker: dockerImage } } diff --git a/delly.wdl b/delly.wdl index 9b697eb1..f97d9056 100644 --- a/delly.wdl +++ b/delly.wdl @@ -15,7 +15,7 @@ task CallSV { command { set -e - mkdir -p $(dirname 
~{outputPath}) + mkdir -p "$(dirname ~{outputPath})" delly call \ -o ~{outputPath} \ -g ~{referenceFasta} \ @@ -23,7 +23,7 @@ task CallSV { } output { - File dellyBcf = "~{outputPath}" + File dellyBcf = outputPath } runtime { diff --git a/picard.wdl b/picard.wdl index db7a0efa..76c0ffe5 100644 --- a/picard.wdl +++ b/picard.wdl @@ -480,7 +480,7 @@ task RenameSample { command { set -e - mkdir -p $(dirname ~{outputPath}) + mkdir -p "$(dirname ~{outputPath})" picard -Xmx~{memory}G \ RenameSampleInVcf \ I=~{inputVcf} \ @@ -489,7 +489,7 @@ task RenameSample { } output { - File renamedVcf = "~{outputPath}" + File renamedVcf = outputPath } runtime { diff --git a/samtools.wdl b/samtools.wdl index db2461eb..e1be7c98 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -276,7 +276,7 @@ task FilterShortReadsBam { command { set -e - mkdir -p $(dirname ~{outputPathBam}) + mkdir -p "$(dirname ~{outputPathBam})" samtools view -h ~{bamFile} | \ awk 'length($10) > 30 || $1 ~/^@/' | \ samtools view -bS -> ~{outputPathBam} @@ -286,7 +286,7 @@ task FilterShortReadsBam { output { File filteredBam = outputPathBam - File filteredBamIndex = outputPathBam+".bai" + File filteredBamIndex = outputPathBam + ".bai" } runtime { diff --git a/survivor.wdl b/survivor.wdl index e4984928..9f0ea39b 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -19,7 +19,7 @@ task Merge { command { set -e - mkdir -p $(dirname ~{outputPath}) + mkdir -p "$(dirname ~{outputPath})" echo '~{sep="\n" filePaths}' > fileList SURVIVOR merge \ fileList \ @@ -33,7 +33,7 @@ task Merge { } output { - File mergedVcf = "~{outputPath}" + File mergedVcf = outputPath } runtime { From 66793bbfde09afcef9b4c98fe842637d98a72d5d Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 7 Feb 2020 15:40:31 +0100 Subject: [PATCH 0182/1208] small fix --- delly.wdl | 4 ++-- manta.wdl | 2 +- survivor.wdl | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/delly.wdl b/delly.wdl index f97d9056..9a47e34e 100644 --- a/delly.wdl +++ b/delly.wdl 
@@ -9,7 +9,7 @@ task CallSV { File referenceFasta File referenceFastaFai String outputPath - Int mem = 15 + String memory = 15 String dockerImage = "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" } @@ -28,6 +28,6 @@ task CallSV { runtime { docker: dockerImage - memory: mem + memory: memory } } diff --git a/manta.wdl b/manta.wdl index 47f7ace1..3dd8703d 100644 --- a/manta.wdl +++ b/manta.wdl @@ -71,7 +71,7 @@ task Germline { Boolean exome = false Int cores = 1 - Int memory = 4 + String memory = 4 } command { diff --git a/survivor.wdl b/survivor.wdl index 9f0ea39b..0ff513b3 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -14,7 +14,7 @@ task Merge { Int minSize = 30 String sample String outputPath - Int memory = 128 + String memory = 128 } command { From 7823ef99b20b16b7ca1330f1e3e384b5ddf763ea Mon Sep 17 00:00:00 2001 From: cedrick Date: Mon, 10 Feb 2020 10:58:00 +0100 Subject: [PATCH 0183/1208] add scripts directory --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index ff036a83..56ee7416 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit ff036a83f20a6b20fe39c7b738c2b2e38897515b +Subproject commit 56ee74167ba8e6326a923f6f25bfd2d39847ecdb From aca226193e23d2d06cd4c58f6b225f95973aeea6 Mon Sep 17 00:00:00 2001 From: cedrick Date: Mon, 10 Feb 2020 11:05:13 +0100 Subject: [PATCH 0184/1208] removed memort multiplier from picard --- picard.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/picard.wdl b/picard.wdl index 3648710c..4abd7ce0 100644 --- a/picard.wdl +++ b/picard.wdl @@ -618,14 +618,14 @@ task RenameSample { File inputVcf String outputPath String newSampleName - Int memory = 8 - Float memoryMultiplier = 3.0 + String memory = "24G" + String javaXmx = "8G" } command { set -e mkdir -p "$(dirname ~{outputPath})" - picard -Xmx~{memory}G \ + picard -Xmx~{javaXmx} \ RenameSampleInVcf \ I=~{inputVcf} \ O=~{outputPath} \ @@ -638,7 +638,7 @@ task RenameSample { runtime { docker: 
dockerImage - memory: ceil(memory * memoryMultiplier) + memory = memory } } From 8cbc37426897f1ccecfc9eaef98fe96fae46fff8 Mon Sep 17 00:00:00 2001 From: cedrick Date: Mon, 10 Feb 2020 11:07:12 +0100 Subject: [PATCH 0185/1208] changed memory to string --- clever.wdl | 2 +- delly.wdl | 2 +- manta.wdl | 2 +- survivor.wdl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clever.wdl b/clever.wdl index d40f2ed2..34a96122 100644 --- a/clever.wdl +++ b/clever.wdl @@ -44,7 +44,7 @@ task Mateclever { File predictions String outputPath Int threads = 10 - String memory = 15 + String memory = "15G" Int cleverMaxDelLength = 100000 Int maxLengthDiff= 30 Int maxOffset = 150 diff --git a/delly.wdl b/delly.wdl index 9a47e34e..8fbf599c 100644 --- a/delly.wdl +++ b/delly.wdl @@ -9,7 +9,7 @@ task CallSV { File referenceFasta File referenceFastaFai String outputPath - String memory = 15 + String memory = "15G" String dockerImage = "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" } diff --git a/manta.wdl b/manta.wdl index 23a34d92..4b970fbc 100644 --- a/manta.wdl +++ b/manta.wdl @@ -88,7 +88,7 @@ task Germline { Boolean exome = false Int cores = 1 - String memory = 4 + String memory = "4G" } command { diff --git a/survivor.wdl b/survivor.wdl index 0ff513b3..f2e4ae29 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -14,7 +14,7 @@ task Merge { Int minSize = 30 String sample String outputPath - String memory = 128 + String memory = "128G" } command { From 953c9d08c960f0010f1d8ec88a2b5786ce4dbef4 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Mon, 10 Feb 2020 11:07:17 +0100 Subject: [PATCH 0186/1208] Update samtools.wdl Co-Authored-By: DavyCats --- samtools.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index b630d394..7e44a906 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -307,7 +307,7 @@ task View { File inFile File? 
referenceFasta String outputFileName = "view.bam" - Boolean? includeHeader + Boolean? includeHeader Boolean? outputBam Boolean? uncompressedBamOutput Int? includeFilter @@ -394,4 +394,3 @@ task FilterShortReadsBam { } } - From 4a44260750c4ba22f8d33fc1cf474b58bffc1e2d Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Mon, 10 Feb 2020 11:08:06 +0100 Subject: [PATCH 0187/1208] Update samtools.wdl Gave Boolean a default value Co-Authored-By: DavyCats --- samtools.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 7e44a906..b14f0da9 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -309,7 +309,7 @@ task View { String outputFileName = "view.bam" Boolean? includeHeader Boolean? outputBam - Boolean? uncompressedBamOutput + Boolean uncompressedBamOutput = false Int? includeFilter Int? excludeFilter Int? excludeSpecificFilter @@ -393,4 +393,3 @@ task FilterShortReadsBam { docker: dockerImage } } - From 5d2c1b2235932ada1eeb64162a8b6b7fff779876 Mon Sep 17 00:00:00 2001 From: cedrick Date: Mon, 10 Feb 2020 11:12:02 +0100 Subject: [PATCH 0188/1208] changed OutputVcf to outputVcf --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 47f587ec..8fa3fc37 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -14,7 +14,7 @@ task Bcf2Vcf { } output { - File OutputVcf = outputPath + File outputVcf = outputPath } runtime { From 14de39c5b904827153642c06f2f42fa756880e61 Mon Sep 17 00:00:00 2001 From: cedrick Date: Mon, 10 Feb 2020 14:20:18 +0100 Subject: [PATCH 0189/1208] removed bam.bai extension in samtools, changed in to just .bai --- samtools.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index b14f0da9..b69c28b8 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -371,6 +371,7 @@ task FilterShortReadsBam { input { File bamFile String outputPathBam + String outputPathBamIndex 
String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } @@ -379,14 +380,13 @@ task FilterShortReadsBam { mkdir -p "$(dirname ~{outputPathBam})" samtools view -h ~{bamFile} | \ awk 'length($10) > 30 || $1 ~/^@/' | \ - samtools view -bS -> ~{outputPathBam} - samtools index ~{outputPathBam} - + samtools view -bS -> ~{outputPathBam} + samtools index ~{outputPathBam} ~{outputPathBamIndex} } output { File filteredBam = outputPathBam - File filteredBamIndex = outputPathBam + ".bai" + File filteredBamIndex = outputPathBamIndex } runtime { From 0ec5ac324e41fdb983a88d9b6e1395da9dc9fff2 Mon Sep 17 00:00:00 2001 From: cedrick Date: Mon, 10 Feb 2020 14:20:33 +0100 Subject: [PATCH 0190/1208] minor fix --- clever.wdl | 4 ++-- manta.wdl | 6 +++--- picard.wdl | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/clever.wdl b/clever.wdl index 34a96122..54f81082 100644 --- a/clever.wdl +++ b/clever.wdl @@ -1,5 +1,5 @@ version 1.0 - +import "bwa.wdl" task Prediction { input { File bamFile @@ -30,7 +30,7 @@ task Prediction { runtime { cpu: threads - memory: mem + memory: memory docker: dockerImage } diff --git a/manta.wdl b/manta.wdl index 4b970fbc..09a7c4ed 100644 --- a/manta.wdl +++ b/manta.wdl @@ -88,7 +88,7 @@ task Germline { Boolean exome = false Int cores = 1 - String memory = "4G" + Int memoryGb = 4 } command { @@ -103,7 +103,7 @@ task Germline { ~{runDir}/runWorkflow.py \ -m local \ -j ~{cores} \ - -g ~{memory} + -g ~{memoryGb} } output { @@ -113,7 +113,7 @@ task Germline { runtime { cpu: cores - memory: memory + memory: "~{memoryGb}G" docker: dockerImage } } diff --git a/picard.wdl b/picard.wdl index 4abd7ce0..cd923bb7 100644 --- a/picard.wdl +++ b/picard.wdl @@ -638,7 +638,7 @@ task RenameSample { runtime { docker: dockerImage - memory = memory + memory: memory } } From ee1c59a6a06ecd8b42e8925973fdcf5a616b05de Mon Sep 17 00:00:00 2001 From: cedrick Date: Mon, 10 Feb 2020 15:23:38 +0100 Subject: [PATCH 0191/1208] add parameter meta --- clever.wdl 
| 21 ++++++++++++++++++++- delly.wdl | 10 ++++++++++ manta.wdl | 14 ++++++++++++++ picard.wdl | 8 ++++++++ survivor.wdl | 8 ++++++++ 5 files changed, 60 insertions(+), 1 deletion(-) diff --git a/clever.wdl b/clever.wdl index 54f81082..863bf7ef 100644 --- a/clever.wdl +++ b/clever.wdl @@ -23,7 +23,7 @@ task Prediction { ~{bwaIndex.fastaFile} \ ~{outputPath} } - + output { File predictions = outputPath + "/predictions.vcf" } @@ -34,6 +34,15 @@ task Prediction { docker: dockerImage } + parameter_meta { + bamFile: {description: "The bam file to process.", category: "required"} + bamIndex: {description: "The index bam file.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + bwaIndex: {description: "The BWA index files.", category: "required"} + } + } task Mateclever { @@ -76,4 +85,14 @@ task Mateclever { memory: memory docker: dockerImage } + + parameter_meta { + fiteredBam: {description: "The bam file where sequences less than 30bp were removed.", category: "required"} + indexedFiteredBam: {description: "The index of the filtered bam file.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + bwaIndex: {description: "The BWA index files.", category: "required"} + predictions: {description: "The predicted deletions (VCF) from clever.", category: "required"} + } } diff --git a/delly.wdl b/delly.wdl index 8fbf599c..4dffa5f1 100644 --- a/delly.wdl +++ b/delly.wdl @@ -30,4 +30,14 @@ task CallSV { docker: dockerImage memory: memory } + + parameter_meta { + bamFile: {description: "The bam file to process.", category: "required"} + bamIndex: {description: "The index bam file.", category: "required"} + referenceFasta: referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} + referenceFastaFai: { description: "Fasta index (.fai) file of the reference", category: "required" } + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } diff --git a/manta.wdl b/manta.wdl index 09a7c4ed..cd6e0a86 100644 --- a/manta.wdl +++ b/manta.wdl @@ -116,5 +116,19 @@ task Germline { memory: "~{memoryGb}G" docker: dockerImage } + + parameter_meta { + runDir: {description: "The directory to use as run/output directory.", category: "common"} + bamFile: {description: "The bam file to process.", category: "required"} + bamIndex: {description: "The index bam file.", category: "required"} + referenceFasta: referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} + referenceFastaFai: { description: "Fasta index (.fai) file of the reference", category: "required" } + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} + callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} + exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} + } } diff --git a/picard.wdl b/picard.wdl index cd923bb7..5b3c3c65 100644 --- a/picard.wdl +++ b/picard.wdl @@ -640,5 +640,13 @@ task RenameSample { docker: dockerImage memory: memory } + + parameter_meta { + inputVcf: {description: "The VCF file to process.", category: "required"} + newSampleName: {description: "A string to replace the old sample name.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } diff --git a/survivor.wdl b/survivor.wdl index f2e4ae29..8763053d 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -40,4 +40,12 @@ task Merge { docker: dockerImage memory: memory } + + parameter_meta { + filePaths: {description: "An array of VCF files (predictions) to be merged by SURVIVOR", category: "required"} + sample: {description: "The name of the sample", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } From 4e44804ab3e3bf0c4d736a00a74e585b8e92b37a Mon Sep 17 00:00:00 2001 From: cedrick Date: Mon, 10 Feb 2020 15:44:25 +0100 Subject: [PATCH 0192/1208] update scripts submodule --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 56ee7416..ff036a83 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 56ee74167ba8e6326a923f6f25bfd2d39847ecdb +Subproject commit ff036a83f20a6b20fe39c7b738c2b2e38897515b From a4d2969f20acb2417e485ea31110be1e39774778 Mon Sep 17 00:00:00 2001 From: cedrick Date: Mon, 10 Feb 2020 15:45:23 +0100 Subject: [PATCH 0193/1208] fix parameter meta --- delly.wdl | 4 ++-- manta.wdl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/delly.wdl b/delly.wdl index 4dffa5f1..baaef56c 100644 --- a/delly.wdl +++ b/delly.wdl @@ -34,8 +34,8 @@ task CallSV { parameter_meta { bamFile: {description: "The bam file to process.", category: "required"} bamIndex: {description: "The index bam file.", category: "required"} - referenceFasta: referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} - referenceFastaFai: { description: "Fasta index (.fai) file of the reference", category: "required" } + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} + referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } outputPath: {description: "The location the output VCF file should be written.", category: "common"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/manta.wdl b/manta.wdl index cd6e0a86..30fc3f7d 100644 --- a/manta.wdl +++ b/manta.wdl @@ -121,8 +121,8 @@ task Germline { runDir: {description: "The directory to use as run/output directory.", category: "common"} bamFile: {description: "The bam file to process.", category: "required"} bamIndex: {description: "The index bam file.", category: "required"} - referenceFasta: referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} - referenceFastaFai: { description: "Fasta index (.fai) file of the reference", category: "required" } + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} + referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } outputPath: {description: "The location the output VCF file should be written.", category: "common"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 002fe47dd04ca66ac9b80baf4fde6da98cd0a946 Mon Sep 17 00:00:00 2001 From: cedrick Date: Mon, 10 Feb 2020 15:52:41 +0100 Subject: [PATCH 0194/1208] minor fix: parameter meta in manta.wdl --- manta.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/manta.wdl b/manta.wdl index 30fc3f7d..d44ff0a4 100644 --- a/manta.wdl +++ b/manta.wdl @@ -123,7 +123,6 @@ task Germline { bamIndex: {description: "The index bam file.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } - outputPath: {description: "The location the output VCF file should be written.", category: "common"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} From 2050f3fed42c63381cd6b5d0f7ae88f0ca3e838f Mon Sep 17 00:00:00 2001 From: cedrick Date: Mon, 10 Feb 2020 16:32:58 +0100 Subject: [PATCH 0195/1208] update parameters meta --- clever.wdl | 4 ++++ delly.wdl | 1 + manta.wdl | 2 ++ picard.wdl | 2 ++ survivor.wdl | 7 +++++++ 5 files changed, 16 insertions(+) diff --git a/clever.wdl b/clever.wdl index 863bf7ef..e1b17779 100644 --- a/clever.wdl +++ b/clever.wdl @@ -41,6 +41,8 @@ task Prediction { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} bwaIndex: {description: "The BWA index files.", category: "required"} + memory: {description: "The memory required to run the programs", category: "common"} + threads: {description: "The the number of threads required to run a program", category: "common"} } } @@ -94,5 +96,7 @@ task Mateclever { category: "advanced"} bwaIndex: {description: "The BWA index files.", category: "required"} predictions: {description: "The predicted deletions (VCF) from clever.", category: "required"} + memory: {description: "The memory required to run the programs", category: "common"} + threads: {description: "The the number of threads required to run a program", category: "common"} } } diff --git a/delly.wdl b/delly.wdl index baaef56c..940b5d35 100644 --- a/delly.wdl +++ b/delly.wdl @@ -39,5 +39,6 @@ task CallSV { outputPath: {description: "The location the output VCF file should be written.", category: "common"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + memory: {description: "The memory required to run the programs", category: "common"} } } diff --git a/manta.wdl b/manta.wdl index d44ff0a4..9c79786c 100644 --- a/manta.wdl +++ b/manta.wdl @@ -128,6 +128,8 @@ task Germline { callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} + memoryGb: {description: "The memory required to run the manta", category: "common"} + cores: {description: "The the number of cores required to run a program", category: "common"} } } diff --git a/picard.wdl b/picard.wdl index 5b3c3c65..6f85bfd8 100644 --- a/picard.wdl +++ b/picard.wdl @@ -647,6 +647,8 @@ task RenameSample { outputPath: {description: "The location the output VCF file should be written.", category: "common"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + javaXmx: {description: "The max. memory allocated for JAVA", category: "common"} + memory: {description: "The memory required to run the programs", category: "common"} } } diff --git a/survivor.wdl b/survivor.wdl index 8763053d..bf147816 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -47,5 +47,12 @@ task Merge { outputPath: {description: "The location the output VCF file should be written.", category: "common"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + minSize: {description: "The mimimum size of SV to be merged", category: "required"} + distanceBySvSize: {description: "A boolean to predict the pairwise distance between the SVs based on their size", category: "required"} + strandType: {description: "A boolean to include strand type of an SV to be merged", category: "required"} + svType: {description: "A boolean to include the type SV to be merged", category: "required"} + suppVecs: {description: "The minimum number of SV callers to support the merging", category: "required"} + breakpointDistance: {description: "The distance between pairwise breakpoints between SVs", category: "required"} + memory: {description: "The memory required to run the programs", category: "common"} } } From e5648f94b09b8f336441c84a5a5f1977b2e08072 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Tue, 11 Feb 2020 08:23:03 +0100 Subject: [PATCH 0196/1208] Update clever.wdl Co-Authored-By: Jasper Boom --- clever.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/clever.wdl b/clever.wdl index e1b17779..3dd41b38 100644 --- a/clever.wdl +++ b/clever.wdl @@ -44,7 +44,6 @@ task Prediction { memory: {description: "The memory required to run the programs", category: "common"} threads: {description: "The the number of threads required to run a program", category: "common"} } - } task Mateclever { From 9aa0b63a36704c5773687e9b6138e53684517381 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Tue, 11 Feb 2020 08:23:30 +0100 Subject: [PATCH 0197/1208] Update clever.wdl Co-Authored-By: Jasper Boom --- clever.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clever.wdl b/clever.wdl index 3dd41b38..fb67a2a8 100644 --- a/clever.wdl +++ b/clever.wdl @@ -13,7 +13,7 @@ task Prediction { command { set -e - mkdir -p $(dirname ~{outputPath}) + 
mkdir -p "$(dirname ~{outputPath})" clever \ -T ~{threads} \ --use_mapq \ From 45c737af1c551aa1d3c8844c11a2a630e2639fa3 Mon Sep 17 00:00:00 2001 From: cedrick Date: Tue, 11 Feb 2020 09:28:05 +0100 Subject: [PATCH 0198/1208] update clever meta --- clever.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clever.wdl b/clever.wdl index fb67a2a8..e395d7b9 100644 --- a/clever.wdl +++ b/clever.wdl @@ -97,5 +97,9 @@ task Mateclever { predictions: {description: "The predicted deletions (VCF) from clever.", category: "required"} memory: {description: "The memory required to run the programs", category: "common"} threads: {description: "The the number of threads required to run a program", category: "common"} + + maxOffset: {description: "Maximum center distance between split-read and read-pair deletion to be considered identical", category: "common"} + maxLengthDiff: {description: "Maximum length difference between split-read and read-pair deletion to be considered identical ", category: "common"} + cleverMaxDelLength: {description: "Maximum deletion length to look for from clever predictions.", category: "common"} } } From bb0a7b9a4f5a665c36e36e2322f6322668376f3e Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 11 Feb 2020 10:10:40 +0100 Subject: [PATCH 0199/1208] Reorder some stuff --- clever.wdl | 55 +++++++++++++++++++++++++++------------------------- delly.wdl | 12 ++++++------ manta.wdl | 20 +++++++++---------- picard.wdl | 21 ++++++++++++++------ samtools.wdl | 10 ++++++++++ survivor.wdl | 27 +++++++++++++------------- 6 files changed, 83 insertions(+), 62 deletions(-) diff --git a/clever.wdl b/clever.wdl index fb67a2a8..cc8bd8bf 100644 --- a/clever.wdl +++ b/clever.wdl @@ -1,17 +1,20 @@ -version 1.0 +version 1.0 + import "bwa.wdl" + task Prediction { input { File bamFile File bamIndex BwaIndex bwaIndex - String outputPath - Int threads = 10 + String outputPath + + Int threads = 10 String memory = "15G" String dockerImage = 
"quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" - } - - command { + } + + command { set -e mkdir -p "$(dirname ~{outputPath})" clever \ @@ -22,27 +25,27 @@ task Prediction { ~{bamFile} \ ~{bwaIndex.fastaFile} \ ~{outputPath} - } - + } + output { File predictions = outputPath + "/predictions.vcf" - } - + } + runtime { cpu: threads memory: memory docker: dockerImage - } + } parameter_meta { + # inputs bamFile: {description: "The bam file to process.", category: "required"} bamIndex: {description: "The index bam file.", category: "required"} - outputPath: {description: "The location the output VCF file should be written.", category: "common"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} bwaIndex: {description: "The BWA index files.", category: "required"} - memory: {description: "The memory required to run the programs", category: "common"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} threads: {description: "The the number of threads required to run a program", category: "common"} + memory: {description: "The memory required to run the programs", category: "common"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -53,11 +56,12 @@ task Mateclever { BwaIndex bwaIndex File predictions String outputPath - Int threads = 10 - String memory = "15G" Int cleverMaxDelLength = 100000 Int maxLengthDiff= 30 - Int maxOffset = 150 + Int maxOffset = 150 + + Int threads = 10 + String memory = "15G" String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } @@ -76,26 +80,25 @@ task Mateclever { predictions.list \ ~{outputPath} } - + output { - File matecleverVcf = outputPath + "/deletions.vcf" + File matecleverVcf = outputPath + "/deletions.vcf" } - + runtime { cpu: threads memory: memory - docker: dockerImage + docker: dockerImage } parameter_meta { fiteredBam: {description: "The bam file where sequences less than 30bp were removed.", category: "required"} indexedFiteredBam: {description: "The index of the filtered bam file.", category: "required"} - outputPath: {description: "The location the output VCF file should be written.", category: "common"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} bwaIndex: {description: "The BWA index files.", category: "required"} predictions: {description: "The predicted deletions (VCF) from clever.", category: "required"} - memory: {description: "The memory required to run the programs", category: "common"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} threads: {description: "The the number of threads required to run a program", category: "common"} + memory: {description: "The memory required to run the programs", category: "common"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/delly.wdl b/delly.wdl index 940b5d35..482cd8ad 100644 --- a/delly.wdl +++ b/delly.wdl @@ -8,11 +8,12 @@ task CallSV { File bamIndex File referenceFasta File referenceFastaFai - String outputPath + String outputPath + String memory = "15G" String dockerImage = "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" } - + command { set -e mkdir -p "$(dirname ~{outputPath})" @@ -25,10 +26,10 @@ task CallSV { output { File dellyBcf = outputPath } - + runtime { - docker: dockerImage memory: memory + docker: dockerImage } parameter_meta { @@ -37,8 +38,7 @@ task CallSV { referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } outputPath: {description: "The location the output VCF file should be written.", category: "common"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} memory: {description: "The memory required to run the programs", category: "common"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/manta.wdl b/manta.wdl index 9c79786c..f1ad5d08 100644 --- a/manta.wdl +++ b/manta.wdl @@ -57,6 +57,7 @@ task Somatic { } parameter_meta { + # inputs tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"} tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} normalBam: {description: "The normal/control sample's BAM file.", category: "common"} @@ -67,17 +68,14 @@ task Somatic { callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} - cores: {description: "The number of cores to use.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } task Germline { input { - String dockerImage = "quay.io/biocontainers/manta:1.4.0--py27_1" File bamFile File bamIndex File referenceFasta @@ -86,9 +84,10 @@ task Germline { File? callRegions File? 
callRegionsIndex Boolean exome = false - + Int cores = 1 Int memoryGb = 4 + String dockerImage = "quay.io/biocontainers/manta:1.4.0--py27_1" } command { @@ -99,7 +98,7 @@ task Germline { ~{"--callRegions " + callRegions} \ --runDir ~{runDir} \ ~{true="--exome" false="" exome} - + ~{runDir}/runWorkflow.py \ -m local \ -j ~{cores} \ @@ -110,7 +109,7 @@ task Germline { File mantaVCF = runDir + "/results/variants/diploidSV.vcf.gz" File mantaVCFindex = runDir + "/results/variants/diploidSV.vcf.gz.tbi" } - + runtime { cpu: cores memory: "~{memoryGb}G" @@ -118,18 +117,17 @@ task Germline { } parameter_meta { - runDir: {description: "The directory to use as run/output directory.", category: "common"} bamFile: {description: "The bam file to process.", category: "required"} bamIndex: {description: "The index bam file.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + runDir: {description: "The directory to use as run/output directory.", category: "common"} callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} - memoryGb: {description: "The memory required to run the manta", category: "common"} cores: {description: "The the number of cores required to run a program", category: "common"} + memoryGb: {description: "The memory required to run the manta", category: "common"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/picard.wdl b/picard.wdl index 6f85bfd8..ae02a0d7 100644 --- a/picard.wdl +++ b/picard.wdl @@ -33,6 +33,7 @@ task BedToIntervalList { } parameter_meta { + # inputs bedFile: {description: "A bed file.", category: "required"} dict: {description: "A sequence dict file.", category: "required"} outputPath: {description: "The location the output interval list should be written to.", @@ -138,6 +139,7 @@ task CollectMultipleMetrics { } parameter_meta { + # inputs inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} @@ -209,6 +211,7 @@ task CollectRnaSeqMetrics { } parameter_meta { + # inputs inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} @@ -268,6 +271,7 @@ task CollectTargetedPcrMetrics { } parameter_meta { + # inputs inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} @@ -326,6 +330,7 @@ task GatherBamFiles { } parameter_meta { + # inputs inputBams: {description: "The BAM files to be merged together.", category: "required"} inputBamsIndex: {description: "The indexes of the input BAM files.", category: "required"} outputBamPath: {description: "The path where the merged BAM file will be written.", caregory: "required"} @@ -368,6 +373,7 @@ task GatherVcfs { } parameter_meta { + # inputs inputVcfs: {description: "The VCF files to be merged together.", category: "required"} inputVcfIndexes: {description: "The indexes of the input VCF files.", category: "required"} outputVcfPath: {description: "The path where the merged VCF file will be 
written.", caregory: "required"} @@ -434,6 +440,7 @@ task MarkDuplicates { } parameter_meta { + # inputs inputBams: {description: "The BAM files for which the duplicate reads should be marked.", category: "required"} inputBamIndexes: {description: "Th eindexes for the input BAM files.", category: "required"} outputBamPath: {description: "The location where the ouptut BAM file should be written.", category: "required"} @@ -483,6 +490,7 @@ task MergeVCFs { } parameter_meta { + # inputs inputVCFs: {description: "The VCF files to be merged.", category: "required"} inputVCFsIndexes: {description: "The indexes of the VCF files.", category: "required"} outputVcfPath: {description: "The location the output VCF file should be written to.", category: "required"} @@ -600,6 +608,7 @@ task SortVcf { } parameter_meta { + # inputs vcfFiles: {description: "The VCF files to merge and sort.", category: "required"} outputVcfPath: {description: "The location the sorted VCF files should be written to.", category: "required"} dict: {description: "A sequence dictionary matching the VCF files.", category: "advanced"} @@ -614,12 +623,13 @@ task SortVcf { task RenameSample { input { - String dockerImage = "quay.io/biocontainers/picard:2.19.0--0" File inputVcf String outputPath String newSampleName + String memory = "24G" String javaXmx = "8G" + String dockerImage = "quay.io/biocontainers/picard:2.19.0--0" } command { @@ -642,13 +652,12 @@ task RenameSample { } parameter_meta { + # inputs inputVcf: {description: "The VCF file to process.", category: "required"} - newSampleName: {description: "A string to replace the old sample name.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - javaXmx: {description: "The max. 
memory allocated for JAVA", category: "common"} + newSampleName: {description: "A string to replace the old sample name.", category: "required"} memory: {description: "The memory required to run the programs", category: "common"} + javaXmx: {description: "The max. memory allocated for JAVA", category: "common"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } - diff --git a/samtools.wdl b/samtools.wdl index b69c28b8..ac8fedfe 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -28,6 +28,7 @@ task BgzipAndIndex { } parameter_meta { + # inputs inputFile: {description: "The file to be compressed and indexed.", category: "required"} outputDir: {description: "The directory in which the output will be placed.", category: "required"} type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} @@ -70,6 +71,7 @@ task Index { } parameter_meta { + # inputs bamFile: {description: "The BAM file for which an index should be made.", category: "required"} outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", category: "common"} @@ -105,6 +107,7 @@ task Merge { } parameter_meta { + # inputs bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} @@ -136,6 +139,7 @@ task SortByName { } parameter_meta { + # inputs bamFile: {description: "The BAM file to get sorted.", category: "required"} outputBamPath: {description: "The location the sorted BAM file should be written to.", category: "common"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", @@ -166,6 +170,7 @@ task Markdup { } parameter_meta { + # inputs inputBam: {description: "The BAM file to be processed.", category: "required"} outputBamPath: {description: "The location of the output BAM file.", category: "required"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", @@ -196,6 +201,7 @@ task Flagstat { } parameter_meta { + # inputs inputBam: {description: "The BAM file for which statistics should be retrieved.", category: "required"} outputPath: {description: "The location the ouput should be written to.", category: "required"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", @@ -249,6 +255,7 @@ task Fastq { } parameter_meta { + # inputs inputBam: {description: "The bam file to process.", category: "required"} outputRead1: {description: "The location the reads (first reads for pairs, in case of paired-end sequencing) should be written to.", category: "required"} outputRead2: {description: "The location the second reads from pairs should be written to.", category: "common"} @@ -293,6 +300,7 @@ task Tabix { } parameter_meta { + # inputs inputFile: {description: "The file to be indexed.", category: "required"} outputFilePath: {description: "The location where the file should be written to. 
The index will appear alongside this link to the file.", category: "common"} @@ -351,6 +359,7 @@ task View { } parameter_meta { + # inputs inFile: {description: "A BAM, SAM or CRAM file.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} outputFileName: {description: "The location the output BAM file should be written.", category: "common"} @@ -372,6 +381,7 @@ task FilterShortReadsBam { File bamFile String outputPathBam String outputPathBamIndex + String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } diff --git a/survivor.wdl b/survivor.wdl index bf147816..8539d223 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -4,7 +4,6 @@ import "common.wdl" task Merge { input{ - String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" Array[File] filePaths Int breakpointDistance = 1000 Int suppVecs = 2 @@ -14,10 +13,12 @@ task Merge { Int minSize = 30 String sample String outputPath + String memory = "128G" + String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" } - command { + command { set -e mkdir -p "$(dirname ~{outputPath})" echo '~{sep="\n" filePaths}' > fileList @@ -30,29 +31,29 @@ task Merge { ~{distanceBySvSize} \ ~{minSize} \ ~{outputPath} - } + } output { File mergedVcf = outputPath } - + runtime { - docker: dockerImage memory: memory + docker: dockerImage } parameter_meta { + # inputs filePaths: {description: "An array of VCF files (predictions) to be merged by SURVIVOR", category: "required"} + breakpointDistance: {description: "The distance between pairwise breakpoints between SVs", category: "required"} + suppVecs: {description: "The minimum number of SV callers to support the merging", category: "required"} + svType: {description: "A boolean to include the type SV to be merged", category: "required"} + strandType: {description: "A boolean to include strand type of an SV to be merged", category: "required"} + distanceBySvSize: 
{description: "A boolean to predict the pairwise distance between the SVs based on their size", category: "required"} + minSize: {description: "The mimimum size of SV to be merged", category: "required"} sample: {description: "The name of the sample", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - minSize: {description: "The mimimum size of SV to be merged", category: "required"} - distanceBySvSize: {description: "A boolean to predict the pairwise distance between the SVs based on their size", category: "required"} - strandType: {description: "A boolean to include strand type of an SV to be merged", category: "required"} - svType: {description: "A boolean to include the type SV to be merged", category: "required"} - suppVecs: {description: "The minimum number of SV callers to support the merging", category: "required"} - breakpointDistance: {description: "The distance between pairwise breakpoints between SVs", category: "required"} memory: {description: "The memory required to run the programs", category: "common"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From f3a55f1061552ce63fa83ee3dd367c6acf40114a Mon Sep 17 00:00:00 2001 From: cedrick Date: Tue, 11 Feb 2020 10:38:48 +0100 Subject: [PATCH 0200/1208] update CHANGE.log --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f621caa9..39070993 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,13 @@ that users understand how the changes affect the new version. 
version 2.2.0-dev --------------------------- ++ Update WDL task Picard (Add task RenameSample) ++ Update WDL task Samtools (Add task FilterShortReadsBam) ++ Add WDL task for BCFtools ++ Add WDL task for SURVIVOR ++ Update WDL task Manta (Add germline SV calling) ++ Add WDL task for Delly ++ Add WDL task for Clever (and Mate-Clever) + Add pedigree input for HaplotypeCaller and GenotypeGVCFs. + Combined biopet.ScatterRegions and biopet.ReorderedGlobbedScatters into one. biopet.ScatterRegions now always returns correctly ordered scatters. From ae610e8ce8d3ac46514813af930b3e10069ff61f Mon Sep 17 00:00:00 2001 From: cagaser Date: Tue, 11 Feb 2020 10:53:04 +0100 Subject: [PATCH 0201/1208] update CHANGELOG.md --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39070993..52f06884 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,8 +13,8 @@ version 2.2.0-dev --------------------------- + Update WDL task Picard (Add task RenameSample) + Update WDL task Samtools (Add task FilterShortReadsBam) -+ Add WDL task for BCFtools -+ Add WDL task for SURVIVOR ++ Add WDL task for BCFtools (bcf to vcf) ++ Add WDL task for SURVIVOR (merge) + Update WDL task Manta (Add germline SV calling) + Add WDL task for Delly + Add WDL task for Clever (and Mate-Clever) From 338f85339b62da7921f2d5d68d0870e81a80d542 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 11 Feb 2020 11:29:19 +0100 Subject: [PATCH 0202/1208] Reorder a few things --- bcftools.wdl | 7 +++-- clever.wdl | 65 ++++++++++++++++++++------------------- delly.wdl | 1 + manta.wdl | 86 ++++++++++++++++++++++++++-------------------------- 4 files changed, 81 insertions(+), 78 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 8fa3fc37..fbc3639d 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -4,19 +4,20 @@ task Bcf2Vcf { input { File bcf String outputPath + String dockerImage = "quay.io/biocontainers/bcftools:1.9--ha228f0b_3" } - + command { set -e mkdir -p 
"$(dirname ~{outputPath})" bcftools view ~{bcf} -O v -o ~{outputPath} } - + output { File outputVcf = outputPath } - + runtime { docker: dockerImage } diff --git a/clever.wdl b/clever.wdl index b61a840b..93b3759f 100644 --- a/clever.wdl +++ b/clever.wdl @@ -2,13 +2,17 @@ version 1.0 import "bwa.wdl" -task Prediction { +task Mateclever { input { - File bamFile - File bamIndex + File fiteredBam + File indexedFiteredBam BwaIndex bwaIndex + File predictions String outputPath - + Int cleverMaxDelLength = 100000 + Int maxLengthDiff= 30 + Int maxOffset = 150 + Int threads = 10 String memory = "15G" String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" @@ -17,18 +21,21 @@ task Prediction { command { set -e mkdir -p "$(dirname ~{outputPath})" - clever \ + echo ~{outputPath} ~{fiteredBam} ~{predictions} none > predictions.list + mateclever \ -T ~{threads} \ - --use_mapq \ - --sorted \ + -k \ -f \ - ~{bamFile} \ + -M ~{cleverMaxDelLength} \ + -z ~{maxLengthDiff} \ + -o ~{maxOffset} \ ~{bwaIndex.fastaFile} \ + predictions.list \ ~{outputPath} } output { - File predictions = outputPath + "/predictions.vcf" + File matecleverVcf = outputPath + "/deletions.vcf" } runtime { @@ -39,26 +46,26 @@ task Prediction { parameter_meta { # inputs - bamFile: {description: "The bam file to process.", category: "required"} - bamIndex: {description: "The index bam file.", category: "required"} + fiteredBam: {description: "The bam file where sequences less than 30bp were removed.", category: "required"} + indexedFiteredBam: {description: "The index of the filtered bam file.", category: "required"} bwaIndex: {description: "The BWA index files.", category: "required"} + predictions: {description: "The predicted deletions (VCF) from clever.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} + cleverMaxDelLength: {description: "Maximum deletion length to look for from clever predictions.", category: 
"common"} + maxLengthDiff: {description: "Maximum length difference between split-read and read-pair deletion to be considered identical ", category: "common"} + maxOffset: {description: "Maximum center distance between split-read and read-pair deletion to be considered identical", category: "common"} threads: {description: "The the number of threads required to run a program", category: "common"} memory: {description: "The memory required to run the programs", category: "common"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -task Mateclever { +task Prediction { input { - File fiteredBam - File indexedFiteredBam + File bamFile + File bamIndex BwaIndex bwaIndex - File predictions String outputPath - Int cleverMaxDelLength = 100000 - Int maxLengthDiff= 30 - Int maxOffset = 150 Int threads = 10 String memory = "15G" @@ -68,21 +75,18 @@ task Mateclever { command { set -e mkdir -p "$(dirname ~{outputPath})" - echo ~{outputPath} ~{fiteredBam} ~{predictions} none > predictions.list - mateclever \ + clever \ -T ~{threads} \ - -k \ + --use_mapq \ + --sorted \ -f \ - -M ~{cleverMaxDelLength} \ - -z ~{maxLengthDiff} \ - -o ~{maxOffset} \ + ~{bamFile} \ ~{bwaIndex.fastaFile} \ - predictions.list \ ~{outputPath} } output { - File matecleverVcf = outputPath + "/deletions.vcf" + File predictions = outputPath + "/predictions.vcf" } runtime { @@ -92,15 +96,12 @@ task Mateclever { } parameter_meta { - fiteredBam: {description: "The bam file where sequences less than 30bp were removed.", category: "required"} - indexedFiteredBam: {description: "The index of the filtered bam file.", category: "required"} + # inputs + bamFile: {description: "The bam file to process.", category: "required"} + bamIndex: {description: "The index bam file.", category: "required"} bwaIndex: {description: "The BWA index files.", category: "required"} - predictions: {description: 
"The predicted deletions (VCF) from clever.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} threads: {description: "The the number of threads required to run a program", category: "common"} - maxOffset: {description: "Maximum center distance between split-read and read-pair deletion to be considered identical", category: "common"} - maxLengthDiff: {description: "Maximum length difference between split-read and read-pair deletion to be considered identical ", category: "common"} - cleverMaxDelLength: {description: "Maximum deletion length to look for from clever predictions.", category: "common"} memory: {description: "The memory required to run the programs", category: "common"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/delly.wdl b/delly.wdl index 482cd8ad..2c9bcafb 100644 --- a/delly.wdl +++ b/delly.wdl @@ -33,6 +33,7 @@ task CallSV { } parameter_meta { + # inputs bamFile: {description: "The bam file to process.", category: "required"} bamIndex: {description: "The index bam file.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} diff --git a/manta.wdl b/manta.wdl index f1ad5d08..ef9a347d 100644 --- a/manta.wdl +++ b/manta.wdl @@ -2,15 +2,13 @@ version 1.0 import "common.wdl" -task Somatic { +task Germline { input { - File tumorBam - File tumorBamIndex - File? normalBam - File? normalBamIndex + File bamFile + File bamIndex File referenceFasta File referenceFastaFai - String runDir = "./manta_run" + String runDir File? callRegions File? 
callRegionsIndex Boolean exome = false @@ -21,9 +19,9 @@ task Somatic { } command { + set -e configManta.py \ - ~{"--normalBam " + normalBam} \ - ~{"--tumorBam " + tumorBam} \ + ~{"--normalBam " + bamFile} \ --referenceFasta ~{referenceFasta} \ ~{"--callRegions " + callRegions} \ --runDir ~{runDir} \ @@ -36,18 +34,8 @@ task Somatic { } output { - File candidateSmallIndelsVcf = runDir + "/results/variants/candidateSmallIndels.vcf.gz" - File candidateSmallIndelsVcfIndex = runDir + "/results/variants/candidateSmallIndels.vcf.gz.tbi" - File candidateSVVcf = runDir + "/results/variants/candidateSV.vcf.gz" - File candidatSVVcfIndex = runDir + "/results/variants/candidateSV.vcf.gz.tbi" - File tumorSVVcf = if defined(normalBam) - then runDir + "/results/variants/somaticSV.vcf.gz" - else runDir + "/results/variants/tumorSV.vcf.gz" - File tumorSVVcfIndex = if defined(normalBam) - then runDir + "/results/variants/somaticSV.vcf.gz.tbi" - else runDir + "/results/variants/tumorSV.vcf.gz.tbi" - File? diploidSV = runDir + "/results/variants/diploidSV.vcf.gz" - File? 
diploidSVindex = runDir + "/results/variants/diploidSV.vcf.gz.tbi" + File mantaVCF = runDir + "/results/variants/diploidSV.vcf.gz" + File mantaVCFindex = runDir + "/results/variants/diploidSV.vcf.gz.tbi" } runtime { @@ -58,29 +46,29 @@ task Somatic { parameter_meta { # inputs - tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"} - tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} - normalBam: {description: "The normal/control sample's BAM file.", category: "common"} - normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "common"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + bamFile: {description: "The bam file to process.", category: "required"} + bamIndex: {description: "The index bam file.", category: "required"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} + referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } runDir: {description: "The directory to use as run/output directory.", category: "common"} callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} - cores: {description: "The number of cores to use.", category: "advanced"} - memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} + cores: {description: "The the number of cores required to run a program", category: "common"} + memoryGb: {description: "The memory required to run the manta", category: 
"common"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -task Germline { +task Somatic { input { - File bamFile - File bamIndex + File tumorBam + File tumorBamIndex + File? normalBam + File? normalBamIndex File referenceFasta File referenceFastaFai - String runDir + String runDir = "./manta_run" File? callRegions File? callRegionsIndex Boolean exome = false @@ -91,9 +79,9 @@ task Germline { } command { - set -e configManta.py \ - ~{"--normalBam " + bamFile} \ + ~{"--normalBam " + normalBam} \ + ~{"--tumorBam " + tumorBam} \ --referenceFasta ~{referenceFasta} \ ~{"--callRegions " + callRegions} \ --runDir ~{runDir} \ @@ -106,8 +94,18 @@ task Germline { } output { - File mantaVCF = runDir + "/results/variants/diploidSV.vcf.gz" - File mantaVCFindex = runDir + "/results/variants/diploidSV.vcf.gz.tbi" + File candidateSmallIndelsVcf = runDir + "/results/variants/candidateSmallIndels.vcf.gz" + File candidateSmallIndelsVcfIndex = runDir + "/results/variants/candidateSmallIndels.vcf.gz.tbi" + File candidateSVVcf = runDir + "/results/variants/candidateSV.vcf.gz" + File candidatSVVcfIndex = runDir + "/results/variants/candidateSV.vcf.gz.tbi" + File tumorSVVcf = if defined(normalBam) + then runDir + "/results/variants/somaticSV.vcf.gz" + else runDir + "/results/variants/tumorSV.vcf.gz" + File tumorSVVcfIndex = if defined(normalBam) + then runDir + "/results/variants/somaticSV.vcf.gz.tbi" + else runDir + "/results/variants/tumorSV.vcf.gz.tbi" + File? diploidSV = runDir + "/results/variants/diploidSV.vcf.gz" + File? 
diploidSVindex = runDir + "/results/variants/diploidSV.vcf.gz.tbi" } runtime { @@ -117,17 +115,19 @@ task Germline { } parameter_meta { - bamFile: {description: "The bam file to process.", category: "required"} - bamIndex: {description: "The index bam file.", category: "required"} - referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} - referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } + # inputs + tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"} + tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} + normalBam: {description: "The normal/control sample's BAM file.", category: "common"} + normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "common"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} runDir: {description: "The directory to use as run/output directory.", category: "common"} callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} - cores: {description: "The the number of cores required to run a program", category: "common"} - memoryGb: {description: "The memory required to run the manta", category: "common"} + cores: {description: "The number of cores to use.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } - From 259cc599c01a84dc00b90b5f13d2be3c603c3c47 Mon Sep 17 00:00:00 2001 From: cedrick Date: Tue, 11 Feb 2020 15:56:36 +0100 Subject: [PATCH 0203/1208] fix meta --- clever.wdl | 20 ++++++++++---------- delly.wdl | 4 ++-- manta.wdl | 6 +++--- picard.wdl | 4 ++-- survivor.wdl | 16 ++++++++-------- 5 files changed, 25 insertions(+), 25 deletions(-) diff --git a/clever.wdl b/clever.wdl index b61a840b..57259074 100644 --- a/clever.wdl +++ b/clever.wdl @@ -43,8 +43,8 @@ task Prediction { bamIndex: {description: "The index bam file.", category: "required"} bwaIndex: {description: "The BWA index files.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - threads: {description: "The the number of threads required to run a program", category: "common"} - memory: {description: "The memory required to run the programs", category: "common"} + threads: {description: "The the number of threads required to run a program", category: "advanced"} + memory: {description: "The memory required to run the programs", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -92,16 +92,16 @@ task Mateclever { } parameter_meta { - fiteredBam: {description: "The bam file where sequences less than 30bp were removed.", category: "required"} - indexedFiteredBam: {description: "The index of the filtered bam file.", category: "required"} + fiteredBam: {description: "The bam file where sequences less than 30bp were removed.", category: "advanced"} + indexedFiteredBam: {description: "The index of the filtered bam file.", category: "advanced"} bwaIndex: {description: "The BWA index files.", category: "required"} - predictions: {description: "The predicted deletions (VCF) from clever.", category: "required"} + predictions: {description: "The predicted deletions (VCF) from clever.", category: "advanced"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - threads: {description: "The the number of threads required to run a program", category: "common"} - maxOffset: {description: "Maximum center distance between split-read and read-pair deletion to be considered identical", category: "common"} - maxLengthDiff: {description: "Maximum length difference between split-read and read-pair deletion to be considered identical ", category: "common"} - cleverMaxDelLength: {description: "Maximum deletion length to look for from clever predictions.", category: "common"} - memory: {description: "The memory required to run the programs", category: "common"} + threads: {description: "The the number of threads required to run a program", category: "advanced"} + maxOffset: {description: "Maximum center distance between split-read and read-pair deletion to be considered identical", category: "advanced"} + maxLengthDiff: {description: "Maximum length difference between split-read and read-pair deletion to be considered identical ", category: "advanced"} + cleverMaxDelLength: {description: "Maximum deletion 
length to look for from clever predictions.", category: "advanced"} + memory: {description: "The memory required to run the programs", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/delly.wdl b/delly.wdl index 482cd8ad..f194bdcf 100644 --- a/delly.wdl +++ b/delly.wdl @@ -35,10 +35,10 @@ task CallSV { parameter_meta { bamFile: {description: "The bam file to process.", category: "required"} bamIndex: {description: "The index bam file.", category: "required"} - referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } outputPath: {description: "The location the output VCF file should be written.", category: "common"} - memory: {description: "The memory required to run the programs", category: "common"} + memory: {description: "The memory required to run the programs", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/manta.wdl b/manta.wdl index f1ad5d08..3e9be16e 100644 --- a/manta.wdl +++ b/manta.wdl @@ -119,14 +119,14 @@ task Germline { parameter_meta { bamFile: {description: "The bam file to process.", category: "required"} bamIndex: {description: "The index bam file.", category: "required"} - referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } runDir: {description: "The directory to use as run/output directory.", category: "common"} callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} - cores: {description: "The the number of cores required to run a program", category: "common"} - memoryGb: {description: "The memory required to run the manta", category: "common"} + cores: {description: "The the number of cores required to run a program", category: "advanced"} + memoryGb: {description: "The memory required to run the manta", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/picard.wdl b/picard.wdl index ae02a0d7..f35ad21a 100644 --- a/picard.wdl +++ b/picard.wdl @@ -656,8 +656,8 @@ task RenameSample { inputVcf: {description: "The VCF file to process.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} newSampleName: {description: "A string to replace the old sample name.", category: "required"} - memory: {description: "The memory required to run the programs", category: "common"} - javaXmx: {description: "The max. memory allocated for JAVA", category: "common"} + memory: {description: "The memory required to run the programs", category: "advanced"} + javaXmx: {description: "The max. memory allocated for JAVA", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/survivor.wdl b/survivor.wdl index 8539d223..34bfd9dd 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -44,16 +44,16 @@ task Merge { parameter_meta { # inputs - filePaths: {description: "An array of VCF files (predictions) to be merged by SURVIVOR", category: "required"} - breakpointDistance: {description: "The distance between pairwise breakpoints between SVs", category: "required"} - suppVecs: {description: "The minimum number of SV callers to support the merging", category: "required"} - svType: {description: "A boolean to include the type SV to be merged", category: "required"} - strandType: {description: "A boolean to include strand type of an SV to be merged", category: "required"} - distanceBySvSize: {description: "A boolean to predict the pairwise distance between the SVs based on their size", category: "required"} - minSize: {description: "The mimimum size of SV to be merged", category: "required"} + filePaths: 
{description: "An array of VCF files (predictions) to be merged by SURVIVOR", category: "advanced"} + breakpointDistance: {description: "The distance between pairwise breakpoints between SVs", category: "advanced"} + suppVecs: {description: "The minimum number of SV callers to support the merging", category: "advanced"} + svType: {description: "A boolean to include the type SV to be merged", category: "advanced"} + strandType: {description: "A boolean to include strand type of an SV to be merged", category: "advanced"} + distanceBySvSize: {description: "A boolean to predict the pairwise distance between the SVs based on their size", category: "advanced"} + minSize: {description: "The mimimum size of SV to be merged", category: "advanced"} sample: {description: "The name of the sample", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - memory: {description: "The memory required to run the programs", category: "common"} + memory: {description: "The memory required to run the programs", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From d05b87517b58b6d29b3f48f9d58ccff1da1494d8 Mon Sep 17 00:00:00 2001 From: cedrick Date: Tue, 11 Feb 2020 16:05:00 +0100 Subject: [PATCH 0204/1208] fix merging conflict --- clever.wdl | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/clever.wdl b/clever.wdl index 4a0ba84b..f7a7a3eb 100644 --- a/clever.wdl +++ b/clever.wdl @@ -53,9 +53,6 @@ task Mateclever { outputPath: {description: "The location the output VCF file should be written.", category: "common"} threads: {description: "The the number of threads required to run a program", category: "advanced"} memory: {description: "The memory required to run the programs", category: "advanced"} - cleverMaxDelLength: {description: "Maximum deletion length to look for from clever predictions.", category: "advanced"} - maxLengthDiff: {description: "Maximum length difference between split-read and read-pair deletion to be considered identical ", category: "advanced"} - maxOffset: {description: "Maximum center distance between split-read and read-pair deletion to be considered identical", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -96,7 +93,6 @@ task Prediction { } parameter_meta { -<<<<<<< HEAD fiteredBam: {description: "The bam file where sequences less than 30bp were removed.", category: "advanced"} indexedFiteredBam: {description: "The index of the filtered bam file.", category: "advanced"} bwaIndex: {description: "The BWA index files.", category: "required"} @@ -107,15 +103,6 @@ task Prediction { maxLengthDiff: {description: "Maximum length difference between split-read and read-pair deletion to be considered identical ", category: "advanced"} cleverMaxDelLength: {description: "Maximum deletion length to look for from clever predictions.", category: "advanced"} memory: {description: "The memory required to run the programs", category: "advanced"} -======= - # inputs - bamFile: {description: "The bam file to process.", category: "required"} - bamIndex: {description: "The index bam file.", category: "required"} - bwaIndex: {description: "The BWA index files.", category: "required"} - outputPath: {description: "The location the output VCF file should be written.", category: "common"} - threads: {description: "The the number of threads required to run a program", category: "common"} - memory: {description: "The memory required to run the programs", category: "common"} ->>>>>>> 338f85339b62da7921f2d5d68d0870e81a80d542 dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From b885b5fcf4a1d5ecbd5869146ade42a0bf22a8da Mon Sep 17 00:00:00 2001 From: cedrick Date: Tue, 11 Feb 2020 16:08:40 +0100 Subject: [PATCH 0205/1208] removed common.wdl import --- picard.wdl | 2 -- survivor.wdl | 2 -- 2 files changed, 4 deletions(-) diff --git a/picard.wdl b/picard.wdl index f35ad21a..79a9bb11 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1,7 +1,5 @@ version 1.0 -import "common.wdl" - task BedToIntervalList { input { File bedFile diff --git a/survivor.wdl b/survivor.wdl index 34bfd9dd..26417136 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -1,7 +1,5 @@ version 1.0 -import "common.wdl" - task Merge { input{ Array[File] filePaths From 9ec5ab9d38bf62aa074415cc7f717be53ff4ba09 Mon Sep 17 00:00:00 2001 From: cedrick Date: Tue, 11 Feb 2020 16:15:01 +0100 Subject: [PATCH 0206/1208] add missing parameter_meta --- bcftools.wdl | 7 ++++++- samtools.wdl | 8 +++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index fbc3639d..7c85c960 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -4,7 +4,6 @@ task Bcf2Vcf { input { File bcf String outputPath - String dockerImage = "quay.io/biocontainers/bcftools:1.9--ha228f0b_3" } @@ -21,4 +20,10 @@ task Bcf2Vcf { runtime { docker: dockerImage } + + parameter_meta { + bcf: {description: "The generated BCF from an SV caller", category: "advanced"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } } diff --git a/samtools.wdl b/samtools.wdl index ac8fedfe..a07657b0 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -381,7 +381,6 @@ task FilterShortReadsBam { File bamFile String outputPathBam String outputPathBamIndex - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } @@ -402,4 +401,11 @@ task FilterShortReadsBam { runtime { docker: dockerImage } + + parameter_meta { + bamFile: {description: "The bam file to process.", category: "required"} + outputPathBam: {description: "The filtered bam file.", category: "advanced"} + outputPathBamIndex: {description: "The index of filtered bam file.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } } From 407435d83abdeee839e09c425f325456fba69140 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Feb 2020 16:16:37 +0100 Subject: [PATCH 0207/1208] add license headers --- CPAT.wdl | 20 ++++++++++++++++++++ biopet/bamstats.wdl | 20 +++++++++++++++++++- biopet/biopet.wdl | 20 ++++++++++++++++++++ biopet/sampleconfig.wdl | 20 ++++++++++++++++++++ biopet/seqstat.wdl | 20 +++++++++++++++++++- bwa.wdl | 20 ++++++++++++++++++++ centrifuge.wdl | 10 +++++----- chunked-scatter.wdl | 20 ++++++++++++++++++++ collect-columns.wdl | 20 ++++++++++++++++++++ common.wdl | 20 ++++++++++++++++++++ cutadapt.wdl | 20 ++++++++++++++++++++ fastqc.wdl | 20 ++++++++++++++++++++ flash.wdl | 20 ++++++++++++++++++++ gffread.wdl | 20 ++++++++++++++++++++ hisat2.wdl | 20 ++++++++++++++++++++ htseq.wdl | 20 ++++++++++++++++++++ macs2.wdl | 20 ++++++++++++++++++++ manta.wdl | 20 ++++++++++++++++++++ multiqc.wdl | 20 ++++++++++++++++++++ ncbi.wdl | 20 ++++++++++++++++++++ picard.wdl | 20 ++++++++++++++++++++ samtools.wdl | 20 ++++++++++++++++++++ scripts | 2 +- seqtk.wdl | 
20 ++++++++++++++++++++ somaticseq.wdl | 20 ++++++++++++++++++++ spades.wdl | 20 ++++++++++++++++++++ star.wdl | 20 ++++++++++++++++++++ strelka.wdl | 20 ++++++++++++++++++++ stringtie.wdl | 20 ++++++++++++++++++++ unicycler.wdl | 20 ++++++++++++++++++++ vardict.wdl | 20 ++++++++++++++++++++ 31 files changed, 584 insertions(+), 8 deletions(-) diff --git a/CPAT.wdl b/CPAT.wdl index 73c9d13c..423e99ac 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task CPAT { input { File gene diff --git a/biopet/bamstats.wdl b/biopet/bamstats.wdl index a00ed97e..7def9aec 100644 --- a/biopet/bamstats.wdl +++ b/biopet/bamstats.wdl @@ -1,6 +1,24 @@ version 1.0 -# Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2018 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
import "../common.wdl" as common diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index 8efb91be..ec64fb4b 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ import "../common.wdl" task BaseCounter { diff --git a/biopet/sampleconfig.wdl b/biopet/sampleconfig.wdl index 59f0bad2..0fbd466a 100644 --- a/biopet/sampleconfig.wdl +++ b/biopet/sampleconfig.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ import "../common.wdl" as common task SampleConfig { diff --git a/biopet/seqstat.wdl b/biopet/seqstat.wdl index ade4b45d..6694a759 100644 --- a/biopet/seqstat.wdl +++ b/biopet/seqstat.wdl @@ -1,6 +1,24 @@ version 1.0 -# Copyright Sequencing Analysis Support Core - Leiden University Medical Center 2018 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
import "../common.wdl" as common diff --git a/bwa.wdl b/bwa.wdl index 655cd288..7a945469 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task Mem { input { File read1 diff --git a/centrifuge.wdl b/centrifuge.wdl index a3e7aeaf..c5fd66f5 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2018 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2018 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 6b320368..619292d9 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task ChunkedScatter { input { File inputFile diff --git a/collect-columns.wdl b/collect-columns.wdl index 09788e20..8b1fa387 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task CollectColumns { input { Array[File]+ inputTables diff --git a/common.wdl b/common.wdl index 389b3511..ef86abcc 100644 --- a/common.wdl +++ b/common.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task AppendToStringArray { input { Array[String] array diff --git a/cutadapt.wdl b/cutadapt.wdl index 58b10d73..f6b8211e 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task Cutadapt { input { File read1 diff --git a/fastqc.wdl b/fastqc.wdl index 31c2b80d..b13c19d1 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task Fastqc { input { File seqFile diff --git a/flash.wdl b/flash.wdl index 03a3646c..6e704921 100644 --- a/flash.wdl +++ b/flash.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ import "common.wdl" as common task Flash { diff --git a/gffread.wdl b/gffread.wdl index 43682fbc..6b23785c 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task GffRead { input { File inputGff diff --git a/hisat2.wdl b/hisat2.wdl index 1575f7e3..bc6be2e8 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task Hisat2 { input { Array[File]+ indexFiles diff --git a/htseq.wdl b/htseq.wdl index 63ea849a..900a88a7 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task HTSeqCount { input { Array[File]+ inputBams diff --git a/macs2.wdl b/macs2.wdl index 9e4c4c2c..fad3cb00 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task PeakCalling { input { Array[File]+ inputBams diff --git a/manta.wdl b/manta.wdl index d0ca75e0..9439ddc2 100644 --- a/manta.wdl +++ b/manta.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ import "common.wdl" task Somatic { diff --git a/multiqc.wdl b/multiqc.wdl index f30cadea..db1dd21e 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task MultiQC { input { # Use a string here so cromwell does not relocate an entire analysis directory diff --git a/ncbi.wdl b/ncbi.wdl index 0678d3d4..d157d902 100644 --- a/ncbi.wdl +++ b/ncbi.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task GenomeDownload { input { String outputPath diff --git a/picard.wdl b/picard.wdl index 48ebf2d5..ce14fd2d 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task BedToIntervalList { input { File bedFile diff --git a/samtools.wdl b/samtools.wdl index 73aa9525..83df4c60 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task BgzipAndIndex { input { File inputFile diff --git a/scripts b/scripts index ff036a83..e93b2224 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit ff036a83f20a6b20fe39c7b738c2b2e38897515b +Subproject commit e93b2224c355ace382a93fbdf99ed29a6aad19f5 diff --git a/seqtk.wdl b/seqtk.wdl index 0b1419d6..321ab132 100644 --- a/seqtk.wdl +++ b/seqtk.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task Sample { input { File sequenceFile diff --git a/somaticseq.wdl b/somaticseq.wdl index 55dd4b94..49e5c36d 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + task ParallelPaired { input { File? 
classifierSNV diff --git a/spades.wdl b/spades.wdl index a0d5fa1e..204dbfea 100644 --- a/spades.wdl +++ b/spades.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task Spades { input { String outputDir diff --git a/star.wdl b/star.wdl index bc6ae5d9..e1e55a26 100644 --- a/star.wdl +++ b/star.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task Star { input { Array[File]+ inputR1 diff --git a/strelka.wdl b/strelka.wdl index 212863ce..826cbd8e 100644 --- a/strelka.wdl +++ b/strelka.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ import "common.wdl" as common task Germline { diff --git a/stringtie.wdl b/stringtie.wdl index 2dcaa9a1..cfaccc92 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task Stringtie { input { File bam diff --git a/unicycler.wdl b/unicycler.wdl index 642d7a86..fc393603 100644 --- a/unicycler.wdl +++ b/unicycler.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + task Unicycler { input { String? 
preCommand diff --git a/vardict.wdl b/vardict.wdl index ed9ee22d..7bfd118e 100644 --- a/vardict.wdl +++ b/vardict.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ import "common.wdl" task VarDict { From 9db6d9c3b6506f8d4115a1212819814983fab213 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Feb 2020 16:17:20 +0100 Subject: [PATCH 0208/1208] update scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index e93b2224..0ae4cffe 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit e93b2224c355ace382a93fbdf99ed29a6aad19f5 +Subproject commit 0ae4cffec92a9145c1e7990df6f1b509a3d38276 From db4bdd7e8264ea7938bd1ce29e2d91a958f80001 Mon Sep 17 00:00:00 2001 From: cedrick Date: Tue, 11 Feb 2020 16:18:09 +0100 Subject: [PATCH 0209/1208] modify SURVIVOR memory from 128G to 24G --- survivor.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/survivor.wdl b/survivor.wdl index 26417136..13119324 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -12,7 +12,7 @@ task Merge { String sample String outputPath - String memory = "128G" + String memory = "24G" String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" } From fd50c034bc979b9b9880c4035bd1ed795092ff6b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Feb 2020 16:21:47 +0100 Subject: [PATCH 0210/1208] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f621caa9..ecb5034d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Add proper copyright headers to all WDL files. So the free software license + is clear to end users who wish to adapt and modify. + Add pedigree input for HaplotypeCaller and GenotypeGVCFs. + Combined biopet.ScatterRegions and biopet.ReorderedGlobbedScatters into one. biopet.ScatterRegions now always returns correctly ordered scatters. 
From 1627159d594c76435ca1a0ce7d7c80dd1411f60d Mon Sep 17 00:00:00 2001 From: cedrick Date: Tue, 11 Feb 2020 16:24:31 +0100 Subject: [PATCH 0211/1208] fix parameter_meta in clever --- clever.wdl | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/clever.wdl b/clever.wdl index f7a7a3eb..7d428d5e 100644 --- a/clever.wdl +++ b/clever.wdl @@ -93,15 +93,13 @@ task Prediction { } parameter_meta { - fiteredBam: {description: "The bam file where sequences less than 30bp were removed.", category: "advanced"} - indexedFiteredBam: {description: "The index of the filtered bam file.", category: "advanced"} + # inputs + bamFile: {description: "The bam file to process.", category: "required"} + bamIndex: {description: "The index bam file.", category: "required"} bwaIndex: {description: "The BWA index files.", category: "required"} predictions: {description: "The predicted deletions (VCF) from clever.", category: "advanced"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} threads: {description: "The the number of threads required to run a program", category: "advanced"} - maxOffset: {description: "Maximum center distance between split-read and read-pair deletion to be considered identical", category: "advanced"} - maxLengthDiff: {description: "Maximum length difference between split-read and read-pair deletion to be considered identical ", category: "advanced"} - cleverMaxDelLength: {description: "Maximum deletion length to look for from clever predictions.", category: "advanced"} memory: {description: "The memory required to run the programs", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From 7a95b91595699a5ffc40dd9f80108291f9c2bf28 Mon Sep 17 00:00:00 2001 From: cagaser Date: Tue, 11 Feb 2020 16:52:29 +0100 Subject: [PATCH 0212/1208] fixed parameter_meta in manta somatic --- manta.wdl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/manta.wdl b/manta.wdl index 055f7a7e..2366de92 100644 --- a/manta.wdl +++ b/manta.wdl @@ -115,16 +115,18 @@ task Somatic { } parameter_meta { - bamFile: {description: "The bam file to process.", category: "required"} - bamIndex: {description: "The index bam file.", category: "required"} - referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} - referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } + tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"} + tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} + normalBam: {description: "The normal/control sample's BAM file.", category: "common"} + normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "common"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} runDir: {description: "The directory to use as run/output directory.", category: "common"} callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} - cores: {description: "The the number of cores required to run a program", category: "advanced"} 
- memoryGb: {description: "The memory required to run the manta", category: "advanced"} + cores: {description: "The number of cores to use.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From 444cde046dd40e8c2b5d3856ac9fe054a27f0f74 Mon Sep 17 00:00:00 2001 From: cagaser Date: Tue, 11 Feb 2020 16:56:46 +0100 Subject: [PATCH 0213/1208] set optional booleans to false --- samtools.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index a07657b0..c73e28dd 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -218,8 +218,8 @@ task Fastq { Int? includeFilter Int? excludeFilter Int? excludeSpecificFilter - Boolean? appendReadNumber - Boolean? outputQuality + Boolean appendReadNumber = false + Boolean outputQuality = false Int? compressionLevel Int threads = 1 @@ -315,8 +315,8 @@ task View { File inFile File? referenceFasta String outputFileName = "view.bam" - Boolean? includeHeader - Boolean? outputBam + Boolean includeHeader = false + Boolean outputBam = false Boolean uncompressedBamOutput = false Int? includeFilter Int? 
excludeFilter From 150589a99abfe2bff2786e8f42f08603097ac139 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 12 Feb 2020 07:22:12 +0100 Subject: [PATCH 0214/1208] update scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 0ae4cffe..f61ac52a 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 0ae4cffec92a9145c1e7990df6f1b509a3d38276 +Subproject commit f61ac52a119f53c4220f0f74fc89c738d22fc659 From 38be15dd04ac1112afede26ccdbc97e8dd7905a3 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 12 Feb 2020 07:26:28 +0100 Subject: [PATCH 0215/1208] update scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index f61ac52a..98cc3e10 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit f61ac52a119f53c4220f0f74fc89c738d22fc659 +Subproject commit 98cc3e10125c853a70f41ceccf8f9d5428d4c1a3 From d267afb1d87e811a473d0e3bb33b97d2ea30e3da Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 12 Feb 2020 07:29:54 +0100 Subject: [PATCH 0216/1208] add copyright header to gffcompare --- gffcompare.wdl | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/gffcompare.wdl b/gffcompare.wdl index 60d19d5f..ca2b1669 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + task GffCompare { input { File? inputGtfList From ca7b9715f1aa5f4779705390752d12056f8473f6 Mon Sep 17 00:00:00 2001 From: cedrick Date: Wed, 12 Feb 2020 09:17:09 +0100 Subject: [PATCH 0217/1208] update submodule scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index ff036a83..98cc3e10 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit ff036a83f20a6b20fe39c7b738c2b2e38897515b +Subproject commit 98cc3e10125c853a70f41ceccf8f9d5428d4c1a3 From 9fcd6da0af6db410c05ef8e49e4c7510b191d081 Mon Sep 17 00:00:00 2001 From: cedrick Date: Wed, 12 Feb 2020 09:43:26 +0100 Subject: [PATCH 0218/1208] add copyright header in CPAT --- CPAT.wdl | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/CPAT.wdl b/CPAT.wdl index 73c9d13c..423e99ac 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -1,5 +1,25 @@ version 1.0 +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions 
of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + task CPAT { input { File gene From 9ac79200f1340149c99609104d1a324c1d0b9ab2 Mon Sep 17 00:00:00 2001 From: cedrick Date: Wed, 12 Feb 2020 09:58:37 +0100 Subject: [PATCH 0219/1208] add header to CPAT --- CPAT.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CPAT.wdl b/CPAT.wdl index 423e99ac..098d9ca6 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -73,4 +73,4 @@ task CPAT { } # There is also make_hexamer_tab.py and make_logitModel.py -# that can be added as tasks here. \ No newline at end of file +# that can be added as tasks here. 
From ce0699c2d1477f03d32f87a1f965e941c03a66a4 Mon Sep 17 00:00:00 2001 From: cedrick Date: Wed, 12 Feb 2020 12:34:53 +0100 Subject: [PATCH 0220/1208] add copyright header --- bcftools.wdl | 22 ++++++++++++++++++++++ clever.wdl | 22 ++++++++++++++++++++++ delly.wdl | 22 +++++++++++++++++++++- manta.wdl | 2 -- 4 files changed, 65 insertions(+), 3 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 7c85c960..f40c93e1 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -1,5 +1,27 @@ version 1.0 +# MIT License +# +# Copyright (c) 2018 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task Bcf2Vcf { input { File bcf diff --git a/clever.wdl b/clever.wdl index 7d428d5e..2d63928e 100644 --- a/clever.wdl +++ b/clever.wdl @@ -1,5 +1,27 @@ version 1.0 +# MIT License +# +# Copyright (c) 2018 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ import "bwa.wdl" task Mateclever { diff --git a/delly.wdl b/delly.wdl index 33c5784d..814ecff5 100644 --- a/delly.wdl +++ b/delly.wdl @@ -1,6 +1,26 @@ version 1.0 -import "common.wdl" +# MIT License +# +# Copyright (c) 2018 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. task CallSV { input { diff --git a/manta.wdl b/manta.wdl index 83a88011..51be9171 100644 --- a/manta.wdl +++ b/manta.wdl @@ -20,8 +20,6 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-import "common.wdl" - task Germline { input { File bamFile From 976d9e6e20d96d84ebfeaa32a0d677055e228ba7 Mon Sep 17 00:00:00 2001 From: cedrick Date: Wed, 12 Feb 2020 13:25:13 +0100 Subject: [PATCH 0221/1208] remove string sample from survivor.wdl --- survivor.wdl | 3 --- 1 file changed, 3 deletions(-) diff --git a/survivor.wdl b/survivor.wdl index 13119324..68244846 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -9,9 +9,7 @@ task Merge { Int strandType = 1 Int distanceBySvSize = 0 Int minSize = 30 - String sample String outputPath - String memory = "24G" String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" } @@ -49,7 +47,6 @@ task Merge { strandType: {description: "A boolean to include strand type of an SV to be merged", category: "advanced"} distanceBySvSize: {description: "A boolean to predict the pairwise distance between the SVs based on their size", category: "advanced"} minSize: {description: "The mimimum size of SV to be merged", category: "advanced"} - sample: {description: "The name of the sample", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The memory required to run the programs", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 18a18df8de7c7f9c3bceefc4d578bf5de7a95b71 Mon Sep 17 00:00:00 2001 From: cedrick Date: Wed, 12 Feb 2020 13:39:48 +0100 Subject: [PATCH 0222/1208] add copyright header in survivor.wdl --- survivor.wdl | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/survivor.wdl b/survivor.wdl index 68244846..d2794ae4 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -1,5 +1,27 @@ version 1.0 +# MIT License +# +# Copyright (c) 2018 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task Merge { input{ Array[File] filePaths From 3ec2d0544ee10080b093f60c00b205b14cc4db53 Mon Sep 17 00:00:00 2001 From: cedrick Date: Wed, 12 Feb 2020 14:55:32 +0100 Subject: [PATCH 0223/1208] fix parameter_meta --- bcftools.wdl | 2 +- clever.wdl | 6 ++++-- delly.wdl | 2 +- manta.wdl | 8 ++++---- picard.wdl | 2 +- samtools.wdl | 4 ++-- survivor.wdl | 4 ++-- 7 files changed, 15 insertions(+), 13 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index f40c93e1..3663b2a3 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -44,7 +44,7 @@ task Bcf2Vcf { } parameter_meta { - bcf: {description: "The generated BCF from an SV caller", category: "advanced"} + bcf: {description: "The generated BCF from an SV caller", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/clever.wdl b/clever.wdl index 2d63928e..a101e2c9 100644 --- a/clever.wdl +++ b/clever.wdl @@ -119,10 +119,12 @@ task Prediction { bamFile: {description: "The bam file to process.", category: "required"} bamIndex: {description: "The index bam file.", category: "required"} bwaIndex: {description: "The BWA index files.", category: "required"} - predictions: {description: "The predicted deletions (VCF) from clever.", category: "advanced"} - outputPath: {description: "The location the output VCF file should be written.", category: "common"} + outputPath: {description: "The location the output VCF file should be written.", category: "required"} threads: {description: "The the number of threads required to run a program", category: "advanced"} memory: {description: "The memory required to run the programs", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + predictions: {description: "The predicted deletions (VCF) from clever.", category: "advanced"} } } diff --git a/delly.wdl b/delly.wdl index 814ecff5..675a0710 100644 --- a/delly.wdl +++ b/delly.wdl @@ -58,7 +58,7 @@ task CallSV { bamIndex: {description: "The index bam file.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } - outputPath: {description: "The location the output VCF file should be written.", category: "common"} + outputPath: {description: "The location the output VCF file should be written.", category: "required"} memory: {description: "The memory required to run the programs", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/manta.wdl b/manta.wdl index 51be9171..b7fbc4e1 100644 --- a/manta.wdl +++ b/manta.wdl @@ -66,14 +66,14 @@ task Germline { # inputs bamFile: {description: "The bam file to process.", category: "required"} bamIndex: {description: "The index bam file.", category: "required"} - referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } - runDir: {description: "The directory to use as run/output directory.", category: "common"} + runDir: {description: "The directory to use as run/output directory.", category: "required"} callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} 
callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} - cores: {description: "The the number of cores required to run a program", category: "common"} - memoryGb: {description: "The memory required to run the manta", category: "common"} + cores: {description: "The the number of cores required to run a program", category: "required"} + memoryGb: {description: "The memory required to run the manta", category: "required"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/picard.wdl b/picard.wdl index 7f2cca95..4b07f526 100644 --- a/picard.wdl +++ b/picard.wdl @@ -672,7 +672,7 @@ task RenameSample { parameter_meta { # inputs inputVcf: {description: "The VCF file to process.", category: "required"} - outputPath: {description: "The location the output VCF file should be written.", category: "common"} + outputPath: {description: "The location the output VCF file should be written.", category: "required"} newSampleName: {description: "A string to replace the old sample name.", category: "required"} memory: {description: "The memory required to run the programs", category: "advanced"} javaXmx: {description: "The max. 
memory allocated for JAVA", category: "advanced"} diff --git a/samtools.wdl b/samtools.wdl index 915f04b7..2570ba47 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -424,8 +424,8 @@ task FilterShortReadsBam { parameter_meta { bamFile: {description: "The bam file to process.", category: "required"} - outputPathBam: {description: "The filtered bam file.", category: "advanced"} - outputPathBamIndex: {description: "The index of filtered bam file.", category: "advanced"} + outputPathBam: {description: "The filtered bam file.", category: "required"} + outputPathBamIndex: {description: "The index of filtered bam file.", category: "required"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/survivor.wdl b/survivor.wdl index d2794ae4..fba613ec 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -62,14 +62,14 @@ task Merge { parameter_meta { # inputs - filePaths: {description: "An array of VCF files (predictions) to be merged by SURVIVOR", category: "advanced"} + filePaths: {description: "An array of VCF files (predictions) to be merged by SURVIVOR", category: "required"} breakpointDistance: {description: "The distance between pairwise breakpoints between SVs", category: "advanced"} suppVecs: {description: "The minimum number of SV callers to support the merging", category: "advanced"} svType: {description: "A boolean to include the type SV to be merged", category: "advanced"} strandType: {description: "A boolean to include strand type of an SV to be merged", category: "advanced"} distanceBySvSize: {description: "A boolean to predict the pairwise distance between the SVs based on their size", category: "advanced"} minSize: {description: "The mimimum size of SV to be merged", category: "advanced"} - outputPath: {description: "The location the output VCF file should be written.", category: "common"} + outputPath: {description: "The location the 
output VCF file should be written.", category: "required"} memory: {description: "The memory required to run the programs", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From 1a77dc8f830181f43c5b18ec64978217fb10506b Mon Sep 17 00:00:00 2001 From: cedrick Date: Wed, 12 Feb 2020 14:58:03 +0100 Subject: [PATCH 0224/1208] fix parameter_meta --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 3663b2a3..d0837ec8 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -45,7 +45,7 @@ task Bcf2Vcf { parameter_meta { bcf: {description: "The generated BCF from an SV caller", category: "required"} - outputPath: {description: "The location the output VCF file should be written.", category: "common"} + outputPath: {description: "The location the output VCF file should be written.", category: "required"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From 5388c793b0ba81b1db3d7a725d47e954103c4c5e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 12 Feb 2020 15:01:30 +0100 Subject: [PATCH 0225/1208] fix CombineVariants docker image --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 78d7a19f..f4ec4f35 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -467,7 +467,7 @@ task CombineVariants { String memory = "24G" String javaXmx = "12G" - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "broadinstitute/gatk3:3.8-1" } command <<< From 64d7837d2b71d96f8bf8053359f11623882014e6 Mon Sep 17 00:00:00 2001 From: cedrick Date: Wed, 12 Feb 2020 15:02:38 +0100 Subject: [PATCH 0226/1208] fix parameter_meta --- clever.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clever.wdl b/clever.wdl index a101e2c9..3e7241e1 100644 --- a/clever.wdl +++ b/clever.wdl @@ -72,7 +72,7 @@ task Mateclever { indexedFiteredBam: {description: "The index of the filtered bam file.", category: "required"} bwaIndex: {description: "The BWA index files.", category: "required"} predictions: {description: "The predicted deletions (VCF) from clever.", category: "required"} - outputPath: {description: "The location the output VCF file should be written.", category: "common"} + outputPath: {description: "The location the output VCF file should be written.", category: "required"} threads: {description: "The the number of threads required to run a program", category: "advanced"} memory: {description: "The memory required to run the programs", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 91a5cdd20b1efd5b9955c4b03c72372a80e900e7 Mon Sep 17 00:00:00 2001 From: cedrick Date: Wed, 12 Feb 2020 15:30:10 +0100 Subject: [PATCH 0227/1208] add default outputPath --- bcftools.wdl | 2 +- clever.wdl | 4 ++-- delly.wdl | 2 +- manta.wdl | 2 +- picard.wdl | 3 +-- survivor.wdl | 2 +- 6 files changed, 7 insertions(+), 8 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index d0837ec8..866a3101 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -25,7 +25,7 @@ version 1.0 task Bcf2Vcf { input { File bcf - String outputPath + String outputPath = "./bcftools/SV.vcf" String dockerImage = "quay.io/biocontainers/bcftools:1.9--ha228f0b_3" } diff --git a/clever.wdl b/clever.wdl index 3e7241e1..ca28336e 100644 --- a/clever.wdl +++ b/clever.wdl @@ -30,7 +30,7 @@ task Mateclever { File indexedFiteredBam BwaIndex bwaIndex File predictions - String outputPath + String outputPath = "./clever" Int cleverMaxDelLength = 100000 Int maxLengthDiff= 30 Int maxOffset = 150 @@ -84,7 +84,7 @@ task Prediction { File bamFile File bamIndex BwaIndex bwaIndex - String outputPath + String outputPath = "./clever" Int threads = 10 String memory = "15G" diff --git a/delly.wdl b/delly.wdl index 675a0710..fab06371 100644 --- a/delly.wdl +++ b/delly.wdl @@ -28,7 +28,7 @@ task CallSV { File bamIndex File referenceFasta File referenceFastaFai - String outputPath + String outputPath = "./delly/delly.vcf" String memory = "15G" String dockerImage = "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" diff --git a/manta.wdl b/manta.wdl index b7fbc4e1..cd869bb1 100644 --- a/manta.wdl +++ b/manta.wdl @@ -26,7 +26,7 @@ task Germline { File bamIndex File referenceFasta File referenceFastaFai - String runDir + String runDir = "./manta_run" File? callRegions File? 
callRegionsIndex Boolean exome = false diff --git a/picard.wdl b/picard.wdl index 4b07f526..88ea3a2e 100644 --- a/picard.wdl +++ b/picard.wdl @@ -642,9 +642,8 @@ task SortVcf { task RenameSample { input { File inputVcf - String outputPath + String outputPath = "./picard/renamed.vcf" String newSampleName - String memory = "24G" String javaXmx = "8G" String dockerImage = "quay.io/biocontainers/picard:2.19.0--0" diff --git a/survivor.wdl b/survivor.wdl index fba613ec..15bff093 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -31,7 +31,7 @@ task Merge { Int strandType = 1 Int distanceBySvSize = 0 Int minSize = 30 - String outputPath + String outputPath = "./survivor/merged.vcf" String memory = "24G" String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" } From 835bea58ec0c45f675fd3a0f886facf9c5052094 Mon Sep 17 00:00:00 2001 From: cedrick Date: Wed, 12 Feb 2020 16:18:24 +0100 Subject: [PATCH 0228/1208] remove outputPathBamIndex as input --- samtools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 2570ba47..b04dd117 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -400,9 +400,10 @@ task FilterShortReadsBam { input { File bamFile String outputPathBam - String outputPathBamIndex String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } + + String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") command { set -e @@ -425,7 +426,6 @@ task FilterShortReadsBam { parameter_meta { bamFile: {description: "The bam file to process.", category: "required"} outputPathBam: {description: "The filtered bam file.", category: "required"} - outputPathBamIndex: {description: "The index of filtered bam file.", category: "required"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From ba638b55c770bee54cf36335298afb4934c8a332 Mon Sep 17 00:00:00 2001 From: cedrick Date: Thu, 13 Feb 2020 10:14:01 +0100 Subject: [PATCH 0229/1208] add vt task --- vt.wdl | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 vt.wdl diff --git a/vt.wdl b/vt.wdl new file mode 100644 index 00000000..7d2eece3 --- /dev/null +++ b/vt.wdl @@ -0,0 +1,45 @@ +version 1.0 + +# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task Normalize { + input { + File inputVCF + File inputVCFIndex + File referenceFasta + File referenceFastaFai + String outputPath + String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" + String memory = "4G" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + vt normalize ~{inputVCF} -r ~{referenceFasta} -o ~{outputPath} + } + + runtime { + memory: memory + docker: dockerImage + } +} + From ec7194113295de455dadcbd6985fb2b967bb4e4f Mon Sep 17 00:00:00 2001 From: cedrick Date: Thu, 13 Feb 2020 10:20:50 +0100 Subject: [PATCH 0230/1208] add default outputpath --- vt.wdl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vt.wdl b/vt.wdl index 7d2eece3..ac30d77b 100644 --- a/vt.wdl +++ b/vt.wdl @@ -26,7 +26,7 @@ task Normalize { File inputVCFIndex File referenceFasta File referenceFastaFai - String outputPath + String outputPath = "./vt/normalized_decomposed.vcf" String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" String memory = "4G" } @@ -34,7 +34,11 @@ task Normalize { command { set -e mkdir -p "$(dirname ~{outputPath})" - vt normalize ~{inputVCF} -r ~{referenceFasta} -o ~{outputPath} + vt normalize ~{inputVCF} -r ~{referenceFasta} | vt decompose -s - -o ~{outputPath} + } + + output { + File outputVcf = outputPath } runtime { From 197360002b63fc330b67a6978ee8d7a234c73de9 Mon Sep 17 00:00:00 2001 From: cedrick Date: Thu, 13 Feb 2020 10:33:28 +0100 Subject: [PATCH 0231/1208] modify outputPath category from required to common --- bcftools.wdl | 2 +- clever.wdl | 2 +- delly.wdl | 2 +- manta.wdl | 2 +- picard.wdl | 2 +- samtools.wdl | 2 +- survivor.wdl | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 866a3101..122fcdd1 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -45,7 +45,7 @@ task Bcf2Vcf { parameter_meta { bcf: {description: "The generated BCF from an SV caller", category: "required"} - outputPath: {description: "The location the output VCF 
file should be written.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/clever.wdl b/clever.wdl index ca28336e..33ac26f2 100644 --- a/clever.wdl +++ b/clever.wdl @@ -119,7 +119,7 @@ task Prediction { bamFile: {description: "The bam file to process.", category: "required"} bamIndex: {description: "The index bam file.", category: "required"} bwaIndex: {description: "The BWA index files.", category: "required"} - outputPath: {description: "The location the output VCF file should be written.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} threads: {description: "The the number of threads required to run a program", category: "advanced"} memory: {description: "The memory required to run the programs", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/delly.wdl b/delly.wdl index fab06371..ad8f18d9 100644 --- a/delly.wdl +++ b/delly.wdl @@ -58,7 +58,7 @@ task CallSV { bamIndex: {description: "The index bam file.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } - outputPath: {description: "The location the output VCF file should be written.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The memory required to run the programs", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/manta.wdl b/manta.wdl index cd869bb1..5006a01e 100644 --- a/manta.wdl +++ b/manta.wdl @@ -68,7 +68,7 @@ task Germline { bamIndex: {description: "The index bam file.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } - runDir: {description: "The directory to use as run/output directory.", category: "required"} + runDir: {description: "The directory to use as run/output directory.", category: "common"} callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} diff --git a/picard.wdl b/picard.wdl index 88ea3a2e..7df96aa9 100644 --- a/picard.wdl +++ b/picard.wdl 
@@ -671,7 +671,7 @@ task RenameSample { parameter_meta { # inputs inputVcf: {description: "The VCF file to process.", category: "required"} - outputPath: {description: "The location the output VCF file should be written.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} newSampleName: {description: "A string to replace the old sample name.", category: "required"} memory: {description: "The memory required to run the programs", category: "advanced"} javaXmx: {description: "The max. memory allocated for JAVA", category: "advanced"} diff --git a/samtools.wdl b/samtools.wdl index b04dd117..de7f2a36 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -425,7 +425,7 @@ task FilterShortReadsBam { parameter_meta { bamFile: {description: "The bam file to process.", category: "required"} - outputPathBam: {description: "The filtered bam file.", category: "required"} + outputPathBam: {description: "The filtered bam file.", category: "common"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/survivor.wdl b/survivor.wdl index 15bff093..ded11d75 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -69,7 +69,7 @@ task Merge { strandType: {description: "A boolean to include strand type of an SV to be merged", category: "advanced"} distanceBySvSize: {description: "A boolean to predict the pairwise distance between the SVs based on their size", category: "advanced"} minSize: {description: "The mimimum size of SV to be merged", category: "advanced"} - outputPath: {description: "The location the output VCF file should be written.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The memory required to run the programs", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From 06d37a72c4f5bdb1c9742f16ccd873e3b9c1b715 Mon Sep 17 00:00:00 2001 From: cedrick Date: Thu, 13 Feb 2020 10:40:52 +0100 Subject: [PATCH 0232/1208] change cleveroutputpath category from required to common --- clever.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clever.wdl b/clever.wdl index 33ac26f2..eab47fde 100644 --- a/clever.wdl +++ b/clever.wdl @@ -72,7 +72,7 @@ task Mateclever { indexedFiteredBam: {description: "The index of the filtered bam file.", category: "required"} bwaIndex: {description: "The BWA index files.", category: "required"} predictions: {description: "The predicted deletions (VCF) from clever.", category: "required"} - outputPath: {description: "The location the output VCF file should be written.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} threads: {description: "The the number of threads required to run a 
program", category: "advanced"} memory: {description: "The memory required to run the programs", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From e2b0285e55f6c7d1a130d3d19a88fd2336560991 Mon Sep 17 00:00:00 2001 From: cedrick Date: Thu, 13 Feb 2020 10:44:27 +0100 Subject: [PATCH 0233/1208] add paramter_meta --- vt.wdl | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/vt.wdl b/vt.wdl index ac30d77b..e04543c2 100644 --- a/vt.wdl +++ b/vt.wdl @@ -45,5 +45,16 @@ task Normalize { memory: memory docker: dockerImage } + + parameter_meta { + # inputs + inputVcf: {description: "The VCF file to process.", category: "required"} + inputVCFIndex: {description: "The index of the VCF file to be processed.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + memory: {description: "The memory required to run the programs", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } } From 568df5ae71134577795f38b4d815758a35ecd35e Mon Sep 17 00:00:00 2001 From: cedrick Date: Thu, 13 Feb 2020 11:02:24 +0100 Subject: [PATCH 0234/1208] minor fix --- vt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vt.wdl b/vt.wdl index e04543c2..7d64cc95 100644 --- a/vt.wdl +++ b/vt.wdl @@ -48,7 +48,7 @@ task Normalize { parameter_meta { # inputs - inputVcf: {description: "The VCF file to process.", category: "required"} + inputVCF: {description: "The VCF file to process.", category: "required"} inputVCFIndex: {description: "The index of the VCF file to be processed.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} From 47bcc7b8c62dadd5dc0b22141f3a4c809cc14936 Mon Sep 17 00:00:00 2001 From: cedrick Date: Thu, 13 Feb 2020 11:07:33 +0100 Subject: [PATCH 0235/1208] update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ecb5034d..045185c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Add vt task for variants normalization and decomposition. + Add proper copyright headers to all WDL files. So the free software license is clear to end users who wish to adapt and modify. + Add pedigree input for HaplotypeCaller and GenotypeGVCFs. 
From 4642c1c72f744ad8b8b9799c2cdf80052f2378c0 Mon Sep 17 00:00:00 2001 From: cedrick Date: Thu, 13 Feb 2020 12:08:54 +0100 Subject: [PATCH 0236/1208] modify copyright year in vt.wdl --- vt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vt.wdl b/vt.wdl index 7d64cc95..54599db0 100644 --- a/vt.wdl +++ b/vt.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal From c6b9a8e4674a5967c53308df3a79d28bd87f6c3a Mon Sep 17 00:00:00 2001 From: cedrick Date: Thu, 13 Feb 2020 13:50:56 +0100 Subject: [PATCH 0237/1208] fix clever parameter_meta --- clever.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clever.wdl b/clever.wdl index eab47fde..e1dcf5a6 100644 --- a/clever.wdl +++ b/clever.wdl @@ -72,6 +72,9 @@ task Mateclever { indexedFiteredBam: {description: "The index of the filtered bam file.", category: "required"} bwaIndex: {description: "The BWA index files.", category: "required"} predictions: {description: "The predicted deletions (VCF) from clever.", category: "required"} + maxOffset: {description: "The maximum center distance between split-read and read-pair deletion to be considered identical.", category: "advanced"} + maxLengthDiff: {description: "The maximum length difference between split-read and read-pair deletion to be considered identical.", category: "advanced"} + cleverMaxDelLength: {description: "The maximum deletion length to look for in Clever predictions.", category: "advanced"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} threads: {description: "The the number of threads required to run a program", category: "advanced"} memory: {description: "The memory 
required to run the programs", category: "advanced"} From 368d1264a2a67f1751e7e25f3780e1f59466957b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 20 Feb 2020 10:23:44 +0100 Subject: [PATCH 0238/1208] ensure output dir exists --- bwa.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/bwa.wdl b/bwa.wdl index 7a945469..665b63ca 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -96,6 +96,7 @@ task Kit { command { set -e + mkdir -p "$(dirname ~{outputPrefix})" bwa mem \ -t ~{threads} \ ~{"-R '" + readgroup}~{true="'" false="" defined(readgroup)} \ From 1e66cc029dd28d1afb4aa64c894c732c6d0a7655 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 20 Feb 2020 10:25:00 +0100 Subject: [PATCH 0239/1208] add changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ecb5034d..69a6cdd2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Fixed a bug where the output directory was not created for bwa.Kit. + Add proper copyright headers to all WDL files. So the free software license is clear to end users who wish to adapt and modify. + Add pedigree input for HaplotypeCaller and GenotypeGVCFs. 
From accc4662cae07b3e61407405d246a4090fe00a57 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 24 Feb 2020 09:55:02 +0100 Subject: [PATCH 0240/1208] add gatk variantfiltration task --- gatk.wdl | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index f4ec4f35..1aa23ea8 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1401,3 +1401,62 @@ task SplitNCigarReads { category: "advanced"} } } + +task VariantFiltration { + input { + File inputVcf + File inputVcfIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai + String outputPath = "filtered.vcf.gz" + Array[String]+ filterArguments + Array[File] intervals = [] + + String memory = "16G" + String javaXmx = "4G" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + gatk --java-options -Xmx~{javaXmx} \ + VariantFiltration \ + -I ~{inputVcf} \ + -R ~{referenceFasta} \ + -O ~{outputPath} \ + ~{sep=" " filterArguments} \ + ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} + } + + output { + File filteredVcf = outputPath + File filteredVcfIndex = outputPath + ".tbi" + } + + runtime { + docker: dockerImage + memory: memory + } + + parameter_meta { + inputVcf: {description: "The VCF to be filtered.", category: "required"} + inputVcfIndex: {description: "The input VCF file's index.", category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", + category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + intervals: {description: "Bed files or interval lists describing the regions to operate 
on.", category: "advanced"} + filterArguments: {description: "Arguments that should be used for the filter. For example: ['--filter-name', 'my_filter', '--filter-expression', 'AB<0.2']", + category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + From 1166aafcdd51f0e2897b3c410c74617b8dd2cb7c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 24 Feb 2020 12:12:34 +0100 Subject: [PATCH 0241/1208] add rtg vcfeval and rtg format --- rtg.wdl | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) create mode 100644 rtg.wdl diff --git a/rtg.wdl b/rtg.wdl new file mode 100644 index 00000000..ae1e5157 --- /dev/null +++ b/rtg.wdl @@ -0,0 +1,144 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Format { + input { + String format = "fasta" + String outputPath = "seq_data.sdf" + Array[File]+ inputFiles + String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" + String memory = "4G" + } + + command { + set -e + mkdir -p $(dirname ~{outputPath}) + rtg format -f ~{format} \ + -o ~{outputPath} \ + ~{sep=' ' inputFiles} + } + + output { + File sdf = outputPath + } + + runtime { + docker: dockerImage + memory: memory + } + + parameter_meta { + format: {description: "Format of input. Allowed values are [fasta, fastq, fastq-interleaved, sam-se, sam-pe] (Default is fasta)", + category: "advanced"} + outputPath: {description: "Where the output should be placed.", category: "advanced"} + inputFiles: {description: "input sequence files. May be specified 1 or more times."} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + } +} + +task VcfEval { + input { + File baseline + File baselineIndex + File calls + File callsIndex + File? evaluationRegions + String outputDir = "output/" + String? region + File template + Boolean allRecords = false + Boolean decompose = false + Boolean refOverlap = false + String? 
sample + Boolean squashPloidy = false + String outputMode = "split" + Int threads = 1 # tool default is number of cores in the system 😱 + String memory = "4G" + String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" + } + + command { + set -e + mkdir -p ~{outputDir} + rtg vcfeval \ + --baseline ~{baseline} \ + --calls ~{calls} \ + ~{"--evaluation-regions " + evaluationRegions} \ + --output ~{outputDir} \ + --template ~{template} \ + ~{true="--all-records" false="" allRecords} \ + ~{true="--decompose" false="" decompose} \ + ~{true="--ref-overlap" false="" refOverlap} \ + ~{"--sample " + sample } \ + ~{true="--squash-ploidy" false="" squashPloidy} \ + ~{"--output-mode " + outputMode} \ + --threads ~{threads} + } + + output { + File falseNegativesVcf = outputDir + "/fn.vcf.gz" + File falseNegativesVcfIndex = outputDir + "/fn.vcf.gz.tbi" + File falsePositivesVcf = outputDir + "/fp.vcf.gz" + File falsePositivesVcfIndex = outputDir + "/fp.vcf.gz.tbi" + File summary = outputDir + "/summary.txt" + File truePositivesBaselineVcf = outputDir + "/tp-baseline.vcf.gz" + File truePositivesBaselineVcfIndex = outputDir + "/tp-baseline.vcf.gz.tbi" + File truePositivesVcf = outputDir + "/tp.vcf.gz" + File truePositivesVcfIndex = outputDir + "/tp.vcf.gz.tbi" + File nonSnpRoc = outputDir + "/non_snp_roc.tsv.gz" + File phasing = outputDir + "/phasing.txt" + File weightedRoc = outputDir + "/weighted_roc.tsv.gz" + } + + runtime { + docker: dockerImage + cpu: threads + memory: memory + } + + parameter_meta { + baseline: {description: "VCF file containing baseline variants", category: "required"} + baselineIndex: {description: "The baseline's VCF index", category: "required"} + calls: {description: "VCF file containing called variants", category: "required"} + callsIndex: {description: "The call's VCF index", category: "required"} + outputDir: {description: "Directory for output", category: "advanced"} + template: {description: "SDF of the reference genome the variants are called 
against", category: "required"} + allRecords: {description: "use all records regardless of FILTER status (Default is to only process records where FILTER is \".\" or \"PASS\")", + category: "common"} + decompose: {description: "decompose complex variants into smaller constituents to allow partial credit", category: "common"} + refOverlap: {description: "allow alleles to overlap where bases of either allele are same-as-ref (Default is to only allow VCF anchor base overlap)", + category: "common"} + sample: {description: "the name of the sample to select. Use , to select different sample names for baseline and calls. (Required when using multi-sample VCF files)", + category: "common"} + squashPloidy: {description: "treat heterozygous genotypes as homozygous ALT in both baseline and calls, to allow matches that ignore zygosity differences", + category: "common"} + outputMode: {description: "output reporting mode. Allowed values are [split, annotate, combine, ga4gh, roc-only] (Default is split)", + category: "advanced"} + threads: {description: "Number of threads. Default is 1", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + } +} + From 996a5fefa828ca89191696d8284bff73c6e3f6cc Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 24 Feb 2020 14:46:13 +0100 Subject: [PATCH 0242/1208] add selectvariants tasks --- gatk.wdl | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index 1aa23ea8..754bbc6d 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1347,6 +1347,62 @@ task PreprocessIntervals { } } +task SelectVariants { + input { + File referenceFasta + File referenceFastaDict + File referenceFastaFai + File inputVcf + File inputVcfIndex + String outputPath = "output.vcf.gz" + String? selectTypeToInclude + Array[File] intervals = [] + String memory = "16G" + String javaXmx = "4G" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + gatk --java-options -Xmx~{javaXmx} \ + SelectVariants \ + -R ~{referenceFasta} \ + -V ~{inputVcf} \ + ~{"--select-type-to-include " + selectTypeToInclude} \ + ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + docker: dockerImage + memory: memory + } + + parameter_meta { + inputVcf: {description: "The VCF input file.", category: "required"} + inputVcfIndex: {description: "The input VCF file's index.", category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", + category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + selectTypeToInclude: 
{description: "Select only a certain type of variants from the input file", category: "common"} + outputPath: {description: "The location the output VCF file should be written.", category: "required"} + intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task SplitNCigarReads { input { File inputBam From 0023ed3eff49eff9769dca5d5e2155b43a8e0ab4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 24 Feb 2020 15:47:26 +0100 Subject: [PATCH 0243/1208] add bedregions input --- rtg.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rtg.wdl b/rtg.wdl index ae1e5157..bc5aa275 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -64,6 +64,7 @@ task VcfEval { File calls File callsIndex File? evaluationRegions + File? bedRegions String outputDir = "output/" String? 
region File template @@ -85,6 +86,7 @@ task VcfEval { --baseline ~{baseline} \ --calls ~{calls} \ ~{"--evaluation-regions " + evaluationRegions} \ + ~{"--bed-regions " + bedRegions} \ --output ~{outputDir} \ --template ~{template} \ ~{true="--all-records" false="" allRecords} \ From efc225af1afcf2ab4e0740d93a6c07d093e3d102 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 25 Feb 2020 09:28:58 +0100 Subject: [PATCH 0244/1208] update tasks --- gatk.wdl | 3 ++- rtg.wdl | 10 ++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 754bbc6d..4029ae02 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1370,7 +1370,8 @@ task SelectVariants { -R ~{referenceFasta} \ -V ~{inputVcf} \ ~{"--select-type-to-include " + selectTypeToInclude} \ - ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} + ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} \ + -O ~{outputPath} } output { diff --git a/rtg.wdl b/rtg.wdl index bc5aa275..278fd015 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -66,7 +66,6 @@ task VcfEval { File? evaluationRegions File? bedRegions String outputDir = "output/" - String? 
region File template Boolean allRecords = false Boolean decompose = false @@ -79,9 +78,9 @@ task VcfEval { String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" } - command { + command <<< set -e - mkdir -p ~{outputDir} + mkdir -p "$(dirname ~{outputDir})" rtg vcfeval \ --baseline ~{baseline} \ --calls ~{calls} \ @@ -96,7 +95,7 @@ task VcfEval { ~{true="--squash-ploidy" false="" squashPloidy} \ ~{"--output-mode " + outputMode} \ --threads ~{threads} - } + >>> output { File falseNegativesVcf = outputDir + "/fn.vcf.gz" @@ -125,6 +124,9 @@ task VcfEval { calls: {description: "VCF file containing called variants", category: "required"} callsIndex: {description: "The call's VCF index", category: "required"} outputDir: {description: "Directory for output", category: "advanced"} + bedRegions: {description: "if set, only read VCF records that overlap the ranges contained in the specified BED file", category: "advanced"} + evaluationRegions: {description: "if set, evaluate within regions contained in the supplied BED file, allowing transborder matches. 
To be used for truth-set high-confidence regions or other regions of interest where region boundary effects should be minimized", + category: "advanced"} template: {description: "SDF of the reference genome the variants are called against", category: "required"} allRecords: {description: "use all records regardless of FILTER status (Default is to only process records where FILTER is \".\" or \"PASS\")", category: "common"} From 474c1d39ae58dcf8f66675e6a1b4382dbb3afa41 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 25 Feb 2020 11:24:35 +0100 Subject: [PATCH 0245/1208] add allStats output --- rtg.wdl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/rtg.wdl b/rtg.wdl index 278fd015..bc2caef6 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -110,6 +110,18 @@ task VcfEval { File nonSnpRoc = outputDir + "/non_snp_roc.tsv.gz" File phasing = outputDir + "/phasing.txt" File weightedRoc = outputDir + "/weighted_roc.tsv.gz" + Array[File] allStats = [falseNegativesVcf, + falseNegativesVcfIndex, + falsePositivesVcf, + falsePositivesVcfIndex, + truePositivesBaselineVcf, + truePositivesBaselineVcfIndex, + truePositivesVcf, + truePositivesVcfIndex, + summary, + nonSnpRoc, + phasing, + weightedRoc] } runtime { From df513d108e8bc1656a9ded275b78e022da95e8e0 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 25 Feb 2020 11:32:23 +0100 Subject: [PATCH 0246/1208] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb14e6cf..388b242f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Added rtg.Format and rtg.VcfEval tasks. ++ Added gatk.SelectVariants and gatk.VariantFiltration tasks. + Fixed a bug where the output directory was not created for bwa.Kit. + Add vt task for variants normalization and decomposition. 
+ Update WDL task Picard (Add task RenameSample) From 57ac6f2c2272ca30812333bf58e7b7fcdf249161 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 25 Feb 2020 15:42:28 +0100 Subject: [PATCH 0247/1208] change default mem requirements --- rtg.wdl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/rtg.wdl b/rtg.wdl index bc2caef6..c528c3a1 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -26,13 +26,14 @@ task Format { String outputPath = "seq_data.sdf" Array[File]+ inputFiles String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" - String memory = "4G" + String rtgMem = "8G" + String memory = "16G" } command { set -e mkdir -p $(dirname ~{outputPath}) - rtg format -f ~{format} \ + rtg RTG_MEM=~{rtgMem} format -f ~{format} \ -o ~{outputPath} \ ~{sep=' ' inputFiles} } @@ -74,14 +75,15 @@ task VcfEval { Boolean squashPloidy = false String outputMode = "split" Int threads = 1 # tool default is number of cores in the system 😱 - String memory = "4G" + String rtgMem = "4G" + String memory = "8G" String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" } command <<< set -e mkdir -p "$(dirname ~{outputDir})" - rtg vcfeval \ + rtg RTG_MEM=~{rtgMem} vcfeval \ --baseline ~{baseline} \ --calls ~{calls} \ ~{"--evaluation-regions " + evaluationRegions} \ From 8e1438b627f50b8cb66dccbce3f653b4a2f6fad9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 25 Feb 2020 16:50:12 +0100 Subject: [PATCH 0248/1208] increase default memory --- rtg.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rtg.wdl b/rtg.wdl index c528c3a1..10856eea 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -75,8 +75,8 @@ task VcfEval { Boolean squashPloidy = false String outputMode = "split" Int threads = 1 # tool default is number of cores in the system 😱 - String rtgMem = "4G" - String memory = "8G" + String rtgMem = "8G" + String memory = "16G" String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" } From bb6896686042d1c60e19ad4483bfd5bed255a617 
Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 3 Mar 2020 09:17:13 +0100 Subject: [PATCH 0249/1208] address review comments --- gatk.wdl | 4 ++-- rtg.wdl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 4029ae02..cd0ed187 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1393,8 +1393,8 @@ task SelectVariants { category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} selectTypeToInclude: {description: "Select only a certain type of variants from the input file", category: "common"} - outputPath: {description: "The location the output VCF file should be written.", category: "required"} - intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"} + outputPath: {description: "The location the output VCF file should be written.", category: "advanced"} + intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", diff --git a/rtg.wdl b/rtg.wdl index 10856eea..b629b1d0 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -51,7 +51,7 @@ task Format { format: {description: "Format of input. Allowed values are [fasta, fastq, fastq-interleaved, sam-se, sam-pe] (Default is fasta)", category: "advanced"} outputPath: {description: "Where the output should be placed.", category: "advanced"} - inputFiles: {description: "input sequence files. May be specified 1 or more times."} + inputFiles: {description: "input sequence files. May be specified 1 or more times.", category: "required"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 414d8f3cee29b172d04ebaa3593bf84f8e428eea Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 3 Mar 2020 11:12:08 +0100 Subject: [PATCH 0250/1208] add rtgMem parameter_meta --- rtg.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rtg.wdl b/rtg.wdl index b629b1d0..8fd53ca4 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -55,6 +55,7 @@ task Format { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + rtgMem: {description: "The amount of memory rtg will allocate to the JVM", category: "advanced"} } } @@ -156,6 +157,7 @@ task VcfEval { threads: {description: "Number of threads. Default is 1", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + rtgMem: {description: "The amount of memory rtg will allocate to the JVM", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} } } From efa12219975ce34bc9f39bd6ebb7e83e25e2a5a2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 9 Mar 2020 12:55:34 +0100 Subject: [PATCH 0251/1208] add output mode to haplotypecaller --- CHANGELOG.md | 1 + gatk.wdl | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 388b242f..b2c61b58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Add `--output-mode` flag to haplotypecaller. + Added rtg.Format and rtg.VcfEval tasks. 
+ Added gatk.SelectVariants and gatk.VariantFiltration tasks. + Fixed a bug where the output directory was not created for bwa.Kit. diff --git a/gatk.wdl b/gatk.wdl index cd0ed187..244a3e2c 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -907,6 +907,7 @@ task HaplotypeCaller { File? dbsnpVCFIndex File? pedigree Int? ploidy + String? outputMode Boolean gvcf = false String memory = "12G" @@ -928,6 +929,7 @@ task HaplotypeCaller { ~{"-D " + dbsnpVCF} \ ~{"--pedigree " + pedigree} \ ~{"--contamination-fraction-per-sample-file " + contamination} \ + ~{"--output-mode " + outputMode} \ ~{true="-ERC GVCF" false="" gvcf} } @@ -955,6 +957,8 @@ task HaplotypeCaller { category: "required"} referenceFastaIndex: {description: "The index for the reference fasta file.", category: "required"} contamination: {description: "Equivalent to HaplotypeCaller's `-contamination` option.", category: "advanced"} + outputMode: {description: "Specifies which type of calls we should output. Same as HaplotypeCaller's `--output-mode` option.", + category: "advanced"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} pedigree: {description: "Pedigree file for determining the population \"founders\"", category: "common"} From 328a1d43d9495b6c431e6a650efe35cd50c82fc9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 11 Mar 2020 09:22:00 +0100 Subject: [PATCH 0252/1208] add RefConfidence mode --- gatk.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 244a3e2c..0ccf5196 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -909,6 +909,7 @@ task HaplotypeCaller { Int? ploidy String? 
outputMode Boolean gvcf = false + String emitRefConfidence = if gvcf then "GVCF" else "NONE" String memory = "12G" String javaXmx = "4G" @@ -930,7 +931,7 @@ task HaplotypeCaller { ~{"--pedigree " + pedigree} \ ~{"--contamination-fraction-per-sample-file " + contamination} \ ~{"--output-mode " + outputMode} \ - ~{true="-ERC GVCF" false="" gvcf} + --emit-ref-confidence ~{emitRefConfidence} } output { From f23b631bf06e51e75328219393f5178962181548 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 11 Mar 2020 09:38:35 +0100 Subject: [PATCH 0253/1208] update changelog --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b2c61b58..298e2451 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,8 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- -+ Add `--output-mode` flag to haplotypecaller. ++ Allow setting the `--emit-ref-confidence` flag for HaplotypeCaller ++ Add `--output-mode` flag to HaplotypeCaller. + Added rtg.Format and rtg.VcfEval tasks. + Added gatk.SelectVariants and gatk.VariantFiltration tasks. + Fixed a bug where the output directory was not created for bwa.Kit. From 94332596940fbf7d0892b16af593c3fca956b206 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 11 Mar 2020 09:44:34 +0100 Subject: [PATCH 0254/1208] add parameter_meta --- gatk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index 0ccf5196..b730cbee 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -960,6 +960,8 @@ task HaplotypeCaller { contamination: {description: "Equivalent to HaplotypeCaller's `-contamination` option.", category: "advanced"} outputMode: {description: "Specifies which type of calls we should output. Same as HaplotypeCaller's `--output-mode` option.", category: "advanced"} + emitRefConfidence: {description: "Whether to include reference calls. 
Three modes: 'NONE', 'BP_RESOLUTION' and 'GVCF'", + category: "advanced"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} pedigree: {description: "Pedigree file for determining the population \"founders\"", category: "common"} From 18e028ac6d486cf5349ba9f52a9a0f316b7e81a6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 11 Mar 2020 09:45:15 +0100 Subject: [PATCH 0255/1208] add check parameter_meta to pr template --- .github/PULL_REQUEST_TEMPLATE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 29fde58b..199344f5 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,3 +1,4 @@ ### Checklist - [ ] Pull request details were added to CHANGELOG.md +- [ ] `parameter_meta` for each task is up to date. From a5d0c4439a0880df9fcb98181825ef84c0078214 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 11 Mar 2020 09:57:35 +0100 Subject: [PATCH 0256/1208] Update CHANGELOG.md Punctuation. Co-Authored-By: JasperBoom --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 298e2451..6fe791f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- -+ Allow setting the `--emit-ref-confidence` flag for HaplotypeCaller ++ Allow setting the `--emit-ref-confidence` flag for HaplotypeCaller. + Add `--output-mode` flag to HaplotypeCaller. + Added rtg.Format and rtg.VcfEval tasks. + Added gatk.SelectVariants and gatk.VariantFiltration tasks. From 5977ad90688fa93c42933ca9c6e146d85af3577f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Mar 2020 15:58:10 +0100 Subject: [PATCH 0257/1208] Update talon container. 
--- talon.wdl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/talon.wdl b/talon.wdl index 05018962..7e81d3f1 100644 --- a/talon.wdl +++ b/talon.wdl @@ -31,7 +31,7 @@ task CreateAbundanceFileFromDatabase { File? datasetsFile String memory = "4G" - String dockerImage = "biocontainers/talon:v4.4.1_cv1" + String dockerImage = "biocontainers/talon:v4.4.2_cv1" } command { @@ -84,7 +84,7 @@ task CreateGtfFromDatabase { File? datasetFile String memory = "4G" - String dockerImage = "biocontainers/talon:v4.4.1_cv1" + String dockerImage = "biocontainers/talon:v4.4.2_cv1" } command { @@ -135,7 +135,7 @@ task FilterTalonTranscripts { File? pairingsFile String memory = "4G" - String dockerImage = "biocontainers/talon:v4.4.1_cv1" + String dockerImage = "biocontainers/talon:v4.4.2_cv1" } command { @@ -180,7 +180,7 @@ task GetReadAnnotations { File? datasetFile String memory = "4G" - String dockerImage = "biocontainers/talon:v4.4.1_cv1" + String dockerImage = "biocontainers/talon:v4.4.2_cv1" } command { @@ -228,7 +228,7 @@ task InitializeTalonDatabase { String outputPrefix String memory = "10G" - String dockerImage = "biocontainers/talon:v4.4.1_cv1" + String dockerImage = "biocontainers/talon:v4.4.2_cv1" } command { @@ -277,7 +277,7 @@ task ReformatGtf { File GTFfile String memory = "4G" - String dockerImage = "biocontainers/talon:v4.4.1_cv1" + String dockerImage = "biocontainers/talon:v4.4.2_cv1" } command { @@ -315,7 +315,7 @@ task SummarizeDatasets { File? 
datasetGroupsCSV String memory = "4G" - String dockerImage = "biocontainers/talon:v4.4.1_cv1" + String dockerImage = "biocontainers/talon:v4.4.2_cv1" } command { @@ -364,7 +364,7 @@ task Talon { Int cores = 4 String memory = "25G" - String dockerImage = "biocontainers/talon:v4.4.1_cv1" + String dockerImage = "biocontainers/talon:v4.4.2_cv1" } command <<< From e22f26429c9792f64fc0cf84a92d659ed1cd7769 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Mar 2020 16:00:04 +0100 Subject: [PATCH 0258/1208] Upload WDL files CCS, Lima and IsoSeq3. --- ccs.wdl | 90 +++++++++++++++++++++++++++++++++++++++++ isoseq3.wdl | 24 +++++++++++ lima.wdl | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 226 insertions(+) create mode 100644 ccs.wdl create mode 100644 isoseq3.wdl create mode 100644 lima.wdl diff --git a/ccs.wdl b/ccs.wdl new file mode 100644 index 00000000..ec968443 --- /dev/null +++ b/ccs.wdl @@ -0,0 +1,90 @@ +version 1.0 + +# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task ccs { + input { + Int minPasses = 3 + Int minLength = 10 + Int maxLength = 50000 + Boolean byStrand = false + Float minReadQuality = 0.99 + String logLevel = "WARN" + File subreadsFile + String outputPrefix + + Int cores = 4 + String memory = "10G" + String dockerImage = "quay.io/biocontainers/pbccs:4.2.0--0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + ccs \ + ~{"--min-passes " + minPasses} \ + ~{"--min-length " + minLength} \ + ~{"--max-length " + maxLength} \ + ~{true="--by-strand" false="" byStrand} \ + ~{"--min-rq " + minReadQuality} \ + ~{"--log-level " logLevel} \ + ~{"--num-threads " + cores} \ + ~{"--report-file " + outputPrefix + ".ccs.report.txt"} \ + ~{"--log-file " + outputPrefix + ".ccs.stderr.log"} \ + ~{subreadsFile} + ~{outputPrefix + ".ccs.bam"} + } + + output { + File outputCCSfile = outputPrefix + ".ccs.bam" + File outputCCSindexFile = outputPrefix + ".ccs.bam.pbi" + File outputReportFile = outputPrefix + ".ccs.report.txt" + File outputSTDERRfile = outputPrefix + ".ccs.stderr.log" + } + + runtime { + cpu: cores + memory: memory + docker: dockerImage + } + + parameter_meta { + # inputs + minPasses: {description: "Minimum number of full-length subreads required to generate CCS for a +ZMW.", category: "advanced"} + minLength: {description: "Minimum draft length before polishing.", category: "advanced"} + maxLength: {description: "Maximum draft length before polishing.", category: "advanced"} + byStrand: {description: "Generate a consensus for each strand.", category: "advanced"} + minReadQuality: {description: "Minimum predicted accuracy in [0, 1].", category: "common"} + logLevel: {description: "Set log level. 
Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} + subreadsFile: {description: "Subreads input file.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + cores: {description: "The number of cores to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputCCSfile: {description: "Consensus reads output file."} + outputCCSindexFile: {description: "Index of consensus reads output file."} + outputReportFile: {description: "CCS results report file."} + outputSTDERRfile: {description: "CCS STDERR log file."} + } +} diff --git a/isoseq3.wdl b/isoseq3.wdl new file mode 100644 index 00000000..78c136f0 --- /dev/null +++ b/isoseq3.wdl @@ -0,0 +1,24 @@ +version 1.0 + +# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task refine { +} diff --git a/lima.wdl b/lima.wdl new file mode 100644 index 00000000..24dd9ea3 --- /dev/null +++ b/lima.wdl @@ -0,0 +1,112 @@ +version 1.0 + +# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task lima { + input { + String libraryDesign = "same" + Boolean scoreFullPass = false + Int maxScoredBarcodePairs = 0 + Int maxScoredBarcodes = 0 + Int maxScoredAdapters = 0 + Int minPasses = 0 + Int minLength = 50 + Int maxInputLength = 0 + Float minRefSpan = 0.5 + Int minScoringRegion = 1 + Int minScore = 0 + Int minEndScore = 0 + Int minSignalIncrease = 10 + Int minScoreLead = 10 + Boolean ccsMode = false + Boolean splitBamNamed = false + Float scoredAdapterRatio = 0.25 + Int peek = 0 + Int guess = 0 + Int guessMinCount = 0 + Boolean peekGuess = false + String logLevel = "WARN" + File inputBamFile + File barcodeFile + String outputPrefix + + Int cores = 4 + String memory = "10G" + String dockerImage = "quay.io/biocontainers/lima:1.11.0--0" + } + + Map[String, String] libraryDesignOptions = {"same": "--same", "different": "--different", "neighbors": "--neighbors"} + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + lima \ + ~{libraryDesignOptions[libraryDesign]} \ + ~{true="--score-full-pass" false="" scoreFullPass} \ + ~{"--max-scored-barcode-pairs " + maxScoredBarcodePairs} \ + ~{"--max-scored-barcodes " + maxScoredBarcodes} \ + ~{"--max-scored-adapters " + maxScoredAdapters} \ + ~{"--min-passes " + minPasses} \ + ~{"--min-length " + minLength} \ + ~{"--max-input-length " + maxInputLength} \ + ~{"--min-ref-span " + minRefSpan} \ + ~{"--min-scoring-regions " + minScoringRegion} \ + ~{"--min-score " + minScore} \ + ~{"--min-end-score " + minEndScore} \ + ~{"--min-signal-increase " + minSignalIncrease} \ + ~{"--min-score-lead " + minScoreLead} \ + ~{true="--ccs" false="" ccsMode} \ + ~{true="--split-bam-named" false="" splitBamNamed} \ + ~{"--scored-adapter-ratio " + scoredAdapterRatio} \ + ~{"--peek " + peek} \ + ~{"--guess " + guess} \ + ~{"--guess-min-count " + guessMinCount} \ + ~{true="--peek-guess" false="" peekGuess} \ + ~{"--log-level " logLevel} \ + ~{"--num-threads " + cores} \ + ~{"--log-file " + outputPrefix + "_fl_stderr.log"} \ + 
~{inputBamFile} \ + ~{barcodeFile} \ + ~{outputPrefix + ".fl.bam"} + } + + output { + File outputFLfile = outputPrefix + ".fl.bam" + File outputFLindexFile = outputPrefix + ".fl.bam.pbi" + File outputSTDERRfile = outputPrefix + ".fl.stderr.log" + File outputJSONfile = outputPrefix + ".fl.json" + File outputCountsFile = outputPrefix + ".fl.lima.counts" + File outputReportFile = outputPrefix + ".fl.lima.report" + File outputSummaryFile = outputPrefix + ".fl.lima.summary" + } + + runtime { + cpu: cores + memory: memory + docker: dockerImage + } + + parameter_meta { + # inputs + libraryDesign: {description: "", category: ""} + # outputs + } +} From 62b74c14b556b3bac9efb00c2c42d6c05031b710 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 11 Mar 2020 16:37:33 +0100 Subject: [PATCH 0259/1208] make statsPrefix optional for umitools dedup --- umi-tools.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index e684ef5a..35d54299 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -71,10 +71,10 @@ task Dedup { File inputBam File inputBamIndex String outputBamPath - String statsPrefix = "stats" + String? 
statsPrefix Boolean paired = true - String memory = "20G" + String memory = "5G" # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" @@ -88,7 +88,7 @@ task Dedup { umi_tools dedup \ --stdin ~{inputBam} \ --stdout ~{outputBamPath} \ - --output-stats ~{statsPrefix} \ + ~{"--output-stats " + statsPrefix} \ ~{true="--paired" false="" paired} samtools index ~{outputBamPath} ~{outputBamIndex} } @@ -96,9 +96,9 @@ task Dedup { output { File deduppedBam = outputBamPath File deduppedBamIndex = outputBamIndex - File editDistance = statsPrefix + "_edit_distance.tsv" - File umiStats = statsPrefix + "_per_umi.tsv" - File positionStats = statsPrefix + "_per_umi_per_position.tsv" + File? editDistance = statsPrefix + "_edit_distance.tsv" + File? umiStats = statsPrefix + "_per_umi.tsv" + File? positionStats = statsPrefix + "_per_umi_per_position.tsv" } runtime { From 38a764857d5f8362bc0a623d24d970c34eece378 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Mar 2020 16:41:12 +0100 Subject: [PATCH 0260/1208] Update parameter_meta Lima.wdl. --- isoseq3.wdl | 22 ++++++++++++++++++++++ lima.wdl | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/isoseq3.wdl b/isoseq3.wdl index 78c136f0..57213352 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -21,4 +21,26 @@ version 1.0 # SOFTWARE. 
task refine { + input { + } + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + } + + output { + } + + runtime { + cpu: cores + memory: memory + docker: dockerImage + } + + parameter_meta { + # inputs + + # outputs + } } diff --git a/lima.wdl b/lima.wdl index 24dd9ea3..c25a35d2 100644 --- a/lima.wdl +++ b/lima.wdl @@ -106,7 +106,42 @@ task lima { parameter_meta { # inputs - libraryDesign: {description: "", category: ""} + libraryDesign: {description: "Barcode structure of the library design.", category: "required"} + scoreFullPass: {description: "Only use subreads flanked by adapters for barcode identification.", category: "advanced"} + maxScoredBarcodePairs: {description: "Only use up to N barcode pair regions to find the barcode, 0 means use all.", category: "advanced"} + maxScoredBarcodes: {description: "Analyze at maximum the provided number of barcodes per ZMW, 0 means deactivated.", category: "advanced"} + maxScoredAdapters: {description: "Analyze at maximum the provided number of adapters per ZMW, 0 means deactivated.", category: "advanced"} + minPasses: {description: "Minimal number of full passes.", category: "common"} + minLength: {description: "Minimum sequence length after clipping.", category: "common"} + maxInputLength: {description: "Maximum input sequence length, 0 means deactivated.", category: "advanced"} + minRefSpan: {description: "Minimum reference span relative to the barcode length.", category: "advanced"} + minScoringRegion: {description: "Minimum number of barcode regions with sufficient relative span to the barcode length.", category: "advanced"} + minScore: {description: "Reads below the minimum barcode score are removed from downstream analysis.", category: "common"} + minEndScore: {description: "Minimum end barcode score threshold is applied to the individual leading and trailing ends.", category: "advanced"} + minSignalIncrease: {description: "The minimal score difference, between first and combined, required to call a 
barcode pair different.", category: "advanced"} + minScoreLead: {description: "The minimal score lead required to call a barcode pair significant.", category: "common"} + ccsMode: {description: "CCS mode, use optimal alignment options.", category: "common"} + splitBamNamed: {description: "Split BAM output by resolved barcode pair name.", category: "common"} + scoredAdapterRatio: {description: "Minimum ratio of scored vs sequenced adapters.", category: "advanced"} + peek: {description: "Demux the first N ZMWs and return the mean score, 0 means peeking deactivated.", category: "advanced"} + guess: {description: "Try to guess the used barcodes, using the provided mean score threshold, 0 means guessing deactivated.", category: "advanced"} + guessMinCount: {description: "Minimum number of ZMWs observed to whitelist barcodes.", category: "advanced"} + peekGuess: {description: "Try to infer the used barcodes subset, by peeking at the first 50,000 ZMWs.", category: "advanced"} + logLevel: {description: "Set log level. Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} + inputBamFile: {description: "BAM input file.", category: "required"} + barcodeFile: {description: "Barcode fasta file.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + cores: {description: "The number of cores to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + # outputs + outputFLfile: {description: "Demultiplexed reads output file."} + outputFLindexFile: {description: "Index of demultiplexed reads output file."} + outputSTDERRfile: {description: "Lima STDERR log file."} + outputJSONfile: {description: "Lima JSON file."} + outputCountsFile: {description: "Lima counts file."} + outputReportFile: {description: "Lima report file."} + outputSummaryFile: {description: "Lima summary file."} } } From 787b11e475d8e22c5b673afc7e325a38d392255c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 11 Mar 2020 16:52:23 +0100 Subject: [PATCH 0261/1208] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fe791f5..a263cc1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ The statsPrefix input for umitools dedup is norw optional. + Allow setting the `--emit-ref-confidence` flag for HaplotypeCaller. + Add `--output-mode` flag to HaplotypeCaller. + Added rtg.Format and rtg.VcfEval tasks. From 9fc05a169439305a22411bb8240c0807362f177c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 11 Mar 2020 16:53:05 +0100 Subject: [PATCH 0262/1208] Update CHANGELOG.md Co-Authored-By: JasperBoom --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a263cc1f..1d156f54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- -+ The statsPrefix input for umitools dedup is norw optional. ++ The statsPrefix input for umitools dedup is now optional. + Allow setting the `--emit-ref-confidence` flag for HaplotypeCaller. + Add `--output-mode` flag to HaplotypeCaller. 
+ Added rtg.Format and rtg.VcfEval tasks. From a3118f8a8d81e01b7680f502d4bd827ef15cf6b5 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 12 Mar 2020 09:27:23 +0100 Subject: [PATCH 0263/1208] Update CHANGELOG. --- CHANGELOG.md | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fe791f5..6c037d25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,19 +11,21 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Add CCS workflow WDL files (ccs.wdl, lima.wdl, isoseq3.wdl). ++ Update TALON version to 4.4.2. + Allow setting the `--emit-ref-confidence` flag for HaplotypeCaller. + Add `--output-mode` flag to HaplotypeCaller. + Added rtg.Format and rtg.VcfEval tasks. + Added gatk.SelectVariants and gatk.VariantFiltration tasks. + Fixed a bug where the output directory was not created for bwa.Kit. + Add vt task for variants normalization and decomposition. -+ Update WDL task Picard (Add task RenameSample) -+ Update WDL task Samtools (Add task FilterShortReadsBam) -+ Add WDL task for BCFtools (bcf to vcf) -+ Add WDL task for SURVIVOR (merge) -+ Update WDL task Manta (Add germline SV calling) -+ Add WDL task for Delly -+ Add WDL task for Clever (and Mate-Clever) ++ Update WDL task Picard (Add task RenameSample). ++ Update WDL task Samtools (Add task FilterShortReadsBam). ++ Add WDL task for BCFtools (bcf to vcf). ++ Add WDL task for SURVIVOR (merge). ++ Update WDL task Manta (Add germline SV calling). ++ Add WDL task for Delly. ++ Add WDL task for Clever (and Mate-Clever). + Add proper copyright headers to all WDL files. So the free software license is clear to end users who wish to adapt and modify. + Add pedigree input for HaplotypeCaller and GenotypeGVCFs. @@ -42,7 +44,7 @@ version 2.2.0-dev + Update parameter_meta for TALON, Centrifuge and Minimap2. 
+ Centrifuge: Fix issue where Centrifuge Inspect did not get the correct index files location. + Add `minimumContigLength` input to PlotDenoisedCopyRatios and PlotModeledSegments. -+ Add `commonVariantSitesIndex` input to CollectAllelicCounts ++ Add `commonVariantSitesIndex` input to CollectAllelicCounts. + Centrifuge: Fix issue where Centrifuge could not locate index files. + Increase default memory of BWA mem to 32G (was 16G). + Add `memory` input to fastqc task. @@ -77,9 +79,9 @@ version 2.2.0-dev + Removed the "extraArgs" input from FilterMutectCalls. + Removed unused "verbose" and "quiet" inputs from multiqc. + Added parameter_meta sections to a variety of tasks. -+ Picard's BedToIntervalList outputPath input is now optional (with a default of "regions.interval_list") ++ Picard's BedToIntervalList outputPath input is now optional (with a default of "regions.interval_list"). + TALON: Fix SQLite error concerning database/disk space being full. -+ Update htseq to default image version 0.11.2 ++ Update htseq to default image version 0.11.2. + Update biowdl-input-converter in common.wdl to version 0.2.1. + Update TALON section to now include the new annotation file output, and add config file creation to the TALON task. + Removed unused inputs (trimPrimer and format) for cutadapt. From 9f26f2cb730d0ddbc0ffde4acc58578027c8704a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 12 Mar 2020 09:47:04 +0100 Subject: [PATCH 0264/1208] fix umitools dedup output --- umi-tools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 35d54299..359df102 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -96,9 +96,9 @@ task Dedup { output { File deduppedBam = outputBamPath File deduppedBamIndex = outputBamIndex - File? editDistance = statsPrefix + "_edit_distance.tsv" - File? umiStats = statsPrefix + "_per_umi.tsv" - File? positionStats = statsPrefix + "_per_umi_per_position.tsv" + File? 
editDistance = select_first([statsPrefix, "stats"]) + "_edit_distance.tsv" + File? umiStats = select_first([statsPrefix, "stats"]) + "_per_umi.tsv" + File? positionStats = select_first([statsPrefix, "stats"]) + "_per_umi_per_position.tsv" } runtime { From f2772fd043d71849f5cc6b6d4de547a15f8a0c88 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 12 Mar 2020 10:10:41 +0100 Subject: [PATCH 0265/1208] fix umitools dedup output --- umi-tools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 359df102..2d244c1d 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -96,9 +96,9 @@ task Dedup { output { File deduppedBam = outputBamPath File deduppedBamIndex = outputBamIndex - File? editDistance = select_first([statsPrefix, "stats"]) + "_edit_distance.tsv" - File? umiStats = select_first([statsPrefix, "stats"]) + "_per_umi.tsv" - File? positionStats = select_first([statsPrefix, "stats"]) + "_per_umi_per_position.tsv" + File? editDistance = "~{statsPrefix}_edit_distance.tsv" + File? umiStats = "~{statsPrefix}_per_umi.tsv" + File? positionStats = "~{statsPrefix}_per_umi_per_position.tsv" } runtime { From bc0152e3f64a2ad02d8a4762af29e04b3ba3279a Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 12 Mar 2020 11:02:00 +0100 Subject: [PATCH 0266/1208] Update IsoSeq3 WDL file. 
--- isoseq3.wdl | 40 ++++++++++++++++++++++++++++++++++++++++ lima.wdl | 8 ++++---- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/isoseq3.wdl b/isoseq3.wdl index 57213352..620f9ed5 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -22,14 +22,39 @@ version 1.0 task refine { input { + Int minPolyAlength = 20 + Boolean requirePolyA = false + String logLevel = "WARN" + File inputBamFile + File primerFile + String outputPrefix + + Int cores = 4 + String memory = "10G" + String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" } command { set -e mkdir -p "$(dirname ~{outputPrefix})" + isoseq3 refine \ + ~{"--min-poly-length" + minPolyAlength} \ + ~{true="--require-polya" false="" requirePolyA} \ + ~{"--log-level " logLevel} \ + ~{"--num-threads " + cores} \ + ~{"--log-file " + outputPrefix + ".flnc.stderr.log"} \ + ~{inputBamFile} \ + ~{primerFile} \ + ~{outputPrefix + ".flnc.bam"} } output { + File outputFLfile = outputPrefix + ".flnc.bam" + File outputFLindexFile = outputPrefix + ".flnc.bam.pbi" + File outputSTDERRfile = outputPrefix + ".flnc.stderr.log" + File outputConsensusReadsetFile = outputPrefix + ".consensusreadset.xml" + File outputFilterSummaryFile = outputPrefix + ".filter_summary.json" + File outputReportFile = outputPrefix + ".report.csv" } runtime { @@ -40,7 +65,22 @@ task refine { parameter_meta { # inputs + minPolyAlength: {description: "Minimum poly(A) tail length.", category: "advanced"} + requirePolyA: {description: "Require FL reads to have a poly(A) tail and remove it.", category: "common"} + logLevel: {description: "Set log level. 
Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} + inputBamFile: {description: "BAM input file.", category: "required"} + primerFile: {description: "Barcode/primer fasta file.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + cores: {description: "The number of cores to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs + outputFLfile: {description: "Filtered reads output file."} + outputFLindexFile: {description: "Index of filtered reads output file."} + outputSTDERRfile: {description: "Refine STDERR log file."} + outputConsensusReadsetFile: {description: "Refine consensus readset XML file."} + outputFilterSummaryFile: {description: "Refine summary file."} + outputReportFile: {description: "Refine report file."} } } diff --git a/lima.wdl b/lima.wdl index c25a35d2..1534111e 100644 --- a/lima.wdl +++ b/lima.wdl @@ -82,15 +82,15 @@ task lima { ~{true="--peek-guess" false="" peekGuess} \ ~{"--log-level " logLevel} \ ~{"--num-threads " + cores} \ - ~{"--log-file " + outputPrefix + "_fl_stderr.log"} \ + ~{"--log-file " + outputPrefix + ".fl.stderr.log"} \ ~{inputBamFile} \ ~{barcodeFile} \ ~{outputPrefix + ".fl.bam"} } output { - File outputFLfile = outputPrefix + ".fl.bam" - File outputFLindexFile = outputPrefix + ".fl.bam.pbi" + File outputFLfile = outputPrefix + "*.bam" + File outputFLindexFile = outputPrefix + "*.bam.pbi" File outputSTDERRfile = outputPrefix + ".fl.stderr.log" File outputJSONfile = outputPrefix + ".fl.json" File outputCountsFile = outputPrefix + ".fl.lima.counts" @@ -129,7 +129,7 @@ task lima { peekGuess: {description: "Try to infer the used barcodes subset, by peeking at the first 50,000 ZMWs.", 
category: "advanced"} logLevel: {description: "Set log level. Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} inputBamFile: {description: "BAM input file.", category: "required"} - barcodeFile: {description: "Barcode fasta file.", category: "required"} + barcodeFile: {description: "Barcode/primer fasta file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} From 085041289d9dd56a5fb2fc9ca3eba5982d8d589b Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 12 Mar 2020 11:11:20 +0100 Subject: [PATCH 0267/1208] Remove weird newline. --- ccs.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ccs.wdl b/ccs.wdl index ec968443..041d1cb1 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -68,8 +68,7 @@ task ccs { parameter_meta { # inputs - minPasses: {description: "Minimum number of full-length subreads required to generate CCS for a -ZMW.", category: "advanced"} + minPasses: {description: "Minimum number of full-length subreads required to generate CCS for a ZMW.", category: "advanced"} minLength: {description: "Minimum draft length before polishing.", category: "advanced"} maxLength: {description: "Maximum draft length before polishing.", category: "advanced"} byStrand: {description: "Generate a consensus for each strand.", category: "advanced"} From c8b252a1bd78b1b37178238d037fe205e45f28b9 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 12 Mar 2020 11:14:26 +0100 Subject: [PATCH 0268/1208] Add missing plus sign. 
--- ccs.wdl | 2 +- isoseq3.wdl | 2 +- lima.wdl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ccs.wdl b/ccs.wdl index 041d1cb1..fc2ac622 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -45,7 +45,7 @@ task ccs { ~{"--max-length " + maxLength} \ ~{true="--by-strand" false="" byStrand} \ ~{"--min-rq " + minReadQuality} \ - ~{"--log-level " logLevel} \ + ~{"--log-level " + logLevel} \ ~{"--num-threads " + cores} \ ~{"--report-file " + outputPrefix + ".ccs.report.txt"} \ ~{"--log-file " + outputPrefix + ".ccs.stderr.log"} \ diff --git a/isoseq3.wdl b/isoseq3.wdl index 620f9ed5..4232d572 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -40,7 +40,7 @@ task refine { isoseq3 refine \ ~{"--min-poly-length" + minPolyAlength} \ ~{true="--require-polya" false="" requirePolyA} \ - ~{"--log-level " logLevel} \ + ~{"--log-level " + logLevel} \ ~{"--num-threads " + cores} \ ~{"--log-file " + outputPrefix + ".flnc.stderr.log"} \ ~{inputBamFile} \ diff --git a/lima.wdl b/lima.wdl index 1534111e..e9fd21b7 100644 --- a/lima.wdl +++ b/lima.wdl @@ -80,7 +80,7 @@ task lima { ~{"--guess " + guess} \ ~{"--guess-min-count " + guessMinCount} \ ~{true="--peek-guess" false="" peekGuess} \ - ~{"--log-level " logLevel} \ + ~{"--log-level " + logLevel} \ ~{"--num-threads " + cores} \ ~{"--log-file " + outputPrefix + ".fl.stderr.log"} \ ~{inputBamFile} \ From 19715aa25e254b76648899eed28e14248ff6d302 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 12 Mar 2020 11:21:56 +0100 Subject: [PATCH 0269/1208] Rename tasks. --- ccs.wdl | 2 +- isoseq3.wdl | 2 +- lima.wdl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ccs.wdl b/ccs.wdl index fc2ac622..5cf0b4d3 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -20,7 +20,7 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-task ccs { +task CCS { input { Int minPasses = 3 Int minLength = 10 diff --git a/isoseq3.wdl b/isoseq3.wdl index 4232d572..41eeeb08 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -20,7 +20,7 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -task refine { +task Refine { input { Int minPolyAlength = 20 Boolean requirePolyA = false diff --git a/lima.wdl b/lima.wdl index e9fd21b7..059094c0 100644 --- a/lima.wdl +++ b/lima.wdl @@ -20,7 +20,7 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -task lima { +task Lima { input { String libraryDesign = "same" Boolean scoreFullPass = false From 73b04864b738e03cbb4448a83937d184fde0daed Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 12 Mar 2020 13:03:44 +0100 Subject: [PATCH 0270/1208] Rewrite command syntax. --- ccs.wdl | 12 ++++++------ isoseq3.wdl | 6 +++--- lima.wdl | 36 ++++++++++++++++++------------------ minimap2.wdl | 14 +++++++------- 4 files changed, 34 insertions(+), 34 deletions(-) diff --git a/ccs.wdl b/ccs.wdl index 5cf0b4d3..2ae54cb7 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -40,13 +40,13 @@ task CCS { set -e mkdir -p "$(dirname ~{outputPrefix})" ccs \ - ~{"--min-passes " + minPasses} \ - ~{"--min-length " + minLength} \ - ~{"--max-length " + maxLength} \ + --min-passes ~{minPasses} \ + --min-length ~{minLength} \ + --max-length ~{maxLength} \ ~{true="--by-strand" false="" byStrand} \ - ~{"--min-rq " + minReadQuality} \ - ~{"--log-level " + logLevel} \ - ~{"--num-threads " + cores} \ + --min-rq ~{minReadQuality} \ + --log-level ~{logLevel} \ + --num-threads ~{cores} \ ~{"--report-file " + outputPrefix + ".ccs.report.txt"} \ ~{"--log-file " + outputPrefix + ".ccs.stderr.log"} \ ~{subreadsFile} diff --git a/isoseq3.wdl b/isoseq3.wdl index 41eeeb08..d7111c9a 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -38,10 +38,10 @@ task Refine { set -e mkdir -p "$(dirname ~{outputPrefix})" isoseq3 refine \ - 
~{"--min-poly-length" + minPolyAlength} \ + --min-poly-length ~{minPolyAlength} \ ~{true="--require-polya" false="" requirePolyA} \ - ~{"--log-level " + logLevel} \ - ~{"--num-threads " + cores} \ + --log-level ~{logLevel} \ + --num-threads ~{cores} \ ~{"--log-file " + outputPrefix + ".flnc.stderr.log"} \ ~{inputBamFile} \ ~{primerFile} \ diff --git a/lima.wdl b/lima.wdl index 059094c0..52f16970 100644 --- a/lima.wdl +++ b/lima.wdl @@ -61,27 +61,27 @@ task Lima { lima \ ~{libraryDesignOptions[libraryDesign]} \ ~{true="--score-full-pass" false="" scoreFullPass} \ - ~{"--max-scored-barcode-pairs " + maxScoredBarcodePairs} \ - ~{"--max-scored-barcodes " + maxScoredBarcodes} \ - ~{"--max-scored-adapters " + maxScoredAdapters} \ - ~{"--min-passes " + minPasses} \ - ~{"--min-length " + minLength} \ - ~{"--max-input-length " + maxInputLength} \ - ~{"--min-ref-span " + minRefSpan} \ - ~{"--min-scoring-regions " + minScoringRegion} \ - ~{"--min-score " + minScore} \ - ~{"--min-end-score " + minEndScore} \ - ~{"--min-signal-increase " + minSignalIncrease} \ - ~{"--min-score-lead " + minScoreLead} \ + --max-scored-barcode-pairs ~{maxScoredBarcodePairs} \ + --max-scored-barcodes ~{maxScoredBarcodes} \ + --max-scored-adapters ~{maxScoredAdapters} \ + --min-passes ~{minPasses} \ + --min-length ~{minLength} \ + --max-input-length ~{maxInputLength} \ + --min-ref-span ~{minRefSpan} \ + --min-scoring-regions ~{minScoringRegion} \ + --min-score ~{minScore} \ + --min-end-score ~{minEndScore} \ + --min-signal-increase ~{minSignalIncrease} \ + --min-score-lead ~{minScoreLead} \ ~{true="--ccs" false="" ccsMode} \ ~{true="--split-bam-named" false="" splitBamNamed} \ - ~{"--scored-adapter-ratio " + scoredAdapterRatio} \ - ~{"--peek " + peek} \ - ~{"--guess " + guess} \ - ~{"--guess-min-count " + guessMinCount} \ + --scored-adapter-ratio ~{scoredAdapterRatio} \ + --peek ~{peek} \ + --guess ~{guess} \ + --guess-min-count ~{guessMinCount} \ ~{true="--peek-guess" false="" peekGuess} \ - 
~{"--log-level " + logLevel} \ - ~{"--num-threads " + cores} \ + --log-level ~{logLevel} \ + --num-threads ~{cores} \ ~{"--log-file " + outputPrefix + ".fl.stderr.log"} \ ~{inputBamFile} \ ~{barcodeFile} \ diff --git a/minimap2.wdl b/minimap2.wdl index 32c0666a..fd28d4a9 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -40,10 +40,10 @@ task Indexing { mkdir -p "$(dirname ~{outputPrefix})" minimap2 \ ~{true="-H" false="" useHomopolymerCompressedKmer} \ - ~{"-k " + kmerSize} \ - ~{"-w " + minimizerWindowSize} \ + -k ~{kmerSize} \ + -w ~{minimizerWindowSize} \ ~{"-d " + outputPrefix + ".mmi"} \ - ~{"-t " + cores} \ + -t ~{cores} \ ~{"-I " + splitIndex} \ ~{referenceFile} } @@ -103,14 +103,14 @@ task Mapping { set -e mkdir -p "$(dirname ~{outputPrefix})" minimap2 \ - ~{"-x " + presetOption} \ - ~{"-k " + kmerSize} \ + -x ~{presetOption} \ + -k ~{kmerSize} \ ~{true="-X" false="" skipSelfAndDualMappings} \ ~{true="-a" false="" outputSAM} \ - ~{"-o " + outputPrefix} \ + -o ~{outputPrefix} \ ~{true="--MD" false="" addMDtagToSAM} \ --secondary=~{true="yes" false="no" secondaryAlignment} \ - ~{"-t " + cores} \ + -t ~{cores} \ ~{"-G " + maxIntronLength} \ ~{"-F " + maxFragmentLength} \ ~{"-N " + retainMaxSecondaryAlignments} \ From 8550676aafe623ae9bf178bcf477b5a5cd09aea2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 12 Mar 2020 13:13:16 +0100 Subject: [PATCH 0271/1208] Update command syntax. 
--- talon.wdl | 60 ++++++++++++++++++++++----------------------- transcriptclean.wdl | 20 +++++++-------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/talon.wdl b/talon.wdl index 7e81d3f1..6ddb841e 100644 --- a/talon.wdl +++ b/talon.wdl @@ -38,10 +38,10 @@ task CreateAbundanceFileFromDatabase { set -e mkdir -p "$(dirname ~{outputPrefix})" talon_abundance \ - ~{"--db=" + databaseFile} \ - ~{"-a " + annotationVersion} \ - ~{"-b " + genomeBuild} \ - ~{"--o=" + outputPrefix} \ + --db=~{databaseFile} \ + -a ~{annotationVersion} \ + -b ~{genomeBuild} \ + --o=~{outputPrefix} \ ~{"--whitelist=" + whitelistFile} \ ~{"-d " + datasetsFile} } @@ -91,12 +91,12 @@ task CreateGtfFromDatabase { set -e mkdir -p "$(dirname ~{outputPrefix})" talon_create_GTF \ - ~{"--db=" + databaseFile} \ - ~{"-b " + genomeBuild} \ - ~{"-a " + annotationVersion} \ - ~{"--o=" + outputPrefix} \ - ~{"--whitelist=" + whitelistFile} \ + --db=~{databaseFile} \ + -b ~{genomeBuild} \ + -a ~{annotationVersion} \ + --o=~{outputPrefix} \ ~{true="--observed" false="" observedInDataset} \ + ~{"--whitelist=" + whitelistFile} \ ~{"-d " + datasetFile} } @@ -142,8 +142,8 @@ task FilterTalonTranscripts { set -e mkdir -p "$(dirname ~{outputPrefix})" talon_filter_transcripts \ - ~{"--db=" + databaseFile} \ - ~{"-a " + annotationVersion} \ + --db=~{databaseFile} \ + -a ~{annotationVersion} \ ~{"--o=" + outputPrefix + "_whitelist.csv"} \ ~{"-p " + pairingsFile} } @@ -187,9 +187,9 @@ task GetReadAnnotations { set -e mkdir -p "$(dirname ~{outputPrefix})" talon_fetch_reads \ - ~{"--db " + databaseFile} \ - ~{"--build " + genomeBuild} \ - ~{"--o " + outputPrefix} \ + --db ~{databaseFile} \ + --build ~{genomeBuild} \ + --o ~{outputPrefix} \ ~{"--datasets " + datasetFile} } @@ -235,14 +235,14 @@ task InitializeTalonDatabase { set -e mkdir -p "$(dirname ~{outputPrefix})" talon_initialize_database \ - ~{"--f=" + GTFfile} \ - ~{"--g=" + genomeBuild} \ - ~{"--a=" + annotationVersion} \ - ~{"--l=" + 
minimumLength} \ - ~{"--idprefix=" + novelIDprefix} \ - ~{"--5p=" + cutoff5p} \ - ~{"--3p=" + cutoff3p} \ - ~{"--o=" + outputPrefix} + --f=~{GTFfile} \ + --g=~{genomeBuild} \ + --a=~{annotationVersion} \ + --l=~{minimumLength} \ + --idprefix=~{novelIDprefix} \ + --5p=~{cutoff5p} \ + --3p=~{cutoff3p} \ + --o=~{outputPrefix} } output { @@ -283,7 +283,7 @@ task ReformatGtf { command { set -e talon_reformat_gtf \ - ~{"-gtf " + GTFfile} + -gtf ~{GTFfile} } output { @@ -322,9 +322,9 @@ task SummarizeDatasets { set -e mkdir -p "$(dirname ~{outputPrefix})" talon_summarize \ - ~{"--db " + databaseFile} \ + --db ~{databaseFile} \ ~{true="--verbose" false="" setVerbose} \ - ~{"--o " + outputPrefix} \ + --o ~{outputPrefix} \ ~{"--groups " + datasetGroupsCSV} } @@ -381,11 +381,11 @@ task Talon { done talon \ ~{"--f " + outputPrefix + "/talonConfigFile.csv"} \ - ~{"--db " + databaseFile} \ - ~{"--build " + genomeBuild} \ - ~{"--threads " + cores} \ - ~{"--cov " + minimumCoverage} \ - ~{"--identity " + minimumIdentity} \ + --db ~{databaseFile} \ + --build ~{genomeBuild} \ + --threads ~{cores} \ + --cov ~{minimumCoverage} \ + --identity ~{minimumIdentity} \ ~{"--o " + outputPrefix + "/run"} >>> diff --git a/transcriptclean.wdl b/transcriptclean.wdl index e288e316..68bcbf24 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -35,9 +35,9 @@ task GetSJsFromGtf { set -e mkdir -p "$(dirname ~{outputPrefix})" get_SJs_from_gtf \ - ~{"--f=" + GTFfile} \ - ~{"--g=" + genomeFile} \ - ~{"--minIntronSize=" + minIntronSize} \ + --f=~{GTFfile} \ + --g=~{genomeFile} \ + --minIntronSize=~{minIntronSize} \ ~{"--o=" + outputPrefix + ".tsv"} } @@ -131,19 +131,19 @@ task TranscriptClean { set -e mkdir -p "$(dirname ~{outputPrefix})" TranscriptClean \ - ~{"-s " + SAMfile} \ - ~{"-g " + referenceGenome} \ - ~{"-t " + cores} \ - ~{"--maxLenIndel=" + maxLenIndel} \ - ~{"--maxSJOffset=" + maxSJoffset} \ - ~{"-o " + outputPrefix} \ + -s ~{SAMfile} \ + -g ~{referenceGenome} \ + -t ~{cores} \ + 
--maxLenIndel=~{maxLenIndel} \ + --maxSJOffset=~{maxSJoffset} \ + -o ~{outputPrefix} \ ~{true="-m true" false="-m false" correctMismatches} \ ~{true="-i true" false="-i false" correctIndels} \ ~{true="--correctSJs=true" false="--correctSJs=false" correctSJs} \ ~{true="--dryRun" false="" dryRun} \ ~{true="--primaryOnly" false="" primaryOnly} \ ~{true="--canonOnly" false="" canonOnly} \ - ~{"--bufferSize=" + bufferSize} \ + --bufferSize=~{bufferSize} \ ~{true="--deleteTmp" false="" deleteTmp} \ ~{"-j " + spliceJunctionAnnotation} \ ~{"-v " + variantFile} From 9ee58adb60a51bfd349fa6bc11d9760f93955336 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 12 Mar 2020 13:15:35 +0100 Subject: [PATCH 0272/1208] Last syntax update command section. --- centrifuge.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index c5fd66f5..1fbc7be1 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -44,15 +44,15 @@ task Build { set -e mkdir -p "$(dirname ~{outputPrefix})" centrifuge-build \ - ~{"--threads " + threads} \ + --threads ~{threads} \ ~{true="--nodc" false="" disableDifferenceCover} \ ~{"--offrate " + offrate} \ ~{"--ftabchars " + ftabChars} \ ~{"--kmer-count " + kmerCount} \ ~{"--size-table " + sizeTable} \ - ~{"--conversion-table " + conversionTable} \ - ~{"--taxonomy-tree " + taxonomyTree} \ - ~{"--name-table " + nameTable} \ + --conversion-table ~{conversionTable} \ + --taxonomy-tree ~{taxonomyTree} \ + --name-table ~{nameTable} \ ~{referenceFile} \ ~{outputPrefix + "/" + indexBasename} } @@ -123,9 +123,9 @@ task Classify { centrifuge \ ~{inputFormatOptions[inputFormat]} \ ~{true="--phred64" false="--phred33" phred64} \ - ~{"--min-hitlen " + minHitLength} \ + --min-hitlen ~{minHitLength} \ ~{"--met-file " + outputPrefix + "_alignment_metrics.tsv"} \ - ~{"--threads " + threads} \ + --threads ~{threads} \ ~{"--trim5 " + trim5} \ ~{"--trim3 " + trim3} \ ~{"-k " + reportMaxDistinct} \ From 
5fd7356984fc44e7b35e6d33fd09ed84da81ea6b Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 12 Mar 2020 13:16:26 +0100 Subject: [PATCH 0273/1208] Update CHANGELOG. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d185a8f..872f2173 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Update command section syntax Talon, TranscriptClean and Centrifuge. + Add CCS workflow WDL files (ccs.wdl, lima.wdl, isoseq3.wdl). + Update TALON version to 4.4.2. + The statsPrefix input for umitools dedup is now optional. From d9f557189f2af6bcc8d67ccc38558aea1e5ceede Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 12 Mar 2020 13:25:41 +0100 Subject: [PATCH 0274/1208] Update CHANGELOG. --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 872f2173..95e479c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- -+ Update command section syntax Talon, TranscriptClean and Centrifuge. ++ Update command section syntax Minimap2, Talon, TranscriptClean and Centrifuge. + Add CCS workflow WDL files (ccs.wdl, lima.wdl, isoseq3.wdl). + Update TALON version to 4.4.2. + The statsPrefix input for umitools dedup is now optional. From 4dd0e85d5bb8cbb09fe4455ef1a8e12d57c0f49c Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 12 Mar 2020 16:31:57 +0100 Subject: [PATCH 0275/1208] add UMItools separator --- umi-tools.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/umi-tools.wdl b/umi-tools.wdl index 2d244c1d..8defc8eb 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -70,6 +70,7 @@ task Dedup { input { File inputBam File inputBamIndex + String umiSeparator = "_" String outputBamPath String? 
statsPrefix Boolean paired = true @@ -89,6 +90,7 @@ task Dedup { --stdin ~{inputBam} \ --stdout ~{outputBamPath} \ ~{"--output-stats " + statsPrefix} \ + --umi-separator=~{umiSeparator} \ ~{true="--paired" false="" paired} samtools index ~{outputBamPath} ~{outputBamIndex} } @@ -111,6 +113,7 @@ task Dedup { inputBamIndex: {description: "The index for the ipnut BAM file.", cateogry: "required"} outputBamPath: {description: "The location to write the output BAM file to.", category: "required"} statsPrefix: {description: "The prefix for the stats files.", category: "advanced"} + umiSeparator: {description: "Seperator for UMI sequence, default with '_'", category: "advanced"} paired: {description: "Whether or not the data is paired.", category: "common"} memory: {description: "The amount of memory required for the task.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From d161d7b8b98e221b5c9dbcecd9648d137efb69ea Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 12 Mar 2020 16:33:13 +0100 Subject: [PATCH 0276/1208] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d156f54..410a1fea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 2.2.0-dev --------------------------- ++ Add UMI separator. + The statsPrefix input for umitools dedup is now optional. + Allow setting the `--emit-ref-confidence` flag for HaplotypeCaller. + Add `--output-mode` flag to HaplotypeCaller. 
From 5310007d64c92d491a0c2fd816f71aec52cccaf8 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 13 Mar 2020 13:09:40 +0100 Subject: [PATCH 0277/1208] umiSeparator made optional --- umi-tools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 8defc8eb..0a99d2bd 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -70,7 +70,7 @@ task Dedup { input { File inputBam File inputBamIndex - String umiSeparator = "_" + String? umiSeparator String outputBamPath String? statsPrefix Boolean paired = true @@ -90,7 +90,7 @@ task Dedup { --stdin ~{inputBam} \ --stdout ~{outputBamPath} \ ~{"--output-stats " + statsPrefix} \ - --umi-separator=~{umiSeparator} \ + ~{"--umi-separator=" + umiSeparator} ~{true="--paired" false="" paired} samtools index ~{outputBamPath} ~{outputBamIndex} } @@ -113,7 +113,7 @@ task Dedup { inputBamIndex: {description: "The index for the ipnut BAM file.", cateogry: "required"} outputBamPath: {description: "The location to write the output BAM file to.", category: "required"} statsPrefix: {description: "The prefix for the stats files.", category: "advanced"} - umiSeparator: {description: "Seperator for UMI sequence, default with '_'", category: "advanced"} + umiSeparator: {description: "Seperator for UMI sequence", category: "advanced"} paired: {description: "Whether or not the data is paired.", category: "common"} memory: {description: "The amount of memory required for the task.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", From 915d8c4392818250b972327d0152e5e8cd0bd77b Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 13 Mar 2020 13:55:16 +0100 Subject: [PATCH 0278/1208] update scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 98cc3e10..dfef7cb2 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 98cc3e10125c853a70f41ceccf8f9d5428d4c1a3 +Subproject commit dfef7cb2555667126dc1751add414527240d71bc From c651dcfb0997c1b12396ed275b1950e2b4e15253 Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 13 Mar 2020 13:57:20 +0100 Subject: [PATCH 0279/1208] update version in VERSION and CHANGELOG.md --- CHANGELOG.md | 2 +- VERSION | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 95e479c8..f632f117 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 2.2.0-dev +version 3.0.0 --------------------------- + Update command section syntax Minimap2, Talon, TranscriptClean and Centrifuge. + Add CCS workflow WDL files (ccs.wdl, lima.wdl, isoseq3.wdl). 
diff --git a/VERSION b/VERSION index ccbccc3d..4a36342f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.2.0 +3.0.0 From e1c30a97053f235272a531229bc89d69d3190493 Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 13 Mar 2020 14:07:42 +0100 Subject: [PATCH 0280/1208] setting next version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 4a36342f..fd2a0186 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.0.0 +3.1.0 From 3fa03e0c864802724e08a3518b473a8379b0a6bf Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Mon, 16 Mar 2020 09:54:48 +0100 Subject: [PATCH 0281/1208] Update CHANGELOG.md Co-Authored-By: DavyCats --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d927f77..b79da9c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 3.0.0 --------------------------- -+ Add optional UMI separator in umi-tools task. ++ Add optional input umiSeparator in umi-tools dedup task. + Update command section syntax Minimap2, Talon, TranscriptClean and Centrifuge. + Add CCS workflow WDL files (ccs.wdl, lima.wdl, isoseq3.wdl). + Update TALON version to 4.4.2. 
From 67f85befbaab7fc38b4fbdd379879b829e512f51 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Mon, 16 Mar 2020 09:55:14 +0100 Subject: [PATCH 0282/1208] Update umi-tools.wdl Co-Authored-By: DavyCats --- umi-tools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 0a99d2bd..86f5d64e 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -113,10 +113,10 @@ task Dedup { inputBamIndex: {description: "The index for the ipnut BAM file.", cateogry: "required"} outputBamPath: {description: "The location to write the output BAM file to.", category: "required"} statsPrefix: {description: "The prefix for the stats files.", category: "advanced"} - umiSeparator: {description: "Seperator for UMI sequence", category: "advanced"} + umiSeparator: {description: "Seperator used for UMIs in the read names.", category: "advanced"} paired: {description: "Whether or not the data is paired.", category: "common"} memory: {description: "The amount of memory required for the task.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} From 0cedc9aae1af8aa34697aa406d1e474d7965b7da Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 18 Mar 2020 16:03:47 +0100 Subject: [PATCH 0283/1208] more threads for BWA by default --- bwa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index 665b63ca..40256101 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -28,7 +28,7 @@ task Mem { String outputPath String? readgroup - Int threads = 2 + Int threads = 4 String memory = "32G" String picardXmx = "4G" # A mulled container is needed to have both picard and bwa in one container. 
From 20b209f2ca4dcc6253f79fec047df9f005d01d70 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 18 Mar 2020 16:04:18 +0100 Subject: [PATCH 0284/1208] use more cores for cutadapt, update cutadapt version, explicitly set low compression level --- cutadapt.wdl | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index f6b8211e..19e4ef0e 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -74,13 +74,10 @@ task Cutadapt { Boolean? zeroCap Boolean? noZeroCap String reportPath = "cutadapt_report.txt" - #Int compressionLevel = 1 # This only affects outputs with the .gz suffix. - # --compression-level has a bug in 2.4 https://github.com/marcelm/cutadapt/pull/388 - #~{"--compression-level=" + compressionLevel} \ - Boolean Z = true # equal to compressionLevel=1 # Fixme: replace once upstream is fixed. - Int cores = 1 + Int compressionLevel = 1 # This only affects outputs with the .gz suffix. + Int cores = 4 String memory = "4G" - String dockerImage = "quay.io/biocontainers/cutadapt:2.4--py37h14c3975_0" + String dockerImage = "quay.io/biocontainers/cutadapt:2.8--py37h516909a_0" } String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) @@ -95,7 +92,6 @@ task Cutadapt { ~{read2outputArg} cutadapt \ ~{"--cores=" + cores} \ - ~{true="-Z" false="" Z} \ ~{true="-a" false="" length(adapter) > 0} ~{sep=" -a " adapter} \ ~{true="-A" false="" length(adapterRead2) > 0} ~{sep=" -A " adapterRead2} \ ~{true="-g" false="" length(front) > 0} ~{sep=" -g " front} \ @@ -103,6 +99,7 @@ task Cutadapt { ~{true="-b" false="" length(anywhere) > 0} ~{sep=" -b " anywhere} \ ~{true="-B" false="" length(anywhereRead2) > 0} ~{sep=" -B " anywhereRead2} \ --output ~{read1output} ~{if defined(read2) then "-p " + realRead2output else ""} \ + --compression-level ~{compressionLevel} \ ~{"--to-short-output " + tooShortOutputPath} \ ~{"--to-short-paired-output " + tooShortPairedOutputPath} \ ~{"--to-long-output " + tooLongOutputPath} \ From 
e83fd17285b81886c4f75eac4704164dea755dbe Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 18 Mar 2020 16:08:52 +0100 Subject: [PATCH 0285/1208] docs for compressionLevel --- cutadapt.wdl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index 19e4ef0e..d04865b6 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -74,6 +74,9 @@ task Cutadapt { Boolean? zeroCap Boolean? noZeroCap String reportPath = "cutadapt_report.txt" + # Cutadapt compresses the zipped output files with a ridiculously high compression level (5 or 6). + # This is not the fast compression preset. It takes up to 400% more CPU time for a 20% reduction in file size. + # Hence we use compression level 1 here. Int compressionLevel = 1 # This only affects outputs with the .gz suffix. Int cores = 4 String memory = "4G" @@ -376,10 +379,8 @@ task Cutadapt { description: "The name of the file to write cutadapts's stdout to, this contains some metrics.", category: "common" } - Z: { - description: "Equivalent to cutadapt's -Z flag.", - category: "advanced" - } + compressionLevel: {description: "The compression level if gzipped output is used.", + category: "advanced"} cores: { description: "The number of cores to use.", category: "advanced" From 751720985c230d556011d29302e7f56820ac9a15 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 18 Mar 2020 16:10:46 +0100 Subject: [PATCH 0286/1208] update default fastqc container since it fixes several bugs --- fastqc.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastqc.wdl b/fastqc.wdl index b13c19d1..4d10147c 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -39,7 +39,7 @@ task Fastqc { Int threads = 1 String memory = "4G" - String dockerImage = "quay.io/biocontainers/fastqc:0.11.7--4" + String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" Array[File]? NoneArray File? 
NoneFile } From ff263a73912d7b3456a6d49cb618c11e33ce84d3 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 18 Mar 2020 16:18:06 +0100 Subject: [PATCH 0287/1208] update changelog --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f632f117..90d87a81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,14 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> +Version 3.1.0-dev +--------------------------- ++ Cutadapt now explicitly calls the `--compression-level` flag with compression + level 1 to prevent cutadapt from using very high gzip compression level 6 + that uses 400% more cpu time. ++ Update default docker image for cutadapt and fastqc. ++ Default number of cores for cutadapt and bwamem to 4 cores. + version 3.0.0 --------------------------- From 4154cee8a96444748322c811ef01fe78557d2430 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 19 Mar 2020 14:41:58 +0100 Subject: [PATCH 0288/1208] rename outputBAM back to outputBam, remove boolean --- samtools.wdl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index de7f2a36..cc106d30 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -336,7 +336,6 @@ task View { File? referenceFasta String outputFileName = "view.bam" Boolean includeHeader = false - Boolean outputBam = false Boolean uncompressedBamOutput = false Int? includeFilter Int? 
excludeFilter @@ -356,7 +355,6 @@ task View { samtools view -b \ ~{"-T " + referenceFasta} \ ~{"-o " + outputFileName} \ - ~{true="-b " false="" outputBam} \ ~{true="-u " false="" uncompressedBamOutput} \ ~{"-f " + includeFilter} \ ~{"-F " + excludeFilter} \ @@ -368,7 +366,7 @@ task View { } output { - File outputBAM = outputFileName + File outputBam = outputFileName File outputBamIndex = outputIndexPath } From 0e6685122f2e07e8ea75bc3d8d6a5ee2eca338fc Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 19 Mar 2020 14:44:26 +0100 Subject: [PATCH 0289/1208] remove unused includeHeader --- samtools.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index cc106d30..a4a893a1 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -335,7 +335,6 @@ task View { File inFile File? referenceFasta String outputFileName = "view.bam" - Boolean includeHeader = false Boolean uncompressedBamOutput = false Int? includeFilter Int? excludeFilter From 006a57e0dfd9b5e65c52c6b4501451bb36e75e39 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 20 Mar 2020 10:23:36 +0100 Subject: [PATCH 0290/1208] Add missing backslash. --- ccs.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ccs.wdl b/ccs.wdl index 2ae54cb7..39bb0a19 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -49,7 +49,7 @@ task CCS { --num-threads ~{cores} \ ~{"--report-file " + outputPrefix + ".ccs.report.txt"} \ ~{"--log-file " + outputPrefix + ".ccs.stderr.log"} \ - ~{subreadsFile} + ~{subreadsFile} \ ~{outputPrefix + ".ccs.bam"} } From 5d4e391565159ccc93e51ada9ac4c0632d799783 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 20 Mar 2020 10:24:46 +0100 Subject: [PATCH 0291/1208] Update CHANGELOG. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2559670..6a933899 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. 
--> Version 3.1.0-dev --------------------------- ++ CCS: Add missing backslash. + Cutadapt now explicitly calls the `--compression-level` flag with compression level 1 to prevent cutadapt from using very high gzip compression level 6 that uses 400% more cpu time. From 8c8cc282377b4787e87680874f43bf37f4cac7ff Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 20 Mar 2020 16:42:09 +0100 Subject: [PATCH 0292/1208] Update lima to correctly collect bam files. --- lima.wdl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/lima.wdl b/lima.wdl index 52f16970..125b12c6 100644 --- a/lima.wdl +++ b/lima.wdl @@ -85,17 +85,18 @@ task Lima { ~{"--log-file " + outputPrefix + ".fl.stderr.log"} \ ~{inputBamFile} \ ~{barcodeFile} \ - ~{outputPrefix + ".fl.bam"} + ~{basename(outputPrefix) + ".fl.bam"} } output { - File outputFLfile = outputPrefix + "*.bam" - File outputFLindexFile = outputPrefix + "*.bam.pbi" + Array[File] outputFLfile = glob("~{basename(outputPrefix)}*.bam") + Array[File] outputFLindexFile = glob("~{basename(outputPrefix)}*.bam.pbi") + Array[File] outputFLxmlFile = glob("~{basename(outputPrefix)}*.subreadset.xml") File outputSTDERRfile = outputPrefix + ".fl.stderr.log" - File outputJSONfile = outputPrefix + ".fl.json" - File outputCountsFile = outputPrefix + ".fl.lima.counts" - File outputReportFile = outputPrefix + ".fl.lima.report" - File outputSummaryFile = outputPrefix + ".fl.lima.summary" + File outputJSONfile = "~{basename(outputPrefix)}.fl.json" + File outputCountsFile = "~{basename(outputPrefix)}.fl.lima.counts" + File outputReportFile = "~{basename(outputPrefix)}.fl.lima.report" + File outputSummaryFile = "~{basename(outputPrefix)}.fl.lima.summary" } runtime { From 2a82bccc7cf7e0d960dec01d318b817f31ebf450 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 23 Mar 2020 10:29:11 +0100 Subject: [PATCH 0293/1208] Add workaround for glob command in lima. 
--- CHANGELOG.md | 1 + lima.wdl | 16 ++++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a933899..0291d0ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. --> Version 3.1.0-dev --------------------------- ++ Lima: Add workaround for glob command not locating files in output directory. + CCS: Add missing backslash. + Cutadapt now explicitly calls the `--compression-level` flag with compression level 1 to prevent cutadapt from using very high gzip compression level 6 diff --git a/lima.wdl b/lima.wdl index 125b12c6..67d9ff27 100644 --- a/lima.wdl +++ b/lima.wdl @@ -86,6 +86,14 @@ task Lima { ~{inputBamFile} \ ~{barcodeFile} \ ~{basename(outputPrefix) + ".fl.bam"} + + # Move commands below are needed because glob command does not find + # multiple bam/bam.pbi/subreadset.xml files when not located in working + # directory. + mv "~{basename(outputPrefix)}.fl.json" "~{outputPrefix}.fl.json" + mv "~{basename(outputPrefix)}.fl.lima.counts" "~{outputPrefix}.fl.lima.counts" + mv "~{basename(outputPrefix)}.fl.lima.report" "~{outputPrefix}.fl.lima.report" + mv "~{basename(outputPrefix)}.fl.lima.summary" "~{outputPrefix}.fl.lima.summary" } output { @@ -93,10 +101,10 @@ task Lima { Array[File] outputFLindexFile = glob("~{basename(outputPrefix)}*.bam.pbi") Array[File] outputFLxmlFile = glob("~{basename(outputPrefix)}*.subreadset.xml") File outputSTDERRfile = outputPrefix + ".fl.stderr.log" - File outputJSONfile = "~{basename(outputPrefix)}.fl.json" - File outputCountsFile = "~{basename(outputPrefix)}.fl.lima.counts" - File outputReportFile = "~{basename(outputPrefix)}.fl.lima.report" - File outputSummaryFile = "~{basename(outputPrefix)}.fl.lima.summary" + File outputJSONfile = outputPrefix + ".fl.json" + File outputCountsFile = outputPrefix + ".fl.lima.counts" + File outputReportFile = outputPrefix + ".fl.lima.report" + File outputSummaryFile 
= outputPrefix + ".fl.lima.summary" } runtime { From 56c6f23e7b099c42994364a9cb8b0646e4159373 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 24 Mar 2020 13:40:29 +0100 Subject: [PATCH 0294/1208] Fix --min-polya-length argument syntax & add workaround for glob command not locating files in output directory. --- CHANGELOG.md | 1 + isoseq3.wdl | 34 +++++++++++++++++++++++++--------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0291d0ae..9c9eecd5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. --> Version 3.1.0-dev --------------------------- ++ Isoseq3: Fix --min-polya-length argument syntax & add workaround for glob command not locating files in output directory. + Lima: Add workaround for glob command not locating files in output directory. + CCS: Add missing backslash. + Cutadapt now explicitly calls the `--compression-level` flag with compression diff --git a/isoseq3.wdl b/isoseq3.wdl index d7111c9a..9d5f217e 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -34,27 +34,43 @@ task Refine { String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" } - command { + command <<< set -e mkdir -p "$(dirname ~{outputPrefix})" + + # Create a unique output name base on the input bam file. 
+ bamBasename="$(basename ~{inputBamFile})" + bamNewName="${bamBasename/fl/flnc}" + folderDirname="$(dirname ~{outputPrefix})" + combinedOutput="${folderDirname}/${bamNewName}" + isoseq3 refine \ - --min-poly-length ~{minPolyAlength} \ + --min-polya-length ~{minPolyAlength} \ ~{true="--require-polya" false="" requirePolyA} \ --log-level ~{logLevel} \ --num-threads ~{cores} \ ~{"--log-file " + outputPrefix + ".flnc.stderr.log"} \ ~{inputBamFile} \ ~{primerFile} \ - ~{outputPrefix + ".flnc.bam"} - } + ${bamNewName} + + # Copy commands below are needed because glob command does not find + # multiple bam/bam.pbi/consensusreadset.xml/filter_summary.json/report.csv + # files when not located in working directory. + cp "${bamNewName}" "${combinedOutput}" + cp "${bamNewName}.pbi" "${combinedOutput}.pbi" + cp "${bamNewName/bam/consensusreadset}.xml" "${combinedOutput/bam/consensusreadset}.xml" + cp "${bamNewName/bam/filter_summary}.json" "${combinedOutput/bam/filter_summary}.json" + cp "${bamNewName/bam/report}.csv" "${combinedOutput/bam/report}.csv" + >>> output { - File outputFLfile = outputPrefix + ".flnc.bam" - File outputFLindexFile = outputPrefix + ".flnc.bam.pbi" + Array[File] outputFLfile = glob("~{basename(outputPrefix)}*.bam") + Array[File] outputFLindexFile = glob("~{basename(outputPrefix)}*.bam.pbi") + Array[File] outputConsensusReadsetFile = glob("~{basename(outputPrefix)}*.consensusreadset.xml") + Array[File] outputFilterSummaryFile = glob("~{basename(outputPrefix)}*.filter_summary.json") + Array[File] outputReportFile = glob("~{basename(outputPrefix)}*.report.csv") File outputSTDERRfile = outputPrefix + ".flnc.stderr.log" - File outputConsensusReadsetFile = outputPrefix + ".consensusreadset.xml" - File outputFilterSummaryFile = outputPrefix + ".filter_summary.json" - File outputReportFile = outputPrefix + ".report.csv" } runtime { From efb8bc226b3c9dbfce9080ae68d6e0e8ba7e2989 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 24 Mar 2020 13:45:49 +0100 
Subject: [PATCH 0295/1208] Update output names. --- isoseq3.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/isoseq3.wdl b/isoseq3.wdl index 9d5f217e..ff6e3368 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -65,8 +65,8 @@ task Refine { >>> output { - Array[File] outputFLfile = glob("~{basename(outputPrefix)}*.bam") - Array[File] outputFLindexFile = glob("~{basename(outputPrefix)}*.bam.pbi") + Array[File] outputFLNCfile = glob("~{basename(outputPrefix)}*.bam") + Array[File] outputFLNCindexFile = glob("~{basename(outputPrefix)}*.bam.pbi") Array[File] outputConsensusReadsetFile = glob("~{basename(outputPrefix)}*.consensusreadset.xml") Array[File] outputFilterSummaryFile = glob("~{basename(outputPrefix)}*.filter_summary.json") Array[File] outputReportFile = glob("~{basename(outputPrefix)}*.report.csv") From 5a72bf8658a7ecab26f3bfffea018c39fede5905 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 24 Mar 2020 13:46:14 +0100 Subject: [PATCH 0296/1208] Update parameter_meta. --- isoseq3.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/isoseq3.wdl b/isoseq3.wdl index ff6e3368..549bc40e 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -92,8 +92,8 @@ task Refine { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputFLfile: {description: "Filtered reads output file."} - outputFLindexFile: {description: "Index of filtered reads output file."} + outputFLNCfile: {description: "Filtered reads output file."} + outputFLNCindexFile: {description: "Index of filtered reads output file."} outputSTDERRfile: {description: "Refine STDERR log file."} outputConsensusReadsetFile: {description: "Refine consensus readset XML file."} outputFilterSummaryFile: {description: "Refine summary file."} From 93c0048e5c5181dd47b97bc2942b0f01d685c300 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 24 Mar 2020 13:57:12 +0100 Subject: [PATCH 0297/1208] Separate additions to isoseq3 into two lines. --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c9eecd5..2d331118 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,8 @@ that users understand how the changes affect the new version. --> Version 3.1.0-dev --------------------------- -+ Isoseq3: Fix --min-polya-length argument syntax & add workaround for glob command not locating files in output directory. ++ Isoseq3: Add workaround for glob command not locating files in output directory. ++ Isoseq3: Fix --min-polya-length argument syntax. + Lima: Add workaround for glob command not locating files in output directory. + CCS: Add missing backslash. + Cutadapt now explicitly calls the `--compression-level` flag with compression From 38a36a81990999b226a268590ec8d05194e8dd05 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 25 Mar 2020 10:03:43 +0100 Subject: [PATCH 0298/1208] Make sure stderr log file from Refine is unique and not overwritten. 
--- CHANGELOG.md | 3 ++- isoseq3.wdl | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d331118..02b87873 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,8 @@ that users understand how the changes affect the new version. --> Version 3.1.0-dev --------------------------- -+ Isoseq3: Add workaround for glob command not locating files in output directory. ++ Isoseq3: Make sure stderr log file from Refine is unique and not overwritten. ++ Isoseq3: Add workaround in Refine for glob command not locating files in output directory. + Isoseq3: Fix --min-polya-length argument syntax. + Lima: Add workaround for glob command not locating files in output directory. + CCS: Add missing backslash. diff --git a/isoseq3.wdl b/isoseq3.wdl index 549bc40e..8cc0db8f 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -49,28 +49,28 @@ task Refine { ~{true="--require-polya" false="" requirePolyA} \ --log-level ~{logLevel} \ --num-threads ~{cores} \ - ~{"--log-file " + outputPrefix + ".flnc.stderr.log"} \ + --log-file "${bamNewName}.stderr.log" \ ~{inputBamFile} \ ~{primerFile} \ ${bamNewName} - # Copy commands below are needed because glob command does not find - # multiple bam/bam.pbi/consensusreadset.xml/filter_summary.json/report.csv - # files when not located in working directory. + # Copy commands below are needed because naming schema for Refine output + # can not be correctly handled in the WDL output section. 
cp "${bamNewName}" "${combinedOutput}" cp "${bamNewName}.pbi" "${combinedOutput}.pbi" cp "${bamNewName/bam/consensusreadset}.xml" "${combinedOutput/bam/consensusreadset}.xml" cp "${bamNewName/bam/filter_summary}.json" "${combinedOutput/bam/filter_summary}.json" cp "${bamNewName/bam/report}.csv" "${combinedOutput/bam/report}.csv" + cp "${bamNewName}.stderr.log" "${combinedOutput}.stderr.log" >>> output { - Array[File] outputFLNCfile = glob("~{basename(outputPrefix)}*.bam") - Array[File] outputFLNCindexFile = glob("~{basename(outputPrefix)}*.bam.pbi") - Array[File] outputConsensusReadsetFile = glob("~{basename(outputPrefix)}*.consensusreadset.xml") - Array[File] outputFilterSummaryFile = glob("~{basename(outputPrefix)}*.filter_summary.json") - Array[File] outputReportFile = glob("~{basename(outputPrefix)}*.report.csv") - File outputSTDERRfile = outputPrefix + ".flnc.stderr.log" + Array[File] outputFLNCfile = glob("*.bam") + Array[File] outputFLNCindexFile = glob("*.bam.pbi") + Array[File] outputConsensusReadsetFile = glob("*.consensusreadset.xml") + Array[File] outputFilterSummaryFile = glob("*.filter_summary.json") + Array[File] outputReportFile = glob("*.report.csv") + Array[File] outputSTDERRfile = glob("*.stderr.log") } runtime { From de05f71c0bcc0d0aaa7f45d8ee08fbe5cfde3f5f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 25 Mar 2020 10:17:16 +0100 Subject: [PATCH 0299/1208] Remove outputPrefix variable from output section. --- CHANGELOG.md | 1 + lima.wdl | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 02b87873..94fb07f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. --> Version 3.1.0-dev --------------------------- ++ Lima: Remove outputPrefix variable from output section. + Isoseq3: Make sure stderr log file from Refine is unique and not overwritten. 
+ Isoseq3: Add workaround in Refine for glob command not locating files in output directory. + Isoseq3: Fix --min-polya-length argument syntax. diff --git a/lima.wdl b/lima.wdl index 67d9ff27..619764fc 100644 --- a/lima.wdl +++ b/lima.wdl @@ -97,9 +97,9 @@ task Lima { } output { - Array[File] outputFLfile = glob("~{basename(outputPrefix)}*.bam") - Array[File] outputFLindexFile = glob("~{basename(outputPrefix)}*.bam.pbi") - Array[File] outputFLxmlFile = glob("~{basename(outputPrefix)}*.subreadset.xml") + Array[File] outputFLfile = glob("*.bam") + Array[File] outputFLindexFile = glob("*.bam.pbi") + Array[File] outputFLxmlFile = glob("*.subreadset.xml") File outputSTDERRfile = outputPrefix + ".fl.stderr.log" File outputJSONfile = outputPrefix + ".fl.json" File outputCountsFile = outputPrefix + ".fl.lima.counts" From 8fc0fff11827a9f2efde980d1eaeba0198947d49 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 25 Mar 2020 12:07:02 +0100 Subject: [PATCH 0300/1208] Add missing output to parameter_meta. --- CHANGELOG.md | 1 + lima.wdl | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 94fb07f6..298b49dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. --> Version 3.1.0-dev --------------------------- ++ Lima: Add missing output to parameter_meta. + Lima: Remove outputPrefix variable from output section. + Isoseq3: Make sure stderr log file from Refine is unique and not overwritten. + Isoseq3: Add workaround in Refine for glob command not locating files in output directory. 
diff --git a/lima.wdl b/lima.wdl index 619764fc..78bab032 100644 --- a/lima.wdl +++ b/lima.wdl @@ -147,6 +147,7 @@ task Lima { # outputs outputFLfile: {description: "Demultiplexed reads output file."} outputFLindexFile: {description: "Index of demultiplexed reads output file."} + outputFLxmlFile: {description: "XML file of the subreadset(s)."} outputSTDERRfile: {description: "Lima STDERR log file."} outputJSONfile: {description: "Lima JSON file."} outputCountsFile: {description: "Lima counts file."} From b3d42bae0684cf6094ff4a60454c9cdb9c1818b5 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 25 Mar 2020 12:08:37 +0100 Subject: [PATCH 0301/1208] Update parameter_meta. --- lima.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lima.wdl b/lima.wdl index 78bab032..747959a1 100644 --- a/lima.wdl +++ b/lima.wdl @@ -145,8 +145,8 @@ task Lima { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputFLfile: {description: "Demultiplexed reads output file."} - outputFLindexFile: {description: "Index of demultiplexed reads output file."} + outputFLfile: {description: "Demultiplexed reads output file(s)."} + outputFLindexFile: {description: "Index of demultiplexed reads output file(s)."} outputFLxmlFile: {description: "XML file of the subreadset(s)."} outputSTDERRfile: {description: "Lima STDERR log file."} outputJSONfile: {description: "Lima JSON file."} From 2e53d9be2c8ded90fc7e6731355cf1058fc386b2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 25 Mar 2020 17:04:35 +0100 Subject: [PATCH 0302/1208] update bwa kit defaults --- CHANGELOG.md | 2 ++ bwa.wdl | 19 +++++++++++++------ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 94fb07f6..0d2a873b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new 
version. --> Version 3.1.0-dev --------------------------- ++ Default threads for BWA in bwa.Kit task: 4. Samtools sort in the + same task: 1. Output BAM compression level to 1. + Lima: Remove outputPrefix variable from output section. + Isoseq3: Make sure stderr log file from Refine is unique and not overwritten. + Isoseq3: Add workaround in Refine for glob command not locating files in output directory. diff --git a/bwa.wdl b/bwa.wdl index 40256101..f4c091ef 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -88,9 +88,15 @@ task Kit { String? readgroup Boolean sixtyFour = false - Int threads = 2 - Int sortThreads = 2 - String memory = "10G" + Int threads = 4 + # Samtools uses *additional* threads. So by default this option should + # not be used. + Int? sortThreads + # Compression uses zlib. Higher than level 2 causes enormous slowdowns. + # GATK/Picard default is level 2. + String sortMemoryPerThread = "4G" + Int compressionLevel = 1 + String memory = "32G" String dockerImage = "biocontainers/bwakit:v0.7.15_cv1" } @@ -108,8 +114,9 @@ task Kit { -p ~{outputPrefix}.hla \ ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ samtools sort \ - -@ ~{sortThreads} \ - -m1G \ + ~{"-@ " + sortThreads} \ + -m ~{sortMemoryPerThread} \ + -l ~{compressionLevel} \ - \ -o ~{outputPrefix}.aln.bam samtools index ~{outputPrefix}.aln.bam ~{outputPrefix}.aln.bai @@ -121,7 +128,7 @@ task Kit { } runtime { - cpu: threads + sortThreads + cpu: threads + 1 # One thread for bwa-postalt + samtools. 
memory: memory docker: dockerImage } From 8318491aee0351c7b7beb79d869f6b024d341d74 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 25 Mar 2020 17:08:13 +0100 Subject: [PATCH 0303/1208] update parameter_meta --- bwa.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index f4c091ef..1ada039e 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -142,7 +142,9 @@ task Kit { readgroup: {description: "A readgroup identifier.", category: "common"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} threads: {description: "The number of threads to use for alignment.", category: "advanced"} - sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + sortThreads: {description: "The number of additional threads to use for sorting.", category: "advanced"} + sortMemoryPerThread: {description: "The amount of memory for each sorting thread.", category: "advanced"} + compressionLevel: {description: "The compression level of the output BAM", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 310d63dad04aefc24ae4ebb9a67f2ba1be1ba2a6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 26 Mar 2020 10:36:37 +0100 Subject: [PATCH 0304/1208] Update bwa.wdl Co-Authored-By: Jasper --- bwa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index 1ada039e..fec2b09f 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -144,7 +144,7 @@ task Kit { threads: {description: "The number of threads to use for alignment.", category: "advanced"} sortThreads: {description: "The number of additional threads to use for sorting.", category: "advanced"} sortMemoryPerThread: {description: "The amount of memory for each sorting thread.", category: "advanced"} - compressionLevel: {description: "The compression level of the output BAM", category: "advanced"} + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 9febd09448ec17d69794ff665e373432d0363ea4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 26 Mar 2020 13:39:16 +0100 Subject: [PATCH 0305/1208] forgotten backslash --- umi-tools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 86f5d64e..07518e57 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -90,7 +90,7 @@ task Dedup { --stdin ~{inputBam} \ --stdout ~{outputBamPath} \ ~{"--output-stats " + statsPrefix} \ - ~{"--umi-separator=" + umiSeparator} + ~{"--umi-separator=" + umiSeparator} \ ~{true="--paired" false="" paired} samtools index ~{outputBamPath} ~{outputBamIndex} } From a28a3634140225a12f9325e24a1cc599b7bca4ff Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 30 Mar 2020 09:48:53 +0200 Subject: [PATCH 0306/1208] stable changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a3050bd5..4c3a3744 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -Version 3.1.0-dev +version 3.1.0 --------------------------- + Default threads for BWA in bwa.Kit task: 4. Samtools sort in the same task: 1. Output BAM compression level to 1. 
From 6a93ad835ce49048b03c97020cbe5d387dc7fec2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 30 Mar 2020 10:00:14 +0200 Subject: [PATCH 0307/1208] setting next version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index fd2a0186..944880fa 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.0 +3.2.0 From 147a115c78c883770b72013efd9cfdec68434f06 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 31 Mar 2020 10:38:19 +0200 Subject: [PATCH 0308/1208] add task for STAR index generation --- star.wdl | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/star.wdl b/star.wdl index e1e55a26..5de925b5 100644 --- a/star.wdl +++ b/star.wdl @@ -20,6 +20,70 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +task GenomeGenerate { + input { + String genomeDir = "STAR_index" + File referenceFasta + File? referenceGtf + Int? sjdbOverhang + + Int threads = 4 + String memory = "60G" + String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" + } + + command { + set -e + mkdir -p "$(dirname ~{genomeDir})" + STAR \ + --runMode genomeGenerate \ + --runThreadN ~{threads} \ + --genomeDir ~{genomeDir} \ + --genomeFastaFiles ~{referenceFasta} \ + ~{"--sjdbGTFfile " + referenceGtf} \ + ~{"--sjdbOverhang " + sjdbOverhang} + } + + output { + File chrLength = "~{genomeDir}/chrLength.txt" + File chrNameLength = "~{genomeDir}/chrNameLength.txt" + File chrName = "~{genomeDir}/chrName.txt" + File chrStart = "~{genomeDir}/chrStart.txt" + File genome = "~{genomeDir}/genome.txt" + File genomeParameters = "~{genomeDir}/genomeParameters.txt" + File sa = "~{genomeDir}/SA" + File saIndex = "~{genomeDir}/SAindex" + File? exonGeTrInfo = "~{genomeDir}/exonGeTrInfo.tab" + File? exonInfo = "~{genomeDir}/exonInfo.tab" + File? geneInfo = "~{genomeDir}/geneInfo.tab" + File? sjdbInfo = "~{genomeDir}/sjdbInfo.txt" + File? 
sjdbListFromGtfOut = "~{genomeDir}/sjdbList.fromGTF.out.tab" + File? sjdbListOut = "~{genomeDir}/sjdbList.out.tab" + File? transcriptInfo = "~{genomeDir}/transcriptInfo.tab" + Array[File] starIndex = select_all([chrLength, chrNameLength, chrName, chrStart, genome, genomeParameters, + sa, saIndex, exonGeTrInfo, exonInfo, geneInfo, sjdbInfo, sjdbListFromGtfOut, + sjdbListOut, transcriptInfo]) + } + + runtime { + cpu: threads + memory: memory + docker: dockerImage + } + + parameter_meta { + genomeDir: {description:"The directory the STAR index should be written to.", categroy: "common"} + referenceFasta: {description: "The reference Fasta file.", category: "required"} + referenceGtf: {description: "The reference GTF file.", category: "common"} + sjdbOverhang: {description: "Equivalent to STAR's `--sjdbOverhang` option.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Star { input { Array[File]+ inputR1 From cc40f3aa341f5d8fbf5b02cb92cf5964af82be1d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 31 Mar 2020 10:40:40 +0200 Subject: [PATCH 0309/1208] update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c3a3744..21019ef2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,10 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> +version 3.2.0 +--------------------------- ++ Added STAR GenomeGenerate task. + version 3.1.0 --------------------------- + Default threads for BWA in bwa.Kit task: 4. 
Samtools sort in the From fe306e5f31d08fadc8482d899d3eefb35e46d5fe Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 1 Apr 2020 16:19:14 +0200 Subject: [PATCH 0310/1208] add runtime_minutes and adjust memory of star index task --- star.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/star.wdl b/star.wdl index 5de925b5..e272cd02 100644 --- a/star.wdl +++ b/star.wdl @@ -28,7 +28,8 @@ task GenomeGenerate { Int? sjdbOverhang Int threads = 4 - String memory = "60G" + String memory = "32G" + Int minutesPerGB = 240 String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } @@ -68,6 +69,7 @@ task GenomeGenerate { runtime { cpu: threads memory: memory + runtime_minutes: ceil(size(referenceFasta, "G") * minutesPerGB / threads) docker: dockerImage } From 619781f26255421acdaa64daa930c279219eba5b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 2 Apr 2020 14:17:08 +0200 Subject: [PATCH 0311/1208] Add fastqc requirements --- fastqc.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fastqc.wdl b/fastqc.wdl index 4d10147c..7e693f67 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -38,7 +38,7 @@ task Fastqc { String? dir Int threads = 1 - String memory = "4G" + String memory = "1G" String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" Array[File]? NoneArray File? NoneFile @@ -80,10 +80,12 @@ task Fastqc { Array[File]? 
images = if extract then glob(reportDir + "/Images/*.png") else NoneArray } + Int estimatedRuntime = 1 + ceil(size(seqFile, "G")) * 4 runtime { cpu: threads memory: memory docker: dockerImage + runtime_minutes: estimatedRuntime } parameter_meta { From 802b65e68ad7b8bcd700649eabd62cbc66b3944c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 2 Apr 2020 14:25:09 +0200 Subject: [PATCH 0312/1208] make estimates overrideable --- fastqc.wdl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fastqc.wdl b/fastqc.wdl index 7e693f67..b1a587ca 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -38,11 +38,14 @@ task Fastqc { String? dir Int threads = 1 - String memory = "1G" + String? memory + Int? runtimeMinutes String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" Array[File]? NoneArray File? NoneFile } + String estimatedMemoryMB = "~{250 + 250 * threads}M" + Int estimatedRuntimeMin = 1 + ceil(size(seqFile, "G")) * 4 # Chops of the .gz extension if present. # The Basename needs to be taken here. Otherwise paths might differ between similar jobs. @@ -80,12 +83,11 @@ task Fastqc { Array[File]? images = if extract then glob(reportDir + "/Images/*.png") else NoneArray } - Int estimatedRuntime = 1 + ceil(size(seqFile, "G")) * 4 runtime { cpu: threads - memory: memory + memory: select_first([memory, estimatedMemoryMB]) docker: dockerImage - runtime_minutes: estimatedRuntime + runtime_minutes: select_first([runtimeMinutes, estimatedRuntimeMin]) } parameter_meta { From 4fb4583e643c56b893f6970671d0752616f63b01 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 2 Apr 2020 14:37:13 +0200 Subject: [PATCH 0313/1208] estimate cutadapt memory and runtime --- cutadapt.wdl | 8 ++++++-- fastqc.wdl | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index d04865b6..bb04e337 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -79,9 +79,12 @@ task Cutadapt { # Hence we use compression level 1 here. 
Int compressionLevel = 1 # This only affects outputs with the .gz suffix. Int cores = 4 - String memory = "4G" + String? memory + Int? runtimeMinutes String dockerImage = "quay.io/biocontainers/cutadapt:2.8--py37h516909a_0" } + Int estimatedRuntimeMin = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) + String estimatedMemory = "~{300 + 100 * cores}M" String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) String read2outputArg = if (defined(read2)) @@ -166,7 +169,8 @@ task Cutadapt { runtime { cpu: cores - memory: memory + memory: select_first([memory, estimatedMemory]) + runtime_minutes: select_first([runtimeMinutes, estimatedRuntimeMin]) docker: dockerImage } diff --git a/fastqc.wdl b/fastqc.wdl index b1a587ca..adc103e8 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -44,7 +44,7 @@ task Fastqc { Array[File]? NoneArray File? NoneFile } - String estimatedMemoryMB = "~{250 + 250 * threads}M" + String estimatedMemory = "~{250 + 250 * threads}M" Int estimatedRuntimeMin = 1 + ceil(size(seqFile, "G")) * 4 # Chops of the .gz extension if present. @@ -85,7 +85,7 @@ task Fastqc { runtime { cpu: threads - memory: select_first([memory, estimatedMemoryMB]) + memory: select_first([memory, estimatedMemory]) docker: dockerImage runtime_minutes: select_first([runtimeMinutes, estimatedRuntimeMin]) } From 3d59d9b820c25fcdd51ac537767f73c695850b6d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 2 Apr 2020 15:24:44 +0200 Subject: [PATCH 0314/1208] rename time parameter --- cutadapt.wdl | 6 +++--- fastqc.wdl | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index bb04e337..5f51e9ca 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -80,10 +80,10 @@ task Cutadapt { Int compressionLevel = 1 # This only affects outputs with the .gz suffix. Int cores = 4 String? memory - Int? runtimeMinutes + Int? 
timeMinutes String dockerImage = "quay.io/biocontainers/cutadapt:2.8--py37h516909a_0" } - Int estimatedRuntimeMin = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) + Int estimatedTimeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) String estimatedMemory = "~{300 + 100 * cores}M" String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) @@ -170,7 +170,7 @@ task Cutadapt { runtime { cpu: cores memory: select_first([memory, estimatedMemory]) - runtime_minutes: select_first([runtimeMinutes, estimatedRuntimeMin]) + time_minutes: select_first([timeMinutes, estimatedTimeMinutes]) docker: dockerImage } diff --git a/fastqc.wdl b/fastqc.wdl index adc103e8..fc935890 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -39,13 +39,13 @@ task Fastqc { Int threads = 1 String? memory - Int? runtimeMinutes + Int? timeMinutes String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" Array[File]? NoneArray File? NoneFile } String estimatedMemory = "~{250 + 250 * threads}M" - Int estimatedRuntimeMin = 1 + ceil(size(seqFile, "G")) * 4 + Int estimatedTimeMinutes = 1 + ceil(size(seqFile, "G")) * 4 # Chops of the .gz extension if present. # The Basename needs to be taken here. Otherwise paths might differ between similar jobs. 
@@ -87,7 +87,7 @@ task Fastqc { cpu: threads memory: select_first([memory, estimatedMemory]) docker: dockerImage - runtime_minutes: select_first([runtimeMinutes, estimatedRuntimeMin]) + time_minutes: select_first([timeMinutes, estimatedTimeMinutes]) } parameter_meta { From 6657f26cb4aaed1a7d142473b353750c3290adc7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 2 Apr 2020 15:35:49 +0200 Subject: [PATCH 0315/1208] runtime_minutes -> time_minutes --- star.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/star.wdl b/star.wdl index e272cd02..4c407331 100644 --- a/star.wdl +++ b/star.wdl @@ -29,7 +29,7 @@ task GenomeGenerate { Int threads = 4 String memory = "32G" - Int minutesPerGB = 240 + Int timeMinutes = ceil(size(referenceFasta, "G") * 240 / threads) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } @@ -69,7 +69,7 @@ task GenomeGenerate { runtime { cpu: threads memory: memory - runtime_minutes: ceil(size(referenceFasta, "G") * minutesPerGB / threads) + time_minutes: timeMinutes docker: dockerImage } From 5b827f8a90d7a0efe2764a6afcf7d23c4fec24fc Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 2 Apr 2020 16:16:20 +0200 Subject: [PATCH 0316/1208] add smoove.wdl --- smoove.wdl | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 smoove.wdl diff --git a/smoove.wdl b/smoove.wdl new file mode 100644 index 00000000..93523f3d --- /dev/null +++ b/smoove.wdl @@ -0,0 +1,68 @@ +version 1.0 + +# MIT License +# +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject 
to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task CallSV { + input { + File bamFile + File bamIndex + File referenceFasta + File referenceFastaFai + String sample + String outputDir = "./smoove" + + String memory = "15G" + String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputDir})" + smoove call \ + --outdir ~{outputDir} \ + --name ~{sample} \ + --fasta ~{referenceFasta} \ + ~{bamFile} + } + + output { + File smooveVcf = outputDir + "/" + sample + "-smoove.vcf.gz" + } + + runtime { + memory: memory + docker: dockerImage + } + + parameter_meta { + # inputs + bamFile: {description: "The bam file to process.", category: "required"} + bamIndex: {description: "The index bam file.", category: "required"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} + referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } + outputDir: {description: "The location the output VCF file should be written.", category: "common"} + sample: {description: "The name of the sample", category: "required"} + memory: {description: "The memory required to run the programs", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file From 7a1ff5d1fcb5fa60c64494212c30165891dbe175 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 2 Apr 2020 16:17:55 +0200 Subject: [PATCH 0317/1208] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c3a3744..797ca07b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- ++ Add WDL task for smoove (lumpy) sv-caller. + Default threads for BWA in bwa.Kit task: 4. Samtools sort in the same task: 1. Output BAM compression level to 1. + Lima: Add missing output to parameter_meta. From 843055e1278addf3ea2c3f180225347e917a4b16 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 3 Apr 2020 14:42:06 +0200 Subject: [PATCH 0318/1208] add time_minutes part I --- biowdl.wdl | 4 ++++ bowtie.wdl | 4 +++- chunked-scatter.wdl | 2 ++ collect-columns.wdl | 4 +++- common.wdl | 5 +++++ umi-tools.wdl | 2 ++ 6 files changed, 19 insertions(+), 2 deletions(-) diff --git a/biowdl.wdl b/biowdl.wdl index 7aa68b27..c8855406 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -31,6 +31,8 @@ task InputConverter { Boolean skipFileCheck=true Boolean checkFileMd5sums=false Boolean old=false + + Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" } @@ -50,6 +52,8 @@ task InputConverter { } runtime { + memory: "2G" + time_minutes: timeMinutes docker: dockerImage } diff --git a/bowtie.wdl b/bowtie.wdl index 18fd6146..a47be1da 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -37,7 +37,8 @@ task Bowtie { String? 
samRG Int threads = 1 - String memory = "16G" + Int timeMinutes = ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads) + String memory = "10G" String picardXmx = "4G" # Image contains bowtie=1.2.2 and picard=2.9.2 String dockerImage = "quay.io/biocontainers/mulled-v2-bfe71839265127576d3cd749c056e7b168308d56:1d8bec77b352cdcf3e9ff3d20af238b33ed96eae-0" @@ -78,6 +79,7 @@ task Bowtie { runtime { cpu: threads memory: memory + time_minutes: timeMinutes docker: dockerImage } diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 619292d9..2e5f6bdc 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -28,6 +28,7 @@ task ChunkedScatter { Int? overlap Int? minimumBasesPerFile + Int timeMinutes String dockerImage = "quay.io/biocontainers/chunked-scatter:0.1.0--py_0" } @@ -48,6 +49,7 @@ task ChunkedScatter { runtime { memory: "4G" + timeMinutes: 5 docker: dockerImage } diff --git a/collect-columns.wdl b/collect-columns.wdl index 8b1fa387..cc841521 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -33,6 +33,8 @@ task CollectColumns { File? referenceGtf String? featureAttribute + Int memoryGb = 4 + ceil(0.5 * length(inputTables)) + Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/collect-columns:0.2.0--py_1" } @@ -56,7 +58,7 @@ task CollectColumns { File outputTable = outputPath } - Int memoryGb = 4 + ceil(0.5 * length(inputTables)) + runtime { memory: "~{memoryGb}G" diff --git a/common.wdl b/common.wdl index ef86abcc..516ce144 100644 --- a/common.wdl +++ b/common.wdl @@ -210,6 +210,9 @@ task YamlToJson { input { File yaml String outputJson = basename(yaml, "\.ya?ml$") + ".json" + + Int timeMinutes = 2 + String memory = "1G" # biowdl-input-converter has python and pyyaml. 
String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" } @@ -230,6 +233,8 @@ task YamlToJson { } runtime { + memory: memory + time_minutes: timeMinutes docker: dockerImage } diff --git a/umi-tools.wdl b/umi-tools.wdl index 07518e57..415081fc 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -76,6 +76,7 @@ task Dedup { Boolean paired = true String memory = "5G" + Int timeMinutes = ceil(size(inputBam, "G") * 18) # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" @@ -105,6 +106,7 @@ task Dedup { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } From 44f87bdea0fa003149e3c9c85e33b59ba164ffa2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Apr 2020 14:50:11 +0200 Subject: [PATCH 0319/1208] move estimates to runtime section --- cutadapt.wdl | 6 ++---- fastqc.wdl | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index 5f51e9ca..fbd51384 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -83,8 +83,6 @@ task Cutadapt { Int? 
timeMinutes String dockerImage = "quay.io/biocontainers/cutadapt:2.8--py37h516909a_0" } - Int estimatedTimeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) - String estimatedMemory = "~{300 + 100 * cores}M" String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) String read2outputArg = if (defined(read2)) @@ -169,8 +167,8 @@ task Cutadapt { runtime { cpu: cores - memory: select_first([memory, estimatedMemory]) - time_minutes: select_first([timeMinutes, estimatedTimeMinutes]) + memory: select_first([memory, "~{300 + 100 * cores}M"]) + time_minutes: select_first([timeMinutes, 1 + ceil(size([read1, read2], "G") * 12.0 / cores)]) docker: dockerImage } diff --git a/fastqc.wdl b/fastqc.wdl index fc935890..3e461053 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -44,8 +44,6 @@ task Fastqc { Array[File]? NoneArray File? NoneFile } - String estimatedMemory = "~{250 + 250 * threads}M" - Int estimatedTimeMinutes = 1 + ceil(size(seqFile, "G")) * 4 # Chops of the .gz extension if present. # The Basename needs to be taken here. Otherwise paths might differ between similar jobs. 
@@ -85,9 +83,9 @@ task Fastqc { runtime { cpu: threads - memory: select_first([memory, estimatedMemory]) + memory: select_first([memory, "~{250 + 250 * threads}M"]) docker: dockerImage - time_minutes: select_first([timeMinutes, estimatedTimeMinutes]) + time_minutes: select_first([timeMinutes, 1 + ceil(size(seqFile, "G")) * 4]) } parameter_meta { From 2c9810f11aef4f7a86381547982feea681f2bbc9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 6 Apr 2020 16:50:28 +0200 Subject: [PATCH 0320/1208] add time_minutes part II --- biopet/biopet.wdl | 8 +++++--- biowdl.wdl | 3 ++- bowtie.wdl | 1 + bwa.wdl | 12 +++++++++--- chunked-scatter.wdl | 4 ++-- collect-columns.wdl | 3 +++ common.wdl | 4 +++- gatk.wdl | 20 ++++++++++++++------ picard.wdl | 21 +++++++++++++++------ samtools.wdl | 7 ++++++- 10 files changed, 60 insertions(+), 23 deletions(-) diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index ec64fb4b..7634e7b0 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -264,8 +264,9 @@ task ScatterRegions { File? bamFile File? 
bamIndex - String memory = "24G" - String javaXmx = "8G" + String memory = "1G" + String javaXmx = "500M" + Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/biopet-scatterregions:0.2--0" } @@ -277,7 +278,7 @@ task ScatterRegions { command <<< set -e -o pipefail mkdir -p ~{outputDirPath} - biopet-scatterregions -Xmx~{javaXmx} \ + biopet-scatterregions -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -R ~{referenceFasta} \ -o ~{outputDirPath} \ ~{"-s " + scatterSize} \ @@ -306,6 +307,7 @@ task ScatterRegions { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } diff --git a/biowdl.wdl b/biowdl.wdl index c8855406..7661a592 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -32,7 +32,7 @@ task InputConverter { Boolean checkFileMd5sums=false Boolean old=false - Int timeMinutes = 2 + Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" } @@ -66,6 +66,7 @@ task InputConverter { checkFileMd5sums: {description: "Whether or not the MD5 sums of the files mentioned in the samplesheet should be checked.", category: "advanced"} old: {description: "Whether or not the old samplesheet format should be used.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/bowtie.wdl b/bowtie.wdl index a47be1da..94a809fa 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -100,6 +100,7 @@ task Bowtie { category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/bwa.wdl b/bwa.wdl index fec2b09f..247386d8 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -29,8 +29,9 @@ task Mem { String? readgroup Int threads = 4 - String memory = "32G" + String memory = "20G" String picardXmx = "4G" + Int timeMinutes = ceil(size([read1, read2], "G") * 200 / threads) # A mulled container is needed to have both picard and bwa in one container. # This container contains: picard (2.18.7), bwa (0.7.17-r1188) String dockerImage = "quay.io/biocontainers/mulled-v2-002f51ea92721407ef440b921fb5940f424be842:43ec6124f9f4f875515f9548733b8b4e5fed9aa6-0" @@ -45,7 +46,7 @@ task Mem { ~{bwaIndex.fastaFile} \ ~{read1} \ ~{read2} \ - | picard -Xmx~{picardXmx} SortSam \ + | picard -Xmx~{picardXmx} -XX:ParallelGCThreads=1 SortSam \ INPUT=/dev/stdin \ OUTPUT=~{outputPath} \ SORT_ORDER=coordinate \ @@ -60,6 +61,7 @@ task Mem { runtime { cpu: threads memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -74,6 +76,7 @@ task Mem { memory: {description: "The amount of memory this job will use.", category: "advanced"} picardXmx: {description: "The maximum memory available to picard SortSam. Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -96,7 +99,8 @@ task Kit { # GATK/Picard default is level 2. 
String sortMemoryPerThread = "4G" Int compressionLevel = 1 - String memory = "32G" + String memory = "20G" + Int timeMinutes = ceil(size([read1, read2], "G") * 220 / threads) String dockerImage = "biocontainers/bwakit:v0.7.15_cv1" } @@ -130,6 +134,7 @@ task Kit { runtime { cpu: threads + 1 # One thread for bwa-postalt + samtools. memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -146,6 +151,7 @@ task Kit { sortMemoryPerThread: {description: "The amount of memory for each sorting thread.", category: "advanced"} compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 2e5f6bdc..1b81687a 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -28,7 +28,7 @@ task ChunkedScatter { Int? overlap Int? 
minimumBasesPerFile - Int timeMinutes + Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/chunked-scatter:0.1.0--py_0" } @@ -49,7 +49,7 @@ task ChunkedScatter { runtime { memory: "4G" - timeMinutes: 5 + time_minutes: timeMinutes docker: dockerImage } diff --git a/collect-columns.wdl b/collect-columns.wdl index cc841521..6855b6c0 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -62,6 +62,7 @@ task CollectColumns { runtime { memory: "~{memoryGb}G" + time_minutes: timeMinutes docker: dockerImage } @@ -86,6 +87,8 @@ task CollectColumns { category: "advanced"} featureAttribute: {description: "Equivalent to the -F option of collect-columns.", category: "advanced"} + memoryGb: {description: "The maximum amount of memory the job will need in GB", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/common.wdl b/common.wdl index 516ce144..be60f8cf 100644 --- a/common.wdl +++ b/common.wdl @@ -211,7 +211,7 @@ task YamlToJson { File yaml String outputJson = basename(yaml, "\.ya?ml$") + ".json" - Int timeMinutes = 2 + Int timeMinutes = 1 String memory = "1G" # biowdl-input-converter has python and pyyaml. String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" @@ -241,6 +241,8 @@ task YamlToJson { parameter_meta { yaml: {description: "The YAML file to convert.", category: "required"} outputJson: {description: "The location the output JSON file should be written to.", category: "advanced"} + memory: {description: "The maximum aount of memroy the job will need.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/gatk.wdl b/gatk.wdl index b730cbee..c9c1d5eb 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -90,8 +90,9 @@ task ApplyBQSR { File referenceFastaDict File referenceFastaFai - String memory = "12G" + String memory = "5G" String javaXmx = "4G" + Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can be used. String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -139,6 +140,7 @@ task ApplyBQSR { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -159,15 +161,16 @@ task BaseRecalibrator { File referenceFastaDict File referenceFastaFai - String memory = "12G" + String memory = "5G" String javaXmx = "4G" + Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can be used. String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { set -e mkdir -p "$(dirname ~{recalibrationReportPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ BaseRecalibrator \ -R ~{referenceFasta} \ -I ~{inputBam} \ @@ -184,6 +187,7 @@ task BaseRecalibrator { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -205,6 +209,7 @@ task BaseRecalibrator { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -688,15 +693,16 @@ task GatherBqsrReports { Array[File] inputBQSRreports String outputReportPath - String memory = "12G" - String javaXmx = "4G" + String memory = "1G" + String javaXmx = "500M" + Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { set -e mkdir -p "$(dirname ~{outputReportPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ GatherBQSRReports \ -I ~{sep=' -I ' inputBQSRreports} \ -O ~{outputReportPath} @@ -708,6 +714,7 @@ task GatherBqsrReports { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -718,6 +725,7 @@ task GatherBqsrReports { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/picard.wdl b/picard.wdl index 7df96aa9..895acea0 100644 --- a/picard.wdl +++ b/picard.wdl @@ -83,8 +83,9 @@ task CollectMultipleMetrics { Boolean collectSequencingArtifactMetrics = true Boolean collectQualityYieldMetrics = true - String memory = "32G" + String memory = "10G" String javaXmx = "8G" + Int timeMinutes = ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -92,7 +93,7 @@ task CollectMultipleMetrics { command { set -e mkdir -p "$(dirname ~{basename})" - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ CollectMultipleMetrics \ I=~{inputBam} \ R=~{referenceFasta} \ @@ -153,6 +154,7 @@ task CollectMultipleMetrics { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -188,6 +190,7 @@ task CollectMultipleMetrics { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -320,15 +323,16 @@ task GatherBamFiles { Array[File]+ inputBamsIndex String outputBamPath - String memory = "12G" + String memory = "5G" String javaXmx = "4G" + Int timeMinutes = ceil(size(inputBams, "G") * 0.5) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } command { set -e mkdir -p "$(dirname ~{outputBamPath})" - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ GatherBamFiles \ INPUT=~{sep=' INPUT=' inputBams} \ OUTPUT=~{outputBamPath} \ @@ -344,6 +348,7 @@ task GatherBamFiles { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -356,6 +361,7 @@ task GatherBamFiles { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -412,8 +418,9 @@ task MarkDuplicates { String outputBamPath String metricsPath - String memory = "24G" + String memory = "10G" String javaXmx = "8G" + Int timeMinutes = ceil(size(inputBams, "G")* 8) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" # The program default for READ_NAME_REGEX is appropriate in nearly every case. 
@@ -431,7 +438,7 @@ task MarkDuplicates { command { set -e mkdir -p "$(dirname ~{outputBamPath})" - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ MarkDuplicates \ INPUT=~{sep=' INPUT=' inputBams} \ OUTPUT=~{outputBamPath} \ @@ -454,6 +461,7 @@ task MarkDuplicates { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -468,6 +476,7 @@ task MarkDuplicates { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/samtools.wdl b/samtools.wdl index a4a893a1..ca6de261 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -203,6 +203,8 @@ task Flagstat { File inputBam String outputPath + String memory = "1G" + Int timeMinutes = size(inputBam, "G") String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } @@ -217,6 +219,8 @@ task Flagstat { } runtime { + memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -224,6 +228,7 @@ task Flagstat { # inputs inputBam: {description: "The BAM file for which statistics should be retrieved.", category: "required"} outputPath: {description: "The location the ouput should be written to.", category: "required"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -399,7 +404,7 @@ task FilterShortReadsBam { String outputPathBam String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } - + String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") command { From 99b1586cf50642a211a35f7aec880796d8f7d92e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 7 Apr 2020 12:18:57 +0200 Subject: [PATCH 0321/1208] time_minutes --- gatk.wdl | 28 +++++++++++++++++++--------- picard.wdl | 7 +++++-- umi-tools.wdl | 1 + 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index c9c1d5eb..125e184b 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -92,7 +92,7 @@ task ApplyBQSR { String memory = "5G" String javaXmx = "4G" - Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can be used. + Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -122,6 +122,7 @@ task ApplyBQSR { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -163,7 +164,7 @@ task BaseRecalibrator { String memory = "5G" String javaXmx = "4G" - Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can be used. + Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -413,15 +414,16 @@ task CombineGVCFs { File referenceFastaDict File referenceFastaFai - String memory = "24G" - String javaXmx = "12G" + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = ceil(size(gvcfFiles, "G") * 8) String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { set -e mkdir -p "$(dirname ~{outputPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ CombineGVCFs \ -R ~{referenceFasta} \ -O ~{outputPath} \ @@ -436,6 +438,7 @@ task CombineGVCFs { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -453,6 +456,7 @@ task CombineGVCFs { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -795,15 +799,16 @@ task GenotypeGVCFs { File? dbsnpVCFIndex File? pedigree - String memory = "18G" + String memory = "7G" String javaXmx = "6G" + Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { set -e mkdir -p "$(dirname ~{outputPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ GenotypeGVCFs \ -R ~{referenceFasta} \ -O ~{outputPath} \ @@ -823,6 +828,7 @@ task GenotypeGVCFs { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -843,6 +849,7 @@ task GenotypeGVCFs { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -919,15 +926,16 @@ task HaplotypeCaller { Boolean gvcf = false String emitRefConfidence = if gvcf then "GVCF" else "NONE" - String memory = "12G" + String memory = "5G" String javaXmx = "4G" + Int timeMinutes = 400 # This will likely be used with intervals, as such size based estimation can't be used. String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { set -e mkdir -p "$(dirname ~{outputPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ HaplotypeCaller \ -R ~{referenceFasta} \ -O ~{outputPath} \ @@ -949,6 +957,7 @@ task HaplotypeCaller { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -976,6 +985,7 @@ task HaplotypeCaller { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/picard.wdl b/picard.wdl index 895acea0..145af9ef 100644 --- a/picard.wdl +++ b/picard.wdl @@ -489,8 +489,9 @@ task MergeVCFs { Array[File]+ inputVCFsIndexes String outputVcfPath - String memory = "24G" - String javaXmx = "8G" + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = size(inputVCFs, "G") String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -513,6 +514,7 @@ task MergeVCFs { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -525,6 +527,7 @@ task MergeVCFs { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/umi-tools.wdl b/umi-tools.wdl index 415081fc..c44635ce 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -118,6 +118,7 @@ task Dedup { umiSeparator: {description: "Seperator used for UMIs in the read names.", category: "advanced"} paired: {description: "Whether or not the data is paired.", category: "common"} memory: {description: "The amount of memory required for the task.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From 5ffb5473d85b77ba20dac1dc9329c2d8a6436166 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 7 Apr 2020 13:51:50 +0200 Subject: [PATCH 0322/1208] put requirements as defaults --- cutadapt.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index fbd51384..421259d9 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -79,8 +79,8 @@ task Cutadapt { # Hence we use compression level 1 here. Int compressionLevel = 1 # This only affects outputs with the .gz suffix. Int cores = 4 - String? memory - Int? 
timeMinutes + String memory = "~{300 + 100 * cores}M" + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) String dockerImage = "quay.io/biocontainers/cutadapt:2.8--py37h516909a_0" } @@ -167,8 +167,8 @@ task Cutadapt { runtime { cpu: cores - memory: select_first([memory, "~{300 + 100 * cores}M"]) - time_minutes: select_first([timeMinutes, 1 + ceil(size([read1, read2], "G") * 12.0 / cores)]) + memory: memory + time_minutes: timeMinutes docker: dockerImage } From 271cc57a82d4f174a75c44ae3228a44176949e53 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 7 Apr 2020 13:53:25 +0200 Subject: [PATCH 0323/1208] put requirements as defaults --- fastqc.wdl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fastqc.wdl b/fastqc.wdl index 3e461053..6708a6bc 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -38,8 +38,9 @@ task Fastqc { String? dir Int threads = 1 - String? memory - Int? timeMinutes + # Fastqc uses 250MB per thread in its wrapper. + String memory = "~{250 + 250 * threads}M" + Int? timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" Array[File]? NoneArray File? NoneFile @@ -83,9 +84,9 @@ task Fastqc { runtime { cpu: threads - memory: select_first([memory, "~{250 + 250 * threads}M"]) + memory: memory docker: dockerImage - time_minutes: select_first([timeMinutes, 1 + ceil(size(seqFile, "G")) * 4]) + time_minutes: timeMinutes } parameter_meta { From 7f4e474765273ed670786c2dca1b7db57995812d Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 7 Apr 2020 15:13:36 +0200 Subject: [PATCH 0324/1208] Replace mv command with cp. 
--- lima.wdl | 10 +++++----- scripts | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lima.wdl b/lima.wdl index 747959a1..c33ee031 100644 --- a/lima.wdl +++ b/lima.wdl @@ -87,13 +87,13 @@ task Lima { ~{barcodeFile} \ ~{basename(outputPrefix) + ".fl.bam"} - # Move commands below are needed because glob command does not find + # copy commands below are needed because glob command does not find # multiple bam/bam.pbi/subreadset.xml files when not located in working # directory. - mv "~{basename(outputPrefix)}.fl.json" "~{outputPrefix}.fl.json" - mv "~{basename(outputPrefix)}.fl.lima.counts" "~{outputPrefix}.fl.lima.counts" - mv "~{basename(outputPrefix)}.fl.lima.report" "~{outputPrefix}.fl.lima.report" - mv "~{basename(outputPrefix)}.fl.lima.summary" "~{outputPrefix}.fl.lima.summary" + cp "~{basename(outputPrefix)}.fl.json" "~{outputPrefix}.fl.json" + cp "~{basename(outputPrefix)}.fl.lima.counts" "~{outputPrefix}.fl.lima.counts" + cp "~{basename(outputPrefix)}.fl.lima.report" "~{outputPrefix}.fl.lima.report" + cp "~{basename(outputPrefix)}.fl.lima.summary" "~{outputPrefix}.fl.lima.summary" } output { diff --git a/scripts b/scripts index dfef7cb2..b83da72b 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit dfef7cb2555667126dc1751add414527240d71bc +Subproject commit b83da72b9b43b956a3062b78fb08044eb9fae464 From fca3b3f03259515e00e3028a3fa2ca2db4e2cdb1 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 7 Apr 2020 15:14:11 +0200 Subject: [PATCH 0325/1208] Update CHANGELOG. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c3a3744..4bc971ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- ++ Lima: Replace mv command with cp. + Default threads for BWA in bwa.Kit task: 4. Samtools sort in the same task: 1. Output BAM compression level to 1. 
+ Lima: Add missing output to parameter_meta. From 722633698afd7c813f853ca91fbf318b6333e2d4 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 7 Apr 2020 15:42:55 +0200 Subject: [PATCH 0326/1208] Update runtime. --- CHANGELOG.md | 1 + ccs.wdl | 2 ++ isoseq3.wdl | 2 ++ lima.wdl | 2 ++ 4 files changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bc971ee..d7068687 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- ++ Add time_minutes runtime to CCS, Lima & IsoSeq3. + Lima: Replace mv command with cp. + Default threads for BWA in bwa.Kit task: 4. Samtools sort in the same task: 1. Output BAM compression level to 1. diff --git a/ccs.wdl b/ccs.wdl index 39bb0a19..a0941e97 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -34,6 +34,7 @@ task CCS { Int cores = 4 String memory = "10G" String dockerImage = "quay.io/biocontainers/pbccs:4.2.0--0" + Int timeMinutes = ceil(size(subreadsFile, "G") * 240 / cores) } command { @@ -64,6 +65,7 @@ task CCS { cpu: cores memory: memory docker: dockerImage + time_minutes: timeMinutes } parameter_meta { diff --git a/isoseq3.wdl b/isoseq3.wdl index 8cc0db8f..de58bc42 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -32,6 +32,7 @@ task Refine { Int cores = 4 String memory = "10G" String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" + Int timeMinutes = ceil(size(inputBamFile, "G") * 240 / cores) } command <<< @@ -77,6 +78,7 @@ task Refine { cpu: cores memory: memory docker: dockerImage + time_minutes: timeMinutes } parameter_meta { diff --git a/lima.wdl b/lima.wdl index c33ee031..5e9c32f1 100644 --- a/lima.wdl +++ b/lima.wdl @@ -51,6 +51,7 @@ task Lima { Int cores = 4 String memory = "10G" String dockerImage = "quay.io/biocontainers/lima:1.11.0--0" + Int timeMinutes = ceil(size(inputBamFile, "G") * 240 / cores) } Map[String, String] libraryDesignOptions = {"same": "--same", "different": "--different", 
"neighbors": "--neighbors"} @@ -111,6 +112,7 @@ task Lima { cpu: cores memory: memory docker: dockerImage + time_minutes: timeMinutes } parameter_meta { From 56030a53809de880c739bec537902a870e921d97 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 7 Apr 2020 15:45:28 +0200 Subject: [PATCH 0327/1208] Update parameter_meta. --- ccs.wdl | 1 + isoseq3.wdl | 1 + lima.wdl | 1 + 3 files changed, 3 insertions(+) diff --git a/ccs.wdl b/ccs.wdl index a0941e97..983900dc 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -81,6 +81,7 @@ task CCS { cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + timeMinutes: {description: "The time (in minutes) it will take for this task to complete.", category: "advanced"} # outputs outputCCSfile: {description: "Consensus reads output file."} diff --git a/isoseq3.wdl b/isoseq3.wdl index de58bc42..0a3f2636 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -92,6 +92,7 @@ task Refine { cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + timeMinutes: {description: "The time (in minutes) it will take for this task to complete.", category: "advanced"} # outputs outputFLNCfile: {description: "Filtered reads output file."} diff --git a/lima.wdl b/lima.wdl index 5e9c32f1..5a9646db 100644 --- a/lima.wdl +++ b/lima.wdl @@ -145,6 +145,7 @@ task Lima { cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + timeMinutes: {description: "The time (in minutes) it will take for this task to complete.", category: "advanced"} # outputs outputFLfile: {description: "Demultiplexed reads output file(s)."} From 1e71e46d5fa85b05174104b7e8ba22948103d214 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 7 Apr 2020 16:42:06 +0200 Subject: [PATCH 0328/1208] time_minutes --- bcftools.wdl | 6 ++++ clever.wdl | 5 ++- collect-columns.wdl | 2 -- delly.wdl | 3 ++ gatk.wdl | 7 ++-- multiqc.wdl | 4 ++- picard.wdl | 34 ++++++++++++-------- samtools.wdl | 78 ++++++++++++++++++++++++--------------------- 8 files changed, 84 insertions(+), 55 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 122fcdd1..d923885d 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -26,6 +26,8 @@ task Bcf2Vcf { input { File bcf String outputPath = "./bcftools/SV.vcf" + String memory = "2G" + Int timeMinutes = ceil(size(bcf, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.9--ha228f0b_3" } @@ -40,12 +42,16 @@ task Bcf2Vcf { } runtime { + memory: memory + time_minutes: timeMinutes docker: dockerImage } parameter_meta { bcf: {description: "The generated BCF from an SV caller", category: "required"} outputPath: {description: "The location the output VCF 
file should be written.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/clever.wdl b/clever.wdl index e1dcf5a6..2da9f4d2 100644 --- a/clever.wdl +++ b/clever.wdl @@ -90,7 +90,8 @@ task Prediction { String outputPath = "./clever" Int threads = 10 - String memory = "15G" + String memory = "55G" + Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } @@ -114,6 +115,7 @@ task Prediction { runtime { cpu: threads memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -125,6 +127,7 @@ task Prediction { outputPath: {description: "The location the output VCF file should be written.", category: "common"} threads: {description: "The the number of threads required to run a program", category: "advanced"} memory: {description: "The memory required to run the programs", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/collect-columns.wdl b/collect-columns.wdl index 6855b6c0..ed2a4577 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -58,8 +58,6 @@ task CollectColumns { File outputTable = outputPath } - - runtime { memory: "~{memoryGb}G" time_minutes: timeMinutes diff --git a/delly.wdl b/delly.wdl index ad8f18d9..efa1bf60 100644 --- a/delly.wdl +++ b/delly.wdl @@ -31,6 +31,7 @@ task CallSV { String outputPath = "./delly/delly.vcf" String memory = "15G" + Int timeMinutes = 300 String dockerImage = "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" } @@ -49,6 +50,7 @@ task CallSV { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -60,6 +62,7 @@ task CallSV { referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The memory required to run the programs", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/gatk.wdl b/gatk.wdl index 125e184b..cb26ca75 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1439,15 +1439,16 @@ task SplitNCigarReads { String outputBam Array[File] intervals = [] - String memory = "16G" + String memory = "5G" String javaXmx = "4G" + Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } command { set -e mkdir -p "$(dirname ~{outputBam})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ SplitNCigarReads \ -I ~{inputBam} \ -R ~{referenceFasta} \ @@ -1462,6 +1463,7 @@ task SplitNCigarReads { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -1479,6 +1481,7 @@ task SplitNCigarReads { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/multiqc.wdl b/multiqc.wdl index db1dd21e..3a1908a6 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -57,7 +57,7 @@ task MultiQC { Array[Boolean] finished = [] # An array of booleans that can be used to let multiqc wait on stuff. String memory = "4G" - + Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/multiqc:1.7--py_1" } @@ -108,6 +108,7 @@ task MultiQC { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -146,6 +147,7 @@ task MultiQC { finished: {description: "An array of booleans that can be used to let multiqc wait on stuff.", category: "internal_use_only"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/picard.wdl b/picard.wdl index 145af9ef..2eb9d410 100644 --- a/picard.wdl +++ b/picard.wdl @@ -26,15 +26,16 @@ task BedToIntervalList { File dict String outputPath = "regions.interval_list" - String memory = "12G" + String memory = "5G" String javaXmx = "4G" + Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } command { set -e mkdir -p "$(dirname ~{outputPath})" - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ BedToIntervalList \ I=~{bedFile} \ O=~{outputPath} \ @@ -47,6 +48,7 @@ task BedToIntervalList { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -204,8 +206,9 @@ task CollectRnaSeqMetrics { String basename String strandSpecificity = "NONE" - String memory = "32G" + String memory = "10G" String javaXmx = "8G" + Int timeMinutes = ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -213,7 +216,7 @@ task CollectRnaSeqMetrics { set -e mkdir -p "$(dirname ~{basename})" picard -Xmx~{javaXmx} \ - CollectRnaSeqMetrics \ + CollectRnaSeqMetrics -XX:ParallelGCThreads=1 \ I=~{inputBam} \ O=~{basename}.RNA_Metrics \ CHART_OUTPUT=~{basename}.RNA_Metrics.pdf \ @@ -228,6 +231,7 @@ task CollectRnaSeqMetrics { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -245,6 +249,7 @@ task CollectRnaSeqMetrics { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -261,15 +266,16 @@ task CollectTargetedPcrMetrics { Array[File]+ targetIntervals String basename - String memory = "12G" + String memory = "5G" String javaXmx = "4G" + Int timeMinutes = ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } command { set -e mkdir -p "$(dirname ~{basename})" - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ CollectTargetedPcrMetrics \ I=~{inputBam} \ R=~{referenceFasta} \ @@ -288,6 +294,7 @@ task CollectTargetedPcrMetrics { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -311,6 +318,7 @@ task CollectTargetedPcrMetrics { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -381,7 +389,7 @@ task GatherVcfs { command { set -e mkdir -p "$(dirname ~{outputVcfPath})" - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ GatherVcfs \ INPUT=~{sep=' INPUT=' inputVcfs} \ OUTPUT=~{outputVcfPath} @@ -491,7 +499,7 @@ task MergeVCFs { String memory = "5G" String javaXmx = "4G" - Int timeMinutes = size(inputVCFs, "G") + Int timeMinutes = ceil(size(inputVCFs, "G")) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -501,7 +509,7 @@ task MergeVCFs { command { set -e mkdir -p "$(dirname ~{outputVcfPath})" - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ MergeVcfs \ INPUT=~{sep=' INPUT=' inputVCFs} \ OUTPUT=~{outputVcfPath} @@ -551,7 +559,7 @@ task SamToFastq { command { set -e - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ SamToFastq \ I=~{inputBam} \ ~{"FASTQ=" + outputRead1} \ @@ -584,7 +592,7 @@ task ScatterIntervalList { command { set -e mkdir scatter_list - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ IntervalListTools \ SCATTER_COUNT=~{scatter_count} \ SUBDIVISION_MODE=BALANCING_WITHOUT_INTERVAL_SUBDIVISION_WITH_OVERFLOW \ @@ -620,7 +628,7 @@ task SortVcf { command { set -e mkdir -p "$(dirname ~{outputVcfPath})" - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ SortVcf \ I=~{sep=" I=" vcfFiles} \ ~{"SEQUENCE_DICTIONARY=" + dict} \ @@ -664,7 +672,7 @@ task RenameSample { command { set -e mkdir -p "$(dirname ~{outputPath})" - picard -Xmx~{javaXmx} \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ RenameSampleInVcf \ I=~{inputVcf} \ O=~{outputPath} \ diff --git a/samtools.wdl b/samtools.wdl index ca6de261..dc462f82 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -198,13 +198,53 @@ task Markdup { } } +task FilterShortReadsBam { + input { + File bamFile + String outputPathBam + String memory 
= "1G" + Int timeMinutes = ceil(size(bamFile, "G") * 8) + String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + } + + String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") + + command { + set -e + mkdir -p "$(dirname ~{outputPathBam})" + samtools view -h ~{bamFile} | \ + awk 'length($10) > 30 || $1 ~/^@/' | \ + samtools view -bS -> ~{outputPathBam} + samtools index ~{outputPathBam} ~{outputPathBamIndex} + } + + output { + File filteredBam = outputPathBam + File filteredBamIndex = outputPathBamIndex + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + bamFile: {description: "The bam file to process.", category: "required"} + outputPathBam: {description: "The filtered bam file.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + task Flagstat { input { File inputBam String outputPath String memory = "1G" - Int timeMinutes = size(inputBam, "G") + Int timeMinutes = ceil(size(inputBam, "G")) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } @@ -396,38 +436,4 @@ task View { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} - -task FilterShortReadsBam { - input { - File bamFile - String outputPathBam - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" - } - - String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") - - command { - set -e - mkdir -p "$(dirname ~{outputPathBam})" - samtools view -h ~{bamFile} | \ - awk 'length($10) > 30 || $1 ~/^@/' | \ - samtools view -bS -> ~{outputPathBam} - samtools index ~{outputPathBam} ~{outputPathBamIndex} - } - - output { - File filteredBam = outputPathBam - File filteredBamIndex = outputPathBamIndex - } - - runtime { - docker: dockerImage - } - - parameter_meta { - bamFile: {description: "The bam file to process.", category: "required"} - outputPathBam: {description: "The filtered bam file.", category: "common"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - } -} +} \ No newline at end of file From 2641a1e9dfea823d9e658b50decf146b0a8ecdd5 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 8 Apr 2020 09:51:43 +0200 Subject: [PATCH 0329/1208] Update smoove.wdl Co-Authored-By: DavyCats --- smoove.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/smoove.wdl b/smoove.wdl index 93523f3d..1f12ff28 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -59,10 +59,10 @@ task CallSV { bamFile: {description: "The bam file to process.", category: "required"} bamIndex: {description: "The index bam file.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} - referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } + referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } 
outputDir: {description: "The location the output VCF file should be written.", category: "common"} sample: {description: "The name of the sample", category: "required"} memory: {description: "The memory required to run the programs", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} From e3e0987b1d9679c5c28673c354f9622dc475468c Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 8 Apr 2020 09:51:52 +0200 Subject: [PATCH 0330/1208] Update smoove.wdl Co-Authored-By: DavyCats --- smoove.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index 1f12ff28..d883cd6e 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -61,7 +61,7 @@ task CallSV { referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } outputDir: {description: "The location the output VCF file should be written.", category: "common"} - sample: {description: "The name of the sample", category: "required"} + sample: {description: "The name of the sample.", category: "required"} memory: {description: "The memory required to run the programs", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From 7a2be2edd248edfd5111ed015e84c87038db46a7 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 8 Apr 2020 09:52:01 +0200 Subject: [PATCH 0331/1208] Update smoove.wdl Co-Authored-By: DavyCats --- smoove.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index d883cd6e..e1f83f31 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -62,7 +62,7 @@ task CallSV { referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } outputDir: {description: "The location the output VCF file should be written.", category: "common"} sample: {description: "The name of the sample.", category: "required"} - memory: {description: "The memory required to run the programs", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From 5c2503623b3ea6336738fd343c93686fc8a319f3 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 8 Apr 2020 09:52:16 +0200 Subject: [PATCH 0332/1208] Update smoove.wdl Co-Authored-By: DavyCats --- smoove.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index e1f83f31..c8c49f9d 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -57,7 +57,7 @@ task CallSV { parameter_meta { # inputs bamFile: {description: "The bam file to process.", category: "required"} - bamIndex: {description: "The index bam file.", category: "required"} + bamIndex: {description: "The index of the bam file.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } outputDir: {description: "The location the output VCF file should be written.", category: "common"} From 73f71840924eb148ec2087db799eaf5499c25d31 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 8 Apr 2020 10:16:13 +0200 Subject: [PATCH 0333/1208] add timeMinutes --- smoove.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/smoove.wdl b/smoove.wdl index c8c49f9d..86eabc69 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -33,6 +33,7 @@ task CallSV { String memory = "15G" String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" + Int timeMinutes = 1440 } command { @@ -52,6 +53,8 @@ task CallSV { runtime { memory: memory docker: dockerImage + time_minutes: timeMinutes + } parameter_meta { From c8d443ae7e3474e24b19ffcff7364e5dc3768f23 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 8 Apr 2020 10:17:32 +0200 Subject: [PATCH 0334/1208] change task name to Call --- smoove.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index 
86eabc69..9ec46305 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -22,7 +22,7 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -task CallSV { +task Call { input { File bamFile File bamIndex From 8f1d98a42841e8b6e6b45e917ad8881df9780a1e Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 8 Apr 2020 10:24:56 +0200 Subject: [PATCH 0335/1208] Update smoove.wdl Co-Authored-By: DavyCats --- smoove.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index 9ec46305..c3ab8f6a 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -38,7 +38,7 @@ task Call { command { set -e - mkdir -p "$(dirname ~{outputDir})" + mkdir -p ~{outputDir} smoove call \ --outdir ~{outputDir} \ --name ~{sample} \ From 3d73a28be412afd95bde9fe0a055f70c8942c506 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 8 Apr 2020 10:40:29 +0200 Subject: [PATCH 0336/1208] Add baseline value. --- CHANGELOG.md | 2 +- ccs.wdl | 2 +- isoseq3.wdl | 2 +- lima.wdl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7068687..7c41c525 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- -+ Add time_minutes runtime to CCS, Lima & IsoSeq3. ++ Add time_minutes in runtime of CCS, Lima & IsoSeq3. + Lima: Replace mv command with cp. + Default threads for BWA in bwa.Kit task: 4. Samtools sort in the same task: 1. Output BAM compression level to 1. 
diff --git a/ccs.wdl b/ccs.wdl index 983900dc..6a90475b 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -34,7 +34,7 @@ task CCS { Int cores = 4 String memory = "10G" String dockerImage = "quay.io/biocontainers/pbccs:4.2.0--0" - Int timeMinutes = ceil(size(subreadsFile, "G") * 240 / cores) + Int timeMinutes = 1 + ceil(size(subreadsFile, "G") * 240 / cores) } command { diff --git a/isoseq3.wdl b/isoseq3.wdl index 0a3f2636..803aa551 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -32,7 +32,7 @@ task Refine { Int cores = 4 String memory = "10G" String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" - Int timeMinutes = ceil(size(inputBamFile, "G") * 240 / cores) + Int timeMinutes = 1 + ceil(size(inputBamFile, "G") * 240 / cores) } command <<< diff --git a/lima.wdl b/lima.wdl index 5a9646db..33949aa6 100644 --- a/lima.wdl +++ b/lima.wdl @@ -51,7 +51,7 @@ task Lima { Int cores = 4 String memory = "10G" String dockerImage = "quay.io/biocontainers/lima:1.11.0--0" - Int timeMinutes = ceil(size(inputBamFile, "G") * 240 / cores) + Int timeMinutes = 1 + ceil(size(inputBamFile, "G") * 240 / cores) } Map[String, String] libraryDesignOptions = {"same": "--same", "different": "--different", "neighbors": "--neighbors"} From e37162e4fce20021a675c5c1aeb01ae0ed7c64f5 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 8 Apr 2020 14:24:42 +0200 Subject: [PATCH 0337/1208] add meta info for time_minutes --- smoove.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/smoove.wdl b/smoove.wdl index c3ab8f6a..afdd862a 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -66,6 +66,7 @@ task Call { outputDir: {description: "The location the output VCF file should be written.", category: "common"} sample: {description: "The name of the sample.", category: "required"} memory: {description: "The memory required to run the programs.", category: "advanced"} + time_minutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} dockerImage: {description: "The docker 
image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From b4e37856cf5f21f56d5824dc7c01ac1c63cdf4c0 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 8 Apr 2020 14:56:26 +0200 Subject: [PATCH 0338/1208] update smoove.wdl --- smoove.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index afdd862a..e8846f72 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -66,7 +66,7 @@ task Call { outputDir: {description: "The location the output VCF file should be written.", category: "common"} sample: {description: "The name of the sample.", category: "required"} memory: {description: "The memory required to run the programs.", category: "advanced"} - time_minutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} + timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From 605bba8d934f99bf69cefbde03c3fe6d70728c5f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 8 Apr 2020 15:56:07 +0200 Subject: [PATCH 0339/1208] even more time_minutes --- CPAT.wdl | 3 + bcftools.wdl | 2 +- bedtools.wdl | 62 ++++++----- bowtie.wdl | 2 +- bwa.wdl | 4 +- clever.wdl | 1 + cutadapt.wdl | 287 +++++++++++-------------------------------------- fastqc.wdl | 3 +- gatk.wdl | 2 +- gffcompare.wdl | 3 + gffread.wdl | 3 + hisat2.wdl | 5 +- htseq.wdl | 59 +++------- picard.wdl | 12 +-- samtools.wdl | 11 +- star.wdl | 3 + stringtie.wdl | 123 ++++++--------------- umi-tools.wdl | 2 +- 18 files changed, 184 insertions(+), 403 deletions(-) diff --git a/CPAT.wdl b/CPAT.wdl index 098d9ca6..8d212b07 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -31,6 +31,7 @@ task CPAT { # CPAT should not index the reference genome. 
Array[String]? startCodons Array[String]? stopCodons + Int timeMinutes = 1 + ceil(size(gene, "G") * 30) String dockerImage = "biocontainers/cpat:v1.2.4_cv1" } @@ -55,6 +56,7 @@ task CPAT { runtime { docker: dockerImage + time_minutes: timeMinutes } parameter_meta { @@ -67,6 +69,7 @@ task CPAT { category: "advanced"} startCodons: {description: "Equivalent to CPAT's `--start` option.", category: "advanced"} stopCodons: {description: "Equivalent to CPAT's `--stop` option.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/bcftools.wdl b/bcftools.wdl index d923885d..53165c6b 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -27,7 +27,7 @@ task Bcf2Vcf { File bcf String outputPath = "./bcftools/SV.vcf" String memory = "2G" - Int timeMinutes = ceil(size(bcf, "G")) + Int timeMinutes = 1 + ceil(size(bcf, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.9--ha228f0b_3" } diff --git a/bedtools.wdl b/bedtools.wdl index 4f39e2a8..99bb351e 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -24,8 +24,10 @@ task Complement { input { File faidx File inputBed - String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" String outputBed = basename(inputBed, "\.bed") + ".complement.bed" + String memory = "2G" + Int timeMinutes = 1 + ceil(size([inputBed, faidx], "G")) + String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } # Use a fasta index file to get the genome sizes. 
And convert that to the @@ -44,20 +46,19 @@ task Complement { } runtime { + memory: memory + time_minutes: timeMinutes docker: dockerImage } parameter_meta { - faidx: {description: "The fasta index (.fai) file from which to extract the genome sizes", - category: "required"} - inputBed: {description: "The inputBed to complement", - category: "required"} - outputBed: {description: "The path to write the output to", - category: "advanced"} - dockerImage: { - description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + faidx: {description: "The fasta index (.fai) file from which to extract the genome sizes.", category: "required"} + inputBed: {description: "The inputBed to complement.", category: "required"} + outputBed: {description: "The path to write the output to.", category: "advanced"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } @@ -97,6 +98,8 @@ task MergeBedFiles { input { Array[File]+ bedFiles String outputBed = "merged.bed" + String memory = "2G" + Int timeMinutes = 1 + ceil(size(bedFiles, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -111,17 +114,17 @@ task MergeBedFiles { } runtime { + memory: memory + time_minutes: timeMinutes docker: dockerImage } parameter_meta { - bedFiles: {description: "The bed files to merge", - category: "required"} - outputBed: {description: "The path to write the output to", - category: "advanced"} - dockerImage: { - description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + bedFiles: {description: "The bed files to merge.", category: "required"} + outputBed: {description: "The path to write the output to.", category: "advanced"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } @@ -172,6 +175,8 @@ task Intersect { # Giving a faidx file will set the sorted option. File? faidx String outputBed = "intersect.bed" + String memory = "2G" + Int timeMinutes = 1 + ceil([regionsA, regionsB], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } Boolean sorted = defined(faidx) @@ -192,21 +197,20 @@ task Intersect { } runtime { + memory: memory + time_minutes: timeMinutes docker: dockerImage } parameter_meta { faidx: {description: "The fasta index (.fai) file that is used to create the genome file required for sorted output. Implies sorted option.", category: "common"} - regionsA: {description: "Region file a to intersect", - category: "required"} - regionsB: {description: "Region file b to intersect", - category: "required"} - outputBed: {description: "The path to write the output to", - category: "advanced"} - dockerImage: { - description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + regionsA: {description: "Region file a to intersect", category: "required"} + regionsB: {description: "Region file b to intersect", category: "required"} + outputBed: {description: "The path to write the output to", category: "advanced"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } diff --git a/bowtie.wdl b/bowtie.wdl index 94a809fa..87427e7d 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -37,7 +37,7 @@ task Bowtie { String? samRG Int threads = 1 - Int timeMinutes = ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads) + Int timeMinutes = 1 + ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads) String memory = "10G" String picardXmx = "4G" # Image contains bowtie=1.2.2 and picard=2.9.2 diff --git a/bwa.wdl b/bwa.wdl index 247386d8..a39eb3e9 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -31,7 +31,7 @@ task Mem { Int threads = 4 String memory = "20G" String picardXmx = "4G" - Int timeMinutes = ceil(size([read1, read2], "G") * 200 / threads) + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 200 / threads) # A mulled container is needed to have both picard and bwa in one container. 
# This container contains: picard (2.18.7), bwa (0.7.17-r1188) String dockerImage = "quay.io/biocontainers/mulled-v2-002f51ea92721407ef440b921fb5940f424be842:43ec6124f9f4f875515f9548733b8b4e5fed9aa6-0" @@ -100,7 +100,7 @@ task Kit { String sortMemoryPerThread = "4G" Int compressionLevel = 1 String memory = "20G" - Int timeMinutes = ceil(size([read1, read2], "G") * 220 / threads) + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) String dockerImage = "biocontainers/bwakit:v0.7.15_cv1" } diff --git a/clever.wdl b/clever.wdl index 2da9f4d2..7e1eac46 100644 --- a/clever.wdl +++ b/clever.wdl @@ -78,6 +78,7 @@ task Mateclever { outputPath: {description: "The location the output VCF file should be written.", category: "common"} threads: {description: "The the number of threads required to run a program", category: "advanced"} memory: {description: "The memory required to run the programs", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/cutadapt.wdl b/cutadapt.wdl index 421259d9..ad32ff21 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -173,227 +173,70 @@ task Cutadapt { } parameter_meta { - read1: { - description: "The first or single end fastq file to be run through cutadapt.", - category: "required" - } - read2: { - description: "An optional second end fastq file to be run through cutadapt.", - category: "common" - } - read1output: { - description: "The name of the resulting first or single end fastq file.", - category: "common" - } - read2output: { - description: "The name of the resulting second end fastq file.", - category: "common" - } - adapter: { - description: "A list of 3' ligated adapter sequences to be cut from the given first or single end fastq file.", - category: "common" - } - front: { - description: "A list of 5' ligated adapter sequences to be cut from the given first or single end fastq file.", - category: "advanced" - } - anywhere: { - description: "A list of 3' or 5' ligated adapter sequences to be cut from the given first or single end fastq file.", - category: "advanced" - } - adapterRead2: { - description: "A list of 3' ligated adapter sequences to be cut from the given second end fastq file.", - category: "common" - } - frontRead2: { - description: "A list of 5' ligated adapter sequences to be cut from the given second end fastq file.", - category: "advanced" - } - anywhereRead2: { - description: "A list of 3' or 5' ligated adapter sequences to be cut from the given second end fastq file.", - category: "advanced" - } - interleaved: { - description: "Equivalent to cutadapt's --interleaved flag.", - category: "advanced" - } - pairFilter: { - description: "Equivalent to cutadapt's --pair-filter option.", - category: "advanced" - } - errorRate: { - description: "Equivalent to cutadapt's --error-rate option.", - category: "advanced" - } - noIndels: { - 
description: "Equivalent to cutadapt's --no-indels flag.", - category: "advanced" - } - times: { - description: "Equivalent to cutadapt's --times option.", - category: "advanced" - } - overlap: { - description: "Equivalent to cutadapt's --overlap option.", - category: "advanced" - } - matchReadWildcards: { - description: "Equivalent to cutadapt's --match-read-wildcards flag.", - category: "advanced" - } - noMatchAdapterWildcards: { - description: "Equivalent to cutadapt's --no-match-adapter-wildcards flag.", - category: "advanced" - } - noTrim: { - description: "Equivalent to cutadapt's --no-trim flag.", - category: "advanced" - } - maskAdapter: { - description: "Equivalent to cutadapt's --mask-adapter flag.", - category: "advanced" - } - cut: { - description: "Equivalent to cutadapt's --cut option.", - category: "advanced" - } - nextseqTrim: { - description: "Equivalent to cutadapt's --nextseq-trim option.", - category: "advanced" - } - qualityCutoff: { - description: "Equivalent to cutadapt's --quality-cutoff option.", - category: "advanced" - } - qualityBase: { - description: "Equivalent to cutadapt's --quality-base option.", - category: "advanced" - } - length: { - description: "Equivalent to cutadapt's --length option.", - category: "advanced" - } - trimN: { - description: "Equivalent to cutadapt's --trim-n flag.", - category: "advanced" - } - lengthTag: { - description: "Equivalent to cutadapt's --length-tag option.", - category: "advanced" - } - stripSuffix: { - description: "Equivalent to cutadapt's --strip-suffix option.", - category: "advanced" - } - prefix: { - description: "Equivalent to cutadapt's --prefix option.", - category: "advanced" - } - suffix: { - description: "Equivalent to cutadapt's --suffix option.", - category: "advanced" - } - minimumLength: { - description: "Equivalent to cutadapt's --minimum-length option.", - category: "advanced" - } - maximumLength: { - description: "Equivalent to cutadapt's --maximum-length option.", - category: 
"advanced" - } - maxN: { - description: "Equivalent to cutadapt's --max-n option.", - category: "advanced" - } - discardTrimmed: { - description: "Equivalent to cutadapt's --quality-cutoff option.", - category: "advanced" - } - discardUntrimmed: { - description: "Equivalent to cutadapt's --discard-untrimmed option.", - category: "advanced" - } - infoFilePath: { - description: "Equivalent to cutadapt's --info-file option.", - category: "advanced" - } - restFilePath: { - description: "Equivalent to cutadapt's --rest-file option.", - category: "advanced" - } - wildcardFilePath: { - description: "Equivalent to cutadapt's --wildcard-file option.", - category: "advanced" - } - tooShortOutputPath: { - description: "Equivalent to cutadapt's --too-short-output option.", - category: "advanced" - } - tooLongOutputPath: { - description: "Equivalent to cutadapt's --too-long-output option.", - category: "advanced" - } - untrimmedOutputPath: { - description: "Equivalent to cutadapt's --untrimmed-output option.", - category: "advanced" - } - tooShortPairedOutputPath: { - description: "Equivalent to cutadapt's --too-short-paired-output option.", - category: "advanced" - } - tooLongPairedOutputPath: { - description: "Equivalent to cutadapt's --too-long-paired-output option.", - category: "advanced" - } - untrimmedPairedOutputPath: { - description: "Equivalent to cutadapt's --untrimmed-paired-output option.", - category: "advanced" - } - colorspace: { - description: "Equivalent to cutadapt's --colorspace flag.", - category: "advanced" - } - doubleEncode: { - description: "Equivalent to cutadapt's --double-encode flag.", - category: "advanced" - } - stripF3: { - description: "Equivalent to cutadapt's --strip-f3 flag.", - category: "advanced" - } - maq: { - description: "Equivalent to cutadapt's --maq flag.", - category: "advanced" - } - bwa: { - description: "Equivalent to cutadapt's --bwa flag.", - category: "advanced" - } - zeroCap: { - description: "Equivalent to cutadapt's 
--zero-cap flag.", - category: "advanced" - } - noZeroCap: { - description: "Equivalent to cutadapt's --no-zero-cap flag.", - category: "advanced" - } - reportPath: { - description: "The name of the file to write cutadapts's stdout to, this contains some metrics.", - category: "common" - } - compressionLevel: {description: "The compression level if gzipped output is used.", - category: "advanced"} - cores: { - description: "The number of cores to use.", - category: "advanced" - } - memory: { - description: "The amount of memory this job will use.", - category: "advanced" - } - dockerImage: { - description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + read1: {description: "The first or single end fastq file to be run through cutadapt.", category: "required"} + read2: {description: "An optional second end fastq file to be run through cutadapt.", category: "common"} + read1output: {description: "The name of the resulting first or single end fastq file.", category: "common"} + read2output: {description: "The name of the resulting second end fastq file.", category: "common"} + adapter: {description: "A list of 3' ligated adapter sequences to be cut from the given first or single end fastq file.", + category: "common"} + front: {description: "A list of 5' ligated adapter sequences to be cut from the given first or single end fastq file.", + category: "advanced"} + anywhere: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given first or single end fastq file.", + category: "advanced"} + adapterRead2: {description: "A list of 3' ligated adapter sequences to be cut from the given second end fastq file.", + category: "common"} + frontRead2: {description: "A list of 5' ligated adapter sequences to be cut from the given second end fastq file.", + category: "advanced"} + anywhereRead2: {description: "A list of 3' or 5' ligated adapter 
sequences to be cut from the given second end fastq file.", + category: "advanced"} + interleaved: {description: "Equivalent to cutadapt's --interleaved flag.", category: "advanced"} + pairFilter: {description: "Equivalent to cutadapt's --pair-filter option.", category: "advanced"} + errorRate: {description: "Equivalent to cutadapt's --error-rate option.", category: "advanced"} + noIndels: {description: "Equivalent to cutadapt's --no-indels flag.", category: "advanced"} + times: {description: "Equivalent to cutadapt's --times option.", category: "advanced"} + overlap: {description: "Equivalent to cutadapt's --overlap option.", category: "advanced"} + matchReadWildcards: {description: "Equivalent to cutadapt's --match-read-wildcards flag.", category: "advanced"} + noMatchAdapterWildcards: {description: "Equivalent to cutadapt's --no-match-adapter-wildcards flag.", category: "advanced"} + noTrim: {description: "Equivalent to cutadapt's --no-trim flag.", category: "advanced"} + maskAdapter: {description: "Equivalent to cutadapt's --mask-adapter flag.", category: "advanced"} + cut: {description: "Equivalent to cutadapt's --cut option.", category: "advanced"} + nextseqTrim: {description: "Equivalent to cutadapt's --nextseq-trim option.", category: "advanced"} + qualityCutoff: {description: "Equivalent to cutadapt's --quality-cutoff option.", category: "advanced"} + qualityBase: {description: "Equivalent to cutadapt's --quality-base option.", category: "advanced"} + length: {description: "Equivalent to cutadapt's --length option.", category: "advanced"} + trimN: {description: "Equivalent to cutadapt's --trim-n flag.", category: "advanced"} + lengthTag: {description: "Equivalent to cutadapt's --length-tag option.", category: "advanced"} + stripSuffix: {description: "Equivalent to cutadapt's --strip-suffix option.", category: "advanced"} + prefix: {description: "Equivalent to cutadapt's --prefix option.", category: "advanced"} + suffix: {description: "Equivalent to 
cutadapt's --suffix option.", category: "advanced"} + minimumLength: {description: "Equivalent to cutadapt's --minimum-length option.", category: "advanced"} + maximumLength: {description: "Equivalent to cutadapt's --maximum-length option.", category: "advanced"} + maxN: {description: "Equivalent to cutadapt's --max-n option.", category: "advanced"} + discardTrimmed: {description: "Equivalent to cutadapt's --quality-cutoff option.", category: "advanced"} + discardUntrimmed: {description: "Equivalent to cutadapt's --discard-untrimmed option.", category: "advanced"} + infoFilePath: {description: "Equivalent to cutadapt's --info-file option.", category: "advanced"} + restFilePath: {description: "Equivalent to cutadapt's --rest-file option.", category: "advanced"} + wildcardFilePath: {description: "Equivalent to cutadapt's --wildcard-file option.", category: "advanced"} + tooShortOutputPath: {description: "Equivalent to cutadapt's --too-short-output option.", category: "advanced"} + tooLongOutputPath: {description: "Equivalent to cutadapt's --too-long-output option.", category: "advanced"} + untrimmedOutputPath: {description: "Equivalent to cutadapt's --untrimmed-output option.", category: "advanced"} + tooShortPairedOutputPath: {description: "Equivalent to cutadapt's --too-short-paired-output option.", category: "advanced"} + tooLongPairedOutputPath: {description: "Equivalent to cutadapt's --too-long-paired-output option.", category: "advanced"} + untrimmedPairedOutputPath: {description: "Equivalent to cutadapt's --untrimmed-paired-output option.", category: "advanced"} + colorspace: {description: "Equivalent to cutadapt's --colorspace flag.", category: "advanced"} + doubleEncode: {description: "Equivalent to cutadapt's --double-encode flag.", category: "advanced"} + stripF3: {description: "Equivalent to cutadapt's --strip-f3 flag.", category: "advanced"} + maq: {description: "Equivalent to cutadapt's --maq flag.", category: "advanced"} + bwa: {description: 
"Equivalent to cutadapt's --bwa flag.", category: "advanced"} + zeroCap: {description: "Equivalent to cutadapt's --zero-cap flag.", category: "advanced"} + noZeroCap: {description: "Equivalent to cutadapt's --no-zero-cap flag.", category: "advanced"} + reportPath: {description: "The name of the file to write cutadapts's stdout to, this contains some metrics.", + category: "common"} + compressionLevel: {description: "The compression level if gzipped output is used.", category: "advanced"} + cores: {description: "The number of cores to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } diff --git a/fastqc.wdl b/fastqc.wdl index 6708a6bc..606c1bd4 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -40,7 +40,7 @@ task Fastqc { Int threads = 1 # Fastqc uses 250MB per thread in its wrapper. String memory = "~{250 + 250 * threads}M" - Int? timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 + Int timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" Array[File]? NoneArray File? NoneFile @@ -106,6 +106,7 @@ task Fastqc { dir: {description: "Equivalent to fastqc's --dir option.", category: "advanced"} threads: {description: "The number of cores to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/gatk.wdl b/gatk.wdl index cb26ca75..366b32dd 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -416,7 +416,7 @@ task CombineGVCFs { String memory = "5G" String javaXmx = "4G" - Int timeMinutes = ceil(size(gvcfFiles, "G") * 8) + Int timeMinutes = 1 + ceil(size(gvcfFiles, "G") * 8) String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } diff --git a/gffcompare.wdl b/gffcompare.wdl index ca2b1669..197dd9ad 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -44,6 +44,7 @@ task GffCompare { Boolean verbose = false Boolean debugMode = false + Int timeMinutes = 1 + ceil(size(inputGtfFiles, "G") * 30) String dockerImage = "quay.io/biocontainers/gffcompare:0.10.6--h2d50403_0" # This workaround only works in the input section. @@ -110,6 +111,7 @@ task GffCompare { } runtime { + time_minutes: timeMinutes docker: dockerImage } @@ -134,6 +136,7 @@ task GffCompare { noTmap: {description: "Equivalent to gffcompare's `-T` flag.", category: "advanced"} verbose: {description: "Equivalent to gffcompare's `-V` flag.", category: "advanced"} debugMode: {description: "Equivalent to gffcompare's `-D` flag.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/gffread.wdl b/gffread.wdl index 6b23785c..d83e4d76 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -30,6 +30,7 @@ task GffRead { String? proteinFastaPath String? 
filteredGffPath Boolean outputGtfFormat = false + Int timeMinutes = 1 + ceil(size(inputGff) * 10) String dockerImage = "quay.io/biocontainers/gffread:0.9.12--0" } @@ -62,6 +63,7 @@ task GffRead { runtime { docker: dockerImage + time_minutes: timeMinutes } parameter_meta { @@ -73,6 +75,7 @@ task GffRead { proteinFastaPath: {description: "The location the protein fasta should be written to.", category: "advanced"} filteredGffPath: {description: "The location the filtered GFF should be written to.", category: "advanced"} outputGtfFormat: {description: "Equivalent to gffread's `-T` flag.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/hisat2.wdl b/hisat2.wdl index bc6be2e8..3ea18ee8 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -32,8 +32,9 @@ task Hisat2 { String platform = "illumina" Boolean downstreamTranscriptomeAssembly = true - Int threads = 1 + Int threads = 4 String memory = "48G" + Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / threads) # quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1 # is a combination of hisat2 and samtools # hisat2=2.1.0, samtools=1.8 @@ -67,6 +68,7 @@ task Hisat2 { runtime { memory: memory cpu: threads + 1 + time_minutes: timeMinutes docker: dockerImage } @@ -82,6 +84,7 @@ task Hisat2 { downstreamTranscriptomeAssembly: {description: "Equivalent to hisat2's `--dta` flag.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/htseq.wdl b/htseq.wdl index 900a88a7..9fad1714 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -33,6 +33,7 @@ task HTSeqCount { Array[String] additionalAttributes = [] String memory = "40G" + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 60) String dockerImage = "quay.io/biocontainers/htseq:0.11.2--py37h637b7d7_1" } @@ -56,54 +57,24 @@ task HTSeqCount { } runtime { + time_minutes: timeMinutes memory: memory docker: dockerImage } parameter_meta { - inputBams: { - description: "The input BAM files.", - category: "required" - } - gtfFile: { - description: "A GTF/GFF file containing the features of interest.", - category: "required" - } - outputTable: { - description: "The path to which the output table should be written.", - category: "common" - } - format: { - description: "Equivalent to the -f option of htseq-count.", - category: "advanced" - } - order: { - description: "Equivalent to the -r option of htseq-count.", - category: "advanced" - } - stranded: { - description: "Equivalent to the -s option of htseq-count.", - category: "common" - } - featureType: { - description: "Equivalent to the --type option of htseq-count.", - category: "advanced" - } - idattr: { - description: "Equivalent to the --idattr option of htseq-count.", - category: "advanced" - } - additionalAttributes: { - description: "Equivalent to the --additional-attr option of htseq-count.", - category: "advanced" - } - memory: { - description: "The amount of memory the job requires in GB.", - category: "advanced" - } - dockerImage: { - description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + inputBams: {description: "The input BAM files.", category: "required"} + gtfFile: {description: "A GTF/GFF file containing the features of interest.", category: "required"} + outputTable: {description: "The path to which the output table should be written.", category: "common"} + format: {description: "Equivalent to the -f option of htseq-count.", category: "advanced"} + order: {description: "Equivalent to the -r option of htseq-count.", category: "advanced"} + stranded: {description: "Equivalent to the -s option of htseq-count.", category: "common"} + featureType: {description: "Equivalent to the --type option of htseq-count.", category: "advanced"} + idattr: {description: "Equivalent to the --idattr option of htseq-count.", category: "advanced"} + additionalAttributes: {description: "Equivalent to the --additional-attr option of htseq-count.", category: "advanced"} + memory: {description: "The amount of memory the job requires in GB.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } diff --git a/picard.wdl b/picard.wdl index 2eb9d410..d19e3ac4 100644 --- a/picard.wdl +++ b/picard.wdl @@ -87,7 +87,7 @@ task CollectMultipleMetrics { String memory = "10G" String javaXmx = "8G" - Int timeMinutes = ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -208,7 +208,7 @@ task CollectRnaSeqMetrics { String memory = "10G" String javaXmx = "8G" - Int timeMinutes = ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -268,7 +268,7 @@ task CollectTargetedPcrMetrics { String memory = "5G" String javaXmx = "4G" - Int timeMinutes = ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -333,7 +333,7 @@ task GatherBamFiles { String memory = "5G" String javaXmx = "4G" - Int timeMinutes = ceil(size(inputBams, "G") * 0.5) + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 0.5) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -428,7 +428,7 @@ task MarkDuplicates { String memory = "10G" String javaXmx = "8G" - Int timeMinutes = ceil(size(inputBams, "G")* 8) + Int timeMinutes = 1 + ceil(size(inputBams, "G")* 8) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" # The program default for READ_NAME_REGEX is appropriate in nearly every case. 
@@ -499,7 +499,7 @@ task MergeVCFs { String memory = "5G" String javaXmx = "4G" - Int timeMinutes = ceil(size(inputVCFs, "G")) + Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } diff --git a/samtools.wdl b/samtools.wdl index dc462f82..5ffebc9c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -61,6 +61,8 @@ task Index { input { File bamFile String? outputBamPath + String memory = "2G" + Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } @@ -87,6 +89,8 @@ task Index { } runtime { + memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -95,6 +99,8 @@ task Index { bamFile: {description: "The BAM file for which an index should be made.", category: "required"} outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", category: "common"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -203,7 +209,7 @@ task FilterShortReadsBam { File bamFile String outputPathBam String memory = "1G" - Int timeMinutes = ceil(size(bamFile, "G") * 8) + Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } @@ -244,7 +250,7 @@ task Flagstat { String outputPath String memory = "1G" - Int timeMinutes = ceil(size(inputBam, "G")) + Int timeMinutes = 1 + ceil(size(inputBam, "G")) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } @@ -268,6 +274,7 @@ task Flagstat { # inputs inputBam: {description: "The BAM file for which statistics should be retrieved.", category: "required"} outputPath: {description: "The location the ouput should be written to.", category: "required"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/star.wdl b/star.wdl index e1e55a26..94cdfa80 100644 --- a/star.wdl +++ b/star.wdl @@ -36,6 +36,7 @@ task Star { Int runThreadN = 4 String memory = "48G" + Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } @@ -66,6 +67,7 @@ task Star { runtime { cpu: runThreadN memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -83,6 +85,7 @@ task Star { limitBAMsortRAM: {description: "Equivalent to star's `--limitBAMsortRAM` option.", category: "advanced"} runThreadN: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/stringtie.wdl b/stringtie.wdl index cfaccc92..f1d994b3 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -33,6 +33,7 @@ task Stringtie { Int threads = 1 String memory = "10G" + Int timeMinutes = 1 + ceil(size(bam, "G") * 60 / threads) String dockerImage = "quay.io/biocontainers/stringtie:1.3.4--py35_0" } @@ -58,54 +59,24 @@ task Stringtie { runtime { cpu: threads memory: memory + time_minutes: timeMinutes docker: dockerImage } parameter_meta { - bam: { - description: "The input BAM file.", - category: "required" - } - bamIndex: { - description: "The input BAM file's index.", - category: "required" - } - referenceGtf: { - description: "A reference GTF file to be used as guide.", - category: "common" - } - skipNovelTranscripts: { - description: "Whether new transcripts should be assembled or not.", - category: "common" - } - assembledTranscriptsFile: { - description: "Where the output of the assembly should be written.", - category: "required" - } - firstStranded: { - description: "Equivalent to the --rf flag of stringtie.", - category: "required" - } - secondStranded: { - description: "Equivalent to the --fr flag of stringtie.", - category: "required" - } - geneAbundanceFile: { - description: "Where the abundance file should be written.", - category: "common" - } - threads: { - description: "The number of threads to use.", - category: "advanced" - } - memory: { - description: "The amount of memory needed for this task in GB.", - category: "advanced" - } - dockerImage: { - description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + bam: {description: "The input BAM file.", category: "required"} + bamIndex: {description: "The input BAM file's index.", category: "required"} + referenceGtf: {description: "A reference GTF file to be used as guide.", category: "common"} + skipNovelTranscripts: {description: "Whether new transcripts should be assembled or not.", category: "common"} + assembledTranscriptsFile: {description: "Where the output of the assembly should be written.", category: "required"} + firstStranded: {description: "Equivalent to the --rf flag of stringtie.", category: "required"} + secondStranded: {description: "Equivalent to the --fr flag of stringtie.", category: "required"} + geneAbundanceFile: {description: "Where the abundance file should be written.", category: "common"} + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } @@ -123,6 +94,7 @@ task Merge { String? 
label String memory = "10G" + Int timeMinutes = 1 + ceil(size(gtfFiles, "G") * 20) String dockerImage = "quay.io/biocontainers/stringtie:1.3.4--py35_0" } @@ -148,57 +120,24 @@ task Merge { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } parameter_meta { - gtfFiles: { - description: "The GTF files produced by stringtie.", - category: "required" - } - outputGtfPath: { - description: "Where the output should be written.", - category: "required" - } - guideGtf: { - description: "Equivalent to the -G option of 'stringtie --merge'.", - category: "advanced" - } - minimumLength: { - description: "Equivalent to the -m option of 'stringtie --merge'.", - category: "advanced" - } - minimumCoverage: { - description: "Equivalent to the -c option of 'stringtie --merge'.", - category: "advanced" - } - minimumFPKM: { - description: "Equivalent to the -F option of 'stringtie --merge'.", - category: "advanced" - } - minimumTPM: { - description: "Equivalent to the -T option of 'stringtie --merge'.", - category: "advanced" - } - minimumIsoformFraction: { - description: "Equivalent to the -f option of 'stringtie --merge'.", - category: "advanced" - } - keepMergedTranscriptsWithRetainedIntrons: { - description: "Equivalent to the -i flag of 'stringtie --merge'.", - category: "advanced" - } - label: { - description: "Equivalent to the -l option of 'stringtie --merge'.", - category: "advanced" - } - memory: { - description: "The amount of memory needed for this task in GB.", - category: "advanced" - } - dockerImage: { - description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + gtfFiles: {description: "The GTF files produced by stringtie.", category: "required"} + outputGtfPath: {description: "Where the output should be written.", category: "required"} + guideGtf: {description: "Equivalent to the -G option of 'stringtie --merge'.", category: "advanced"} + minimumLength: {description: "Equivalent to the -m option of 'stringtie --merge'.", category: "advanced"} + minimumCoverage: {description: "Equivalent to the -c option of 'stringtie --merge'.", category: "advanced"} + minimumFPKM: {description: "Equivalent to the -F option of 'stringtie --merge'.", category: "advanced"} + minimumTPM: {description: "Equivalent to the -T option of 'stringtie --merge'.", category: "advanced"} + minimumIsoformFraction: {description: "Equivalent to the -f option of 'stringtie --merge'.", category: "advanced"} + keepMergedTranscriptsWithRetainedIntrons: {description: "Equivalent to the -i flag of 'stringtie --merge'.", category: "advanced"} + label: {description: "Equivalent to the -l option of 'stringtie --merge'.", category: "advanced"} + memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } diff --git a/umi-tools.wdl b/umi-tools.wdl index c44635ce..608924f3 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -76,7 +76,7 @@ task Dedup { Boolean paired = true String memory = "5G" - Int timeMinutes = ceil(size(inputBam, "G") * 18) + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 18) # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" From 04f0edc21f115e44933a79eea6214234e87bc556 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 8 Apr 2020 16:46:11 +0200 Subject: [PATCH 0340/1208] Update WDL files. --- CHANGELOG.md | 2 +- ccs.wdl | 7 ++----- isoseq3.wdl | 47 +++++++++++++++-------------------------------- lima.wdl | 7 ++----- 4 files changed, 20 insertions(+), 43 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c41c525..42b0b9e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- -+ Add time_minutes in runtime of CCS, Lima & IsoSeq3. ++ Isoseq3: Remove cp commands and other bash magic, file naming is now solved by pipeline. + Lima: Replace mv command with cp. + Default threads for BWA in bwa.Kit task: 4. Samtools sort in the same task: 1. Output BAM compression level to 1. 
diff --git a/ccs.wdl b/ccs.wdl index 6a90475b..3a8f8879 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -31,10 +31,9 @@ task CCS { File subreadsFile String outputPrefix - Int cores = 4 - String memory = "10G" + Int cores = 2 + String memory = "2G" String dockerImage = "quay.io/biocontainers/pbccs:4.2.0--0" - Int timeMinutes = 1 + ceil(size(subreadsFile, "G") * 240 / cores) } command { @@ -65,7 +64,6 @@ task CCS { cpu: cores memory: memory docker: dockerImage - time_minutes: timeMinutes } parameter_meta { @@ -81,7 +79,6 @@ task CCS { cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - timeMinutes: {description: "The time (in minutes) it will take for this task to complete.", category: "advanced"} # outputs outputCCSfile: {description: "Consensus reads output file."} diff --git a/isoseq3.wdl b/isoseq3.wdl index 803aa551..44005a40 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -27,58 +27,41 @@ task Refine { String logLevel = "WARN" File inputBamFile File primerFile - String outputPrefix + String outputDir + String outputNamePrefix - Int cores = 4 - String memory = "10G" + Int cores = 2 + String memory = "1G" String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" - Int timeMinutes = 1 + ceil(size(inputBamFile, "G") * 240 / cores) } command <<< set -e - mkdir -p "$(dirname ~{outputPrefix})" - - # Create a unique output name base on the input bam file. 
- bamBasename="$(basename ~{inputBamFile})" - bamNewName="${bamBasename/fl/flnc}" - folderDirname="$(dirname ~{outputPrefix})" - combinedOutput="${folderDirname}/${bamNewName}" - + mkdir -p "$(dirname ~{outputDir})" isoseq3 refine \ --min-polya-length ~{minPolyAlength} \ ~{true="--require-polya" false="" requirePolyA} \ --log-level ~{logLevel} \ --num-threads ~{cores} \ - --log-file "${bamNewName}.stderr.log" \ + --log-file "~{outputDir}/~{outputNamePrefix}.stderr.log" \ ~{inputBamFile} \ ~{primerFile} \ - ${bamNewName} - - # Copy commands below are needed because naming schema for Refine output - # can not be correctly handled in the WDL output section. - cp "${bamNewName}" "${combinedOutput}" - cp "${bamNewName}.pbi" "${combinedOutput}.pbi" - cp "${bamNewName/bam/consensusreadset}.xml" "${combinedOutput/bam/consensusreadset}.xml" - cp "${bamNewName/bam/filter_summary}.json" "${combinedOutput/bam/filter_summary}.json" - cp "${bamNewName/bam/report}.csv" "${combinedOutput/bam/report}.csv" - cp "${bamNewName}.stderr.log" "${combinedOutput}.stderr.log" + "~{outputDir}/~{outputNamePrefix}.bam" >>> output { - Array[File] outputFLNCfile = glob("*.bam") - Array[File] outputFLNCindexFile = glob("*.bam.pbi") - Array[File] outputConsensusReadsetFile = glob("*.consensusreadset.xml") - Array[File] outputFilterSummaryFile = glob("*.filter_summary.json") - Array[File] outputReportFile = glob("*.report.csv") - Array[File] outputSTDERRfile = glob("*.stderr.log") + File outputFLNCfile = outputDir + "/" + outputNamePrefix + ".bam" + File outputFLNCindexFile = outputDir + "/" + outputNamePrefix + ".bam.pbi" + File outputConsensusReadsetFile = outputDir + "/" + outputNamePrefix + ".consensusreadset.xml" + File outputFilterSummaryFile = outputDir + "/" + outputNamePrefix + ".filter_summary.json" + File outputReportFile = outputDir + "/" + outputNamePrefix + ".report.csv" + File outputSTDERRfile = outputDir + "/" + outputNamePrefix + ".stderr.log" } runtime { cpu: cores memory: memory 
docker: dockerImage - time_minutes: timeMinutes } parameter_meta { @@ -88,11 +71,11 @@ task Refine { logLevel: {description: "Set log level. Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} inputBamFile: {description: "BAM input file.", category: "required"} primerFile: {description: "Barcode/primer fasta file.", category: "required"} - outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + outputDir: {description: "Output directory path.", category: "required"} + outputNamePrefix: {description: "Basename of the output files.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - timeMinutes: {description: "The time (in minutes) it will take for this task to complete.", category: "advanced"} # outputs outputFLNCfile: {description: "Filtered reads output file."} diff --git a/lima.wdl b/lima.wdl index 33949aa6..ba8a5407 100644 --- a/lima.wdl +++ b/lima.wdl @@ -48,10 +48,9 @@ task Lima { File barcodeFile String outputPrefix - Int cores = 4 - String memory = "10G" + Int cores = 2 + String memory = "2G" String dockerImage = "quay.io/biocontainers/lima:1.11.0--0" - Int timeMinutes = 1 + ceil(size(inputBamFile, "G") * 240 / cores) } Map[String, String] libraryDesignOptions = {"same": "--same", "different": "--different", "neighbors": "--neighbors"} @@ -112,7 +111,6 @@ task Lima { cpu: cores memory: memory docker: dockerImage - time_minutes: timeMinutes } parameter_meta { @@ -145,7 +143,6 @@ task Lima { cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The 
docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - timeMinutes: {description: "The time (in minutes) it will take for this task to complete.", category: "advanced"} # outputs outputFLfile: {description: "Demultiplexed reads output file(s)."} From e232418621388cf28bf450ec79dad1b859219d30 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 9 Apr 2020 10:26:44 +0200 Subject: [PATCH 0341/1208] fix missing "size(" --- bedtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bedtools.wdl b/bedtools.wdl index 99bb351e..4f16b7c0 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -176,7 +176,7 @@ task Intersect { File? faidx String outputBed = "intersect.bed" String memory = "2G" - Int timeMinutes = 1 + ceil([regionsA, regionsB], "G")) + Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } Boolean sorted = defined(faidx) From 9dec796138a9b42b50cd251f25cf08d337c2570e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 9 Apr 2020 10:31:48 +0200 Subject: [PATCH 0342/1208] remove timeMinutes parameter_meta for mateclever --- clever.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/clever.wdl b/clever.wdl index 7e1eac46..2da9f4d2 100644 --- a/clever.wdl +++ b/clever.wdl @@ -78,7 +78,6 @@ task Mateclever { outputPath: {description: "The location the output VCF file should be written.", category: "common"} threads: {description: "The the number of threads required to run a program", category: "advanced"} memory: {description: "The memory required to run the programs", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From ea9c340a3126665caad0e04bd9c5aae21491c624 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 9 Apr 2020 11:04:39 +0200 Subject: [PATCH 0343/1208] fix start time_minutes --- star.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/star.wdl b/star.wdl index 94cdfa80..94a090a8 100644 --- a/star.wdl +++ b/star.wdl @@ -23,7 +23,7 @@ version 1.0 task Star { input { Array[File]+ inputR1 - Array[File]? inputR2 + Array[File] inputR2 = [] Array[File]+ indexFiles String outFileNamePrefix String outSAMtype = "BAM SortedByCoordinate" @@ -36,7 +36,7 @@ task Star { Int runThreadN = 4 String memory = "48G" - Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / runThreadN) + Int timeMinutes = 1 + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } From a4b3d7a925eeeeee6fa13d6c85e5accf4c23f2a3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 9 Apr 2020 16:10:41 +0200 Subject: [PATCH 0344/1208] update time_minutes --- biopet/biopet.wdl | 2 +- htseq.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index 7634e7b0..9004f917 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -266,7 +266,7 @@ task ScatterRegions { String memory = "1G" String javaXmx = "500M" - Int timeMinutes = 1 + Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/biopet-scatterregions:0.2--0" } diff --git a/htseq.wdl b/htseq.wdl index 9fad1714..35faeef3 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -33,7 +33,7 @@ task HTSeqCount { Array[String] additionalAttributes = [] String memory = "40G" - Int timeMinutes = 1 + ceil(size(inputBams, "G") * 60) + Int timeMinutes = 10 + ceil(size(inputBams, "G") * 60) String dockerImage = "quay.io/biocontainers/htseq:0.11.2--py37h637b7d7_1" } From 14d4dfb82412c00c934a994d6dc3a396c63fd0bb 
Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 10 Apr 2020 11:33:33 +0200 Subject: [PATCH 0345/1208] fix CPAT time_minutes --- CPAT.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CPAT.wdl b/CPAT.wdl index 8d212b07..3b542e4f 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -31,7 +31,7 @@ task CPAT { # CPAT should not index the reference genome. Array[String]? startCodons Array[String]? stopCodons - Int timeMinutes = 1 + ceil(size(gene, "G") * 30) + Int timeMinutes = 10 + ceil(size(gene, "G") * 30) String dockerImage = "biocontainers/cpat:v1.2.4_cv1" } From 4f9b821503b03abdb75becf4e913d32249420444 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 10 Apr 2020 12:20:53 +0200 Subject: [PATCH 0346/1208] fix gatk java options --- gatk.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 366b32dd..e6b86eed 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -171,7 +171,7 @@ task BaseRecalibrator { command { set -e mkdir -p "$(dirname ~{recalibrationReportPath})" - gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ BaseRecalibrator \ -R ~{referenceFasta} \ -I ~{inputBam} \ @@ -423,7 +423,7 @@ task CombineGVCFs { command { set -e mkdir -p "$(dirname ~{outputPath})" - gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ CombineGVCFs \ -R ~{referenceFasta} \ -O ~{outputPath} \ @@ -706,7 +706,7 @@ task GatherBqsrReports { command { set -e mkdir -p "$(dirname ~{outputReportPath})" - gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ GatherBQSRReports \ -I ~{sep=' -I ' inputBQSRreports} \ -O ~{outputReportPath} @@ -808,7 +808,7 @@ task GenotypeGVCFs { command { set -e mkdir -p "$(dirname ~{outputPath})" - gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + gatk --java-options 
'-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ GenotypeGVCFs \ -R ~{referenceFasta} \ -O ~{outputPath} \ @@ -935,7 +935,7 @@ task HaplotypeCaller { command { set -e mkdir -p "$(dirname ~{outputPath})" - gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ HaplotypeCaller \ -R ~{referenceFasta} \ -O ~{outputPath} \ @@ -1448,7 +1448,7 @@ task SplitNCigarReads { command { set -e mkdir -p "$(dirname ~{outputBam})" - gatk --java-options -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ SplitNCigarReads \ -I ~{inputBam} \ -R ~{referenceFasta} \ From ee708b8ea82f9b6d4522dab54f980013f1b9d6a4 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 14 Apr 2020 10:34:31 +0200 Subject: [PATCH 0347/1208] Isoseq3 requires more memory by default. --- CHANGELOG.md | 1 + isoseq3.wdl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3dfe8b8d..00113ca3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- ++ Isoseq3: Required more memory for common datasets. + Isoseq3: Remove cp commands and other bash magic, file naming is now solved by pipeline. + Lima: Replace mv command with cp. + Add WDL task for smoove (lumpy) sv-caller. diff --git a/isoseq3.wdl b/isoseq3.wdl index 44005a40..007aa002 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -31,7 +31,7 @@ task Refine { String outputNamePrefix Int cores = 2 - String memory = "1G" + String memory = "2G" String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" } From c10cc771af14d9160980e2ee2c43a341a958197a Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 14 Apr 2020 10:38:49 +0200 Subject: [PATCH 0348/1208] Update CHANGELOG. 
--- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 00113ca3..df1032a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- -+ Isoseq3: Required more memory for common datasets. ++ Isoseq3: Requires more memory by default, is now 2G. + Isoseq3: Remove cp commands and other bash magic, file naming is now solved by pipeline. + Lima: Replace mv command with cp. + Add WDL task for smoove (lumpy) sv-caller. From 98494a219b12aeef2975c471ddcf241bbef0d457 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 15 Apr 2020 14:23:13 +0200 Subject: [PATCH 0349/1208] Remove dirname command from output folder creation step. --- CHANGELOG.md | 1 + isoseq3.wdl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index df1032a4..70a8349a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- ++ Isoseq3: Remove dirname command from output folder creation step. + Isoseq3: Requires more memory by default, is now 2G. + Isoseq3: Remove cp commands and other bash magic, file naming is now solved by pipeline. + Lima: Replace mv command with cp. diff --git a/isoseq3.wdl b/isoseq3.wdl index 007aa002..474709a5 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -37,7 +37,7 @@ task Refine { command <<< set -e - mkdir -p "$(dirname ~{outputDir})" + mkdir -p "~{outputDir}" isoseq3 refine \ --min-polya-length ~{minPolyAlength} \ ~{true="--require-polya" false="" requirePolyA} \ From 92c7e92334726396ce1ac9127c3513c86cd33cd0 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 15 Apr 2020 14:30:23 +0200 Subject: [PATCH 0350/1208] Update brackets. 
--- isoseq3.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/isoseq3.wdl b/isoseq3.wdl index 474709a5..10d87bbc 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -35,7 +35,7 @@ task Refine { String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" } - command <<< + command { set -e mkdir -p "~{outputDir}" isoseq3 refine \ @@ -47,7 +47,7 @@ task Refine { ~{inputBamFile} \ ~{primerFile} \ "~{outputDir}/~{outputNamePrefix}.bam" - >>> + } output { File outputFLNCfile = outputDir + "/" + outputNamePrefix + ".bam" From 117bde45f272ede019276e2c720c84d2c7d8d7a4 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 20 Apr 2020 15:56:04 +0200 Subject: [PATCH 0351/1208] Add new tasks to samtools and picard. --- CHANGELOG.md | 3 ++ picard.wdl | 43 +++++++++++++++++++++++++++++ samtools.wdl | 78 ++++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 113 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70a8349a..3beefefd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- ++ Generalize sort task to now also sort by position, instead of just read name. ++ Add CreateSequenceDictionary task to picard. ++ Add faidx task to samtools. + Isoseq3: Remove dirname command from output folder creation step. + Isoseq3: Requires more memory by default, is now 2G. + Isoseq3: Remove cp commands and other bash magic, file naming is now solved by pipeline. 
diff --git a/picard.wdl b/picard.wdl index 7df96aa9..2a0121dc 100644 --- a/picard.wdl +++ b/picard.wdl @@ -313,6 +313,49 @@ task CollectTargetedPcrMetrics { } } +task CreateSequenceDictionary { + input { + File inputFile + String outputDir + String basenameInputFile = basename(inputFile) + + String memory = "2G" + String javaXmx = "2G" + String dockerImage = "quay.io/biocontainers/picard:2.22.3--0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputDir})" + picard -Xmx~{javaXmx} \ + CreateSequenceDictionary \ + REFERENCE=~{inputFile} \ + OUTPUT="~{outputDir}/~{basenameInputFile}.dict" + } + + output { + File outputDict = outputDir + "/" + basenameInputFile + ".dict" + } + + runtime { + memory: memory + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input fasta file.", category: "required"} + outputDir: {description: "Output directory path.", category: "required"} + basenameInputFile: {description: "The basename of the input file.", category: "required"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputDict: {description: "Dictionary of the input fasta file."} + } +} + # Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs task GatherBamFiles { input { diff --git a/samtools.wdl b/samtools.wdl index a4a893a1..9198119a 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -57,6 +57,46 @@ task BgzipAndIndex { } } +task Faidx { + input { + File inputFile + String outputDir + String basenameInputFile = basename(inputFile) + + String memory = "2G" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + } + + command <<< + set -e + mkdir -p "$(dirname ~{outputDir})" + ln -s ~{inputFile} "~{outputDir}/~{basenameInputFile}" + samtools faidx \ + "~{outputDir}/~{basenameInputFile}" + >>> + + output { + File outputIndex = outputDir + "/" + basenameInputFile + ".fai" + } + + runtime { + memory: memory + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input fasta file.", category: "required"} + outputDir: {description: "Output directory path.", category: "required"} + basenameInputFile: {description: "The basename of the input file.", category: "required"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputIndex: {description: "Index of the input fasta file."} + } +} + task Index { input { File bamFile @@ -136,34 +176,50 @@ task Merge { } } -task SortByName { +task Sort { input { - File bamFile - String outputBamPath = "namesorted.bam" + File inputBam + String outputPrefix + Boolean sortByName = false + String outputFormat = "BAM" - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + Int cores = 1 + String memory = "2G" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } command { set -e - mkdir -p "$(dirname ~{outputBamPath})" - samtools sort -n ~{bamFile} -o ~{outputBamPath} + mkdir -p "$(dirname ~{outputPrefix})" + samtools sort \ + ~{true="-n" false="" sortByName} \ + "--output-fmt " ~{outputFormat} \ + --threads ~{cores} \ + -o "~{outputPrefix}.sorted.bam" \ + ~{inputBam} } output { - File outputBam = outputBamPath + File outputSortedBam = outputPrefix + ".sorted.bam" } runtime { + cpu: cores + memory: memory docker: dockerImage } parameter_meta { # inputs - bamFile: {description: "The BAM file to get sorted.", category: "required"} - outputBamPath: {description: "The location the sorted BAM file should be written to.", category: "common"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + inputFile: {description: "The input SAM file.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} + cores: {description: "The number of cores to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputSortedBAM: {description: "Sorted BAM file."} } } From 7627c314c98f1d179b5031053d575f13404cc8fc Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 20 Apr 2020 16:02:32 +0200 Subject: [PATCH 0352/1208] Fix travis error. --- samtools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 9198119a..20f7ef0a 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -211,7 +211,7 @@ task Sort { parameter_meta { # inputs - inputFile: {description: "The input SAM file.", category: "required"} + inputBam: {description: "The input SAM file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} cores: {description: "The number of cores to be used.", category: "advanced"} @@ -219,7 +219,7 @@ task Sort { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputSortedBAM: {description: "Sorted BAM file."} + outputSortedBam: {description: "Sorted BAM file."} } } From e3890adf453835a2fabca33973bdfe976beb0127 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 21 Apr 2020 09:03:45 +0200 Subject: [PATCH 0353/1208] Address GitHub comments. --- samtools.wdl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 20f7ef0a..82e82a05 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -70,7 +70,7 @@ task Faidx { command <<< set -e mkdir -p "$(dirname ~{outputDir})" - ln -s ~{inputFile} "~{outputDir}/~{basenameInputFile}" + ln ~{inputFile} "~{outputDir}/~{basenameInputFile}" samtools faidx \ "~{outputDir}/~{basenameInputFile}" >>> @@ -181,20 +181,21 @@ task Sort { File inputBam String outputPrefix Boolean sortByName = false - String outputFormat = "BAM" - Int cores = 1 String memory = "2G" String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + + Int? 
threads } command { set -e mkdir -p "$(dirname ~{outputPrefix})" samtools sort \ + "-l 1" ~{true="-n" false="" sortByName} \ - "--output-fmt " ~{outputFormat} \ - --threads ~{cores} \ + "--output-fmt BAM" \ + ~{"--threads " + threads} \ -o "~{outputPrefix}.sorted.bam" \ ~{inputBam} } @@ -204,7 +205,7 @@ task Sort { } runtime { - cpu: cores + cpu: 1 + select_first([threads, 0]) memory: memory docker: dockerImage } @@ -214,9 +215,9 @@ task Sort { inputBam: {description: "The input SAM file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} - cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of threads that need to be added to the task.", category: "advanced"} # outputs outputSortedBam: {description: "Sorted BAM file."} From 90c0270d1a02c56e28267306138dc0eec4449c81 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 21 Apr 2020 09:21:55 +0200 Subject: [PATCH 0354/1208] Update documentation. --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 82e82a05..6c523947 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -217,7 +217,7 @@ task Sort { sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - threads: {description: "The number of threads that need to be added to the task.", category: "advanced"} + threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} # outputs outputSortedBam: {description: "Sorted BAM file."} From f423e53e31ba53ec6db7ba5aad244b51ea28e6db Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 21 Apr 2020 09:31:38 +0200 Subject: [PATCH 0355/1208] Add GC parameter. --- picard.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/picard.wdl b/picard.wdl index 2a0121dc..b78e6039 100644 --- a/picard.wdl +++ b/picard.wdl @@ -328,6 +328,7 @@ task CreateSequenceDictionary { set -e mkdir -p "$(dirname ~{outputDir})" picard -Xmx~{javaXmx} \ + -XX:ParallelGCThreads=1 \ CreateSequenceDictionary \ REFERENCE=~{inputFile} \ OUTPUT="~{outputDir}/~{basenameInputFile}.dict" From 441890ee663cfe623c13f0f5b290a5cc6d6524da Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 21 Apr 2020 10:20:49 +0200 Subject: [PATCH 0356/1208] Address comments. 
--- picard.wdl | 10 ++++------ samtools.wdl | 18 +++++++++--------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/picard.wdl b/picard.wdl index b78e6039..5a759247 100644 --- a/picard.wdl +++ b/picard.wdl @@ -317,25 +317,24 @@ task CreateSequenceDictionary { input { File inputFile String outputDir - String basenameInputFile = basename(inputFile) - String memory = "2G" + String memory = "3G" String javaXmx = "2G" String dockerImage = "quay.io/biocontainers/picard:2.22.3--0" } command { set -e - mkdir -p "$(dirname ~{outputDir})" + mkdir -p "~{outputDir}" picard -Xmx~{javaXmx} \ -XX:ParallelGCThreads=1 \ CreateSequenceDictionary \ REFERENCE=~{inputFile} \ - OUTPUT="~{outputDir}/~{basenameInputFile}.dict" + OUTPUT="~{outputDir}/$(basename ~{inputFile}).dict" } output { - File outputDict = outputDir + "/" + basenameInputFile + ".dict" + File outputDict = outputDir + "/" + basename(InputFile) + ".dict" } runtime { @@ -347,7 +346,6 @@ task CreateSequenceDictionary { # inputs inputFile: {description: "The input fasta file.", category: "required"} outputDir: {description: "Output directory path.", category: "required"} - basenameInputFile: {description: "The basename of the input file.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/samtools.wdl b/samtools.wdl index 6c523947..15ea9a20 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -67,13 +67,13 @@ task Faidx { String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } - command <<< + command { set -e - mkdir -p "$(dirname ~{outputDir})" + mkdir -p "~{outputDir}" ln ~{inputFile} "~{outputDir}/~{basenameInputFile}" samtools faidx \ "~{outputDir}/~{basenameInputFile}" - >>> + } output { File outputIndex = outputDir + "/" + basenameInputFile + ".fai" @@ -179,8 +179,9 @@ task Merge { task Sort { input { File inputBam - String outputPrefix + String outputPath Boolean sortByName = false + Int compressionLevel = 1 String memory = "2G" String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" @@ -190,18 +191,17 @@ task Sort { command { set -e - mkdir -p "$(dirname ~{outputPrefix})" + mkdir -p "~{outputPath}" samtools sort \ - "-l 1" + "-l " ~{compressionLevel} \ ~{true="-n" false="" sortByName} \ - "--output-fmt BAM" \ ~{"--threads " + threads} \ - -o "~{outputPrefix}.sorted.bam" \ + "-o " ~{outputPath} \ ~{inputBam} } output { - File outputSortedBam = outputPrefix + ".sorted.bam" + File outputSortedBam = outputPath } runtime { From a80da1d97ee587ee563a77175dfee0b559cdae80 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 21 Apr 2020 10:44:54 +0200 Subject: [PATCH 0357/1208] Reinstate symlink. 
--- picard.wdl | 2 +- samtools.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/picard.wdl b/picard.wdl index 5a759247..5393cd3a 100644 --- a/picard.wdl +++ b/picard.wdl @@ -334,7 +334,7 @@ task CreateSequenceDictionary { } output { - File outputDict = outputDir + "/" + basename(InputFile) + ".dict" + File outputDict = outputDir + "/" + basename(inputFile) + ".dict" } runtime { diff --git a/samtools.wdl b/samtools.wdl index 15ea9a20..24ae7f94 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -70,7 +70,7 @@ task Faidx { command { set -e mkdir -p "~{outputDir}" - ln ~{inputFile} "~{outputDir}/~{basenameInputFile}" + ln -s ~{inputFile} "~{outputDir}/~{basenameInputFile}" samtools faidx \ "~{outputDir}/~{basenameInputFile}" } From c03c5562da06382ba5447b7993866d6ba47cd4b3 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 21 Apr 2020 11:05:55 +0200 Subject: [PATCH 0358/1208] Fix travis error. --- samtools.wdl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 24ae7f94..ad7799cb 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -61,7 +61,6 @@ task Faidx { input { File inputFile String outputDir - String basenameInputFile = basename(inputFile) String memory = "2G" String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" @@ -70,13 +69,13 @@ task Faidx { command { set -e mkdir -p "~{outputDir}" - ln -s ~{inputFile} "~{outputDir}/~{basenameInputFile}" + ln -s ~{inputFile} "~{outputDir}/$(basename ~{inputFile})" samtools faidx \ - "~{outputDir}/~{basenameInputFile}" + "~{outputDir}/$(basename ~{inputFile})" } output { - File outputIndex = outputDir + "/" + basenameInputFile + ".fai" + File outputIndex = outputDir + "/" + basename(inputFile) + ".fai" } runtime { @@ -88,7 +87,6 @@ task Faidx { # inputs inputFile: {description: "The input fasta file.", category: "required"} outputDir: {description: "Output directory path.", category: "required"} - basenameInputFile: {description: 
"The basename of the input file.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -191,7 +189,7 @@ task Sort { command { set -e - mkdir -p "~{outputPath}" + mkdir -p "$(dirname ~{outputPath})" samtools sort \ "-l " ~{compressionLevel} \ ~{true="-n" false="" sortByName} \ @@ -213,8 +211,9 @@ task Sort { parameter_meta { # inputs inputBam: {description: "The input SAM file.", category: "required"} - outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "required"} sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} From 78cb350a881a67400c7d5b3f62fa337331912e18 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 21 Apr 2020 14:24:08 +0200 Subject: [PATCH 0359/1208] Update CHANGELOG.md. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3beefefd..4a95484e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- ++ Samtools SortByName is now called Sort. 
+ Generalize sort task to now also sort by position, instead of just read name. + Add CreateSequenceDictionary task to picard. + Add faidx task to samtools. From d9265c317f404c26f0bfd0a2950fd716e9204e56 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 28 Apr 2020 13:36:45 +0200 Subject: [PATCH 0360/1208] Fix quotations in samtools sort. --- CHANGELOG.md | 1 + samtools.wdl | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a95484e..c12fcecc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. --> version 3.1.0 --------------------------- ++ Samtools: Fix quotations in sort command. + Samtools SortByName is now called Sort. + Generalize sort task to now also sort by position, instead of just read name. + Add CreateSequenceDictionary task to picard. diff --git a/samtools.wdl b/samtools.wdl index ad7799cb..5521c6aa 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -191,10 +191,10 @@ task Sort { set -e mkdir -p "$(dirname ~{outputPath})" samtools sort \ - "-l " ~{compressionLevel} \ + -l ~{compressionLevel} \ ~{true="-n" false="" sortByName} \ ~{"--threads " + threads} \ - "-o " ~{outputPath} \ + -o ~{outputPath} \ ~{inputBam} } From 169de2d4c1c30245079bcae2e13caac2df1a9f40 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 28 Apr 2020 16:44:22 +0200 Subject: [PATCH 0361/1208] Fix CHANGELOG. --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c12fcecc..1d330414 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,8 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 3.1.0 + +version 3.2.0 --------------------------- + Samtools: Fix quotations in sort command. + Samtools SortByName is now called Sort. 
@@ -20,6 +21,9 @@ version 3.1.0 + Isoseq3: Remove cp commands and other bash magic, file naming is now solved by pipeline. + Lima: Replace mv command with cp. + Add WDL task for smoove (lumpy) sv-caller. + +version 3.1.0 +--------------------------- + Default threads for BWA in bwa.Kit task: 4. Samtools sort in the same task: 1. Output BAM compression level to 1. + Lima: Add missing output to parameter_meta. From 8aafaabd2bbb4acc175a1aa873ae49c313624d46 Mon Sep 17 00:00:00 2001 From: Jasper Date: Thu, 30 Apr 2020 09:27:54 +0200 Subject: [PATCH 0362/1208] Update CHANGELOG.md Co-Authored-By: DavyCats --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d330414..e4dea7c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 3.2.0 +version 3.2.0-develop --------------------------- + Samtools: Fix quotations in sort command. + Samtools SortByName is now called Sort. From 287fc97023de885514d545965a78f77c732d0261 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 1 May 2020 15:48:38 +0200 Subject: [PATCH 0363/1208] more time_minutes --- gatk.wdl | 113 ++++++++++++++++++++++++++++++++++++++------------ umi-tools.wdl | 4 +- 2 files changed, 89 insertions(+), 28 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index e6b86eed..31d895fd 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -34,6 +34,7 @@ task AnnotateIntervals { String memory = "10G" String javaXmx = "2G" + Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -57,6 +58,7 @@ task AnnotateIntervals { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -73,6 +75,7 @@ task AnnotateIntervals { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -223,6 +226,7 @@ task CalculateContamination { String memory = "24G" String javaXmx = "12G" + Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" } @@ -243,6 +247,7 @@ task CalculateContamination { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -252,6 +257,7 @@ task CalculateContamination { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -262,8 +268,9 @@ task CallCopyRatioSegments { String outputPrefix File copyRatioSegments - String memory = "21G" - String javaXmx = "6G" + String memory = "3G" + String javaXmx = "2G" + Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -283,6 +290,7 @@ task CallCopyRatioSegments { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -292,6 +300,7 @@ task CallCopyRatioSegments { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -307,8 +316,10 @@ task CollectAllelicCounts { File referenceFasta File referenceFastaDict File referenceFastaFai - String memory = "90G" - String javaXmx = "30G" + + String memory = "12G" + String javaXmx = "10G" + Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -329,6 +340,7 @@ task CollectAllelicCounts { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -344,6 +356,7 @@ task CollectAllelicCounts { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -360,8 +373,9 @@ task CollectReadCounts { File referenceFastaFai String intervalMergingRule = "OVERLAPPING_ONLY" - String memory = "35G" - String javaXmx = "7G" + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 5) String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -384,6 +398,7 @@ task CollectReadCounts { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -399,6 +414,7 @@ task CollectReadCounts { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -474,8 +490,9 @@ task CombineVariants { Array[File]+ variantIndexes String outputPath - String memory = "24G" + String memory = "16G" String javaXmx = "12G" + Int timeMinutes = 180 String dockerImage = "broadinstitute/gatk3:3.8-1" } @@ -510,6 +527,7 @@ task CombineVariants { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -527,6 +545,7 @@ task CombineVariants { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -538,8 +557,9 @@ task CreateReadCountPanelOfNormals { Array[File]+ readCountsFiles File? annotatedIntervals - String memory = "21G" - String javaXmx = "7G" + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = 5 String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer causes a spark related error for some reason... } @@ -559,6 +579,7 @@ task CreateReadCountPanelOfNormals { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -570,6 +591,7 @@ task CreateReadCountPanelOfNormals { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -582,8 +604,9 @@ task DenoiseReadCounts { File readCounts String outputPrefix - String memory = "39G" - String javaXmx = "13G" + String memory = "6G" + String javaXmx = "4G" + Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -606,6 +629,7 @@ task DenoiseReadCounts { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -618,6 +642,7 @@ task DenoiseReadCounts { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -637,8 +662,9 @@ task FilterMutectCalls { Int uniqueAltReadCount = 4 File mutect2Stats - String memory = "24G" + String memory = "16G" String javaXmx = "12G" + Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" } @@ -667,6 +693,7 @@ task FilterMutectCalls { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -686,6 +713,7 @@ task FilterMutectCalls { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -743,8 +771,9 @@ task GenomicsDBImport { String genomicsDBWorkspacePath = "genomics_db" String genomicsDBTarFile = "genomics_db.tar.gz" String? tmpDir - String memory = "12G" + String memory = "6G" String javaXmx = "4G" + Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -766,6 +795,7 @@ task GenomicsDBImport { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -780,6 +810,7 @@ task GenomicsDBImport { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -865,8 +896,9 @@ task GetPileupSummaries { File sitesForContaminationIndex String outputPrefix - String memory = "24G" + String memory = "16G" String javaXmx = "12G" + Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" } @@ -886,6 +918,7 @@ task GetPileupSummaries { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -901,6 +934,7 @@ task GetPileupSummaries { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -996,8 +1030,9 @@ task LearnReadOrientationModel { input { Array[File]+ f1r2TarGz - String memory = "24G" + String memory = "16G" String javaXmx = "12G" + Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" } @@ -1015,6 +1050,7 @@ task LearnReadOrientationModel { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -1023,6 +1059,7 @@ task LearnReadOrientationModel { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -1032,8 +1069,9 @@ task MergeStats { input { Array[File]+ stats - String memory = "28G" + String memory = "16G" String javaXmx = "14G" + Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -1051,6 +1089,7 @@ task MergeStats { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -1059,6 +1098,7 @@ task MergeStats { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -1076,8 +1116,9 @@ task ModelSegments { else 30 Int maximumNumberOfSmoothingIterations = 10 - String memory = "64G" + String memory = "12G" String javaXmx = "10G" + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -1111,6 +1152,7 @@ task ModelSegments { runtime { docker: dockerImage + time_minute: timeMinutes memory: memory } @@ -1126,6 +1168,7 @@ task ModelSegments { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -1149,8 +1192,9 @@ task MuTect2 { Array[File]+ intervals String outputStats = outputVcf + ".stats" - String memory = "16G" + String memory = "6G" String javaXmx = "4G" + Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -1179,6 +1223,7 @@ task MuTect2 { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -1201,6 +1246,7 @@ task MuTect2 { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -1215,8 +1261,9 @@ task PlotDenoisedCopyRatios { File denoisedCopyRatios Int? minimumContigLength - String memory = "32G" - String javaXmx = "7G" + String memory = "6G" + String javaXmx = "4G" + Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer doesn't seem to contain R. } @@ -1244,6 +1291,7 @@ task PlotDenoisedCopyRatios { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -1257,6 +1305,7 @@ task PlotDenoisedCopyRatios { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -1272,8 +1321,9 @@ task PlotModeledSegments { File allelicCounts Int? minimumContigLength - String memory = "21G" - String javaXmx = "7G" + String memory = "6G" + String javaXmx = "4G" + Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer doesn't seem to contain R. } @@ -1297,6 +1347,7 @@ task PlotModeledSegments { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -1311,6 +1362,7 @@ task PlotModeledSegments { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -1327,8 +1379,9 @@ task PreprocessIntervals { Int padding = if defined(intervals) then 250 else 0 String intervalMergingRule = "OVERLAPPING_ONLY" - String memory = "10G" - String javaXmx = "2G" + String memory = "6G" + String javaXmx = "5G" + Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 6) String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -1352,6 +1405,7 @@ task PreprocessIntervals { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -1367,6 +1421,7 @@ task PreprocessIntervals { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -1382,8 +1437,9 @@ task SelectVariants { String outputPath = "output.vcf.gz" String? selectTypeToInclude Array[File] intervals = [] - String memory = "16G" + String memory = "6G" String javaXmx = "4G" + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -1406,6 +1462,7 @@ task SelectVariants { runtime { docker: dockerImage + time_minute: timeMinutes memory: memory } @@ -1424,6 +1481,7 @@ task SelectVariants { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -1498,8 +1556,9 @@ task VariantFiltration { Array[String]+ filterArguments Array[File] intervals = [] - String memory = "16G" + String memory = "6G" String javaXmx = "4G" + Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -1522,6 +1581,7 @@ task VariantFiltration { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -1540,6 +1600,7 @@ task VariantFiltration { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/umi-tools.wdl b/umi-tools.wdl index 608924f3..bd09853a 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -75,8 +75,8 @@ task Dedup { String? 
statsPrefix Boolean paired = true - String memory = "5G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 18) + String memory = "20G" + Int timeMinutes = 600 + ceil(size(inputBam, "G") * 60) # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" From 84ba1ef7e6676fa4a57a78a1976be3b0a1dff05e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 6 May 2020 14:57:26 +0200 Subject: [PATCH 0364/1208] time_minutes! --- gatk.wdl | 4 ++-- manta.wdl | 6 ++++++ picard.wdl | 5 ++++- samtools.wdl | 8 +++++++- somaticseq.wdl | 15 +++++++++++++++ strelka.wdl | 6 ++++++ vardict.wdl | 5 ++++- 7 files changed, 44 insertions(+), 5 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 31d895fd..7964b519 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -664,7 +664,7 @@ task FilterMutectCalls { String memory = "16G" String javaXmx = "12G" - Int timeMinutes = 180 + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" } @@ -1071,7 +1071,7 @@ task MergeStats { String memory = "16G" String javaXmx = "14G" - Int timeMinutes = 120 + Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } diff --git a/manta.wdl b/manta.wdl index 5006a01e..5382d2a5 100644 --- a/manta.wdl +++ b/manta.wdl @@ -33,6 +33,7 @@ task Germline { Int cores = 1 Int memoryGb = 4 + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/manta:1.4.0--py27_1" } @@ -60,6 +61,7 @@ task Germline { cpu: cores memory: "~{memoryGb}G" docker: dockerImage + time_minutes: timeMinutes } parameter_meta { @@ -74,6 +76,7 @@ task Germline { exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} cores: {description: "The the number of cores required to run a program", category: "required"} memoryGb: {description: "The memory required to run the manta", category: "required"} + 
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -93,6 +96,7 @@ task Somatic { Int cores = 1 Int memoryGb = 4 + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/manta:1.4.0--py27_1" } @@ -130,6 +134,7 @@ task Somatic { cpu: cores memory: "~{memoryGb}G" docker: dockerImage + time_minutes: timeMinutes } parameter_meta { @@ -145,6 +150,7 @@ task Somatic { exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} cores: {description: "The number of cores to use.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/picard.wdl b/picard.wdl index d19e3ac4..494ccf98 100644 --- a/picard.wdl +++ b/picard.wdl @@ -619,8 +619,9 @@ task SortVcf { String outputVcfPath File? dict - String memory = "24G" + String memory = "10G" String javaXmx = "8G" + Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -642,6 +643,7 @@ task SortVcf { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -654,6 +656,7 @@ task SortVcf { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/samtools.wdl b/samtools.wdl index 5ffebc9c..bdf811a0 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -26,6 +26,7 @@ task BgzipAndIndex { String outputDir String type = "vcf" + Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -44,7 +45,8 @@ task BgzipAndIndex { } runtime { - docker: dockerImage + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -52,6 +54,7 @@ task BgzipAndIndex { inputFile: {description: "The file to be compressed and indexed.", category: "required"} outputDir: {description: "The directory in which the output will be placed.", category: "required"} type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -395,6 +398,7 @@ task View { Int threads = 1 String memory = "1G" + Int timeMinutes = 1 + ceil(size(inFile, "G") * 5) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } String outputIndexPath = basename(outputFileName) + ".bai" @@ -424,6 +428,7 @@ task View { runtime { cpu: threads memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -440,6 +445,7 @@ task View { threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/somaticseq.wdl b/somaticseq.wdl index 49e5c36d..7b9a4403 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -47,6 +47,7 @@ task ParallelPaired { File? strelkaIndel Int threads = 1 + Int timeMinutes = 60 String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -89,6 +90,7 @@ task ParallelPaired { runtime { cpu: threads + time_minutes: timeMinutes docker: dockerImage } @@ -118,6 +120,7 @@ task ParallelPaired { strelkaIndel: {description: "An indel VCF as produced by somaticsniper.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -150,6 +153,7 @@ task ParallelPairedTrain { File? 
strelkaIndel Int threads = 1 + Int timeMinutes = 240 String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -191,6 +195,7 @@ task ParallelPairedTrain { runtime { cpu: threads + time_minutes: timeMinutes docker: dockerImage } @@ -220,6 +225,7 @@ task ParallelPairedTrain { strelkaIndel: {description: "An indel VCF as produced by somaticsniper.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -244,6 +250,7 @@ task ParallelSingle { File? strelkaVCF Int threads = 1 + Int timeMinutes = 60 String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -279,6 +286,7 @@ task ParallelSingle { runtime { cpu: threads + time_minutes: timeMinutes docker: dockerImage } @@ -300,6 +308,7 @@ task ParallelSingle { strelkaVCF: {description: "A VCF as produced by strelka.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -324,6 +333,7 @@ task ParallelSingleTrain { File? 
strelkaVCF Int threads = 1 + Int timeMinutes = 240 String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -358,6 +368,7 @@ task ParallelSingleTrain { runtime { cpu: threads + time_minutes: timeMinutes docker: dockerImage } @@ -379,6 +390,7 @@ task ParallelSingleTrain { strelkaVCF: {description: "A VCF as produced by strelka.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -389,6 +401,7 @@ task ModifyStrelka { File strelkaVCF String outputVCFName = basename(strelkaVCF, ".gz") String dockerImage = "lethalfang/somaticseq:3.1.0" + Int timeMinutes = 20 } command { @@ -407,12 +420,14 @@ task ModifyStrelka { } runtime { + time_minutes: timeMinutes docker: dockerImage } parameter_meta { strelkaVCF: {description: "A vcf file as produced by strelka.", category: "required"} outputVCFName: {description: "The location the output VCF file should be written to.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/strelka.wdl b/strelka.wdl index 826cbd8e..50c38b55 100644 --- a/strelka.wdl +++ b/strelka.wdl @@ -36,6 +36,7 @@ task Germline { Int cores = 1 Int memoryGb = 4 + Int timeMinutes = 90 String dockerImage = "quay.io/biocontainers/strelka:2.9.7--0" } @@ -62,6 +63,7 @@ task Germline { runtime { docker: dockerImage cpu: cores + time_minutes: timeMinutes memory: "~{memoryGb}G" } @@ -78,6 +80,7 @@ task Germline { cores: {description: "The number of cores to use.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -100,6 +103,7 @@ task Somatic { Int cores = 1 Int memoryGb = 4 + Int timeMinutes = 90 String dockerImage = "quay.io/biocontainers/strelka:2.9.7--0" File? doNotDefineThis #FIXME @@ -131,6 +135,7 @@ task Somatic { runtime { docker: dockerImage cpu: cores + time_minutes: timeMinutes memory: "~{memoryGb}G" } @@ -150,6 +155,7 @@ task Somatic { cores: {description: "The number of cores to use.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/vardict.wdl b/vardict.wdl index 7bfd118e..ffd05547 100644 --- a/vardict.wdl +++ b/vardict.wdl @@ -48,8 +48,9 @@ task VarDict { Float minimumAlleleFrequency = 0.02 Int threads = 1 - String memory = "40G" + String memory = "20G" String javaXmx = "16G" + Int timeMinutes = 300 String dockerImage = "quay.io/biocontainers/vardict-java:1.5.8--1" } @@ -87,6 +88,7 @@ task VarDict { runtime { cpu: threads + 2 memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -116,6 +118,7 @@ task VarDict { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From 515df50db5b47e108b9d1e0a3c13a1ad269f4104 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 6 May 2020 16:14:40 +0200 Subject: [PATCH 0365/1208] minor adjustments --- bedtools.wdl | 15 +++++++-------- biopet/bamstats.wdl | 2 +- biopet/biopet.wdl | 21 +++++++++++++-------- biopet/sampleconfig.wdl | 6 +++--- biopet/seqstat.wdl | 2 +- bowtie.wdl | 5 ----- clever.wdl | 3 +++ collect-columns.wdl | 30 ++++++++++-------------------- common.wdl | 4 +++- fastqc.wdl | 9 +++++---- gatk.wdl | 32 ++++++++++++++++---------------- 11 files changed, 62 insertions(+), 67 deletions(-) diff --git a/bedtools.wdl b/bedtools.wdl index 4f16b7c0..a64cef1a 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -66,6 +66,7 @@ task Merge { input { File inputBed String outputBed = "merged.bed" + Int timeMinutes = 1 + ceil(size(inputBed, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -78,18 +79,16 @@ task Merge { } runtime { + time_minutes: timeMinutes docker: dockerImage } parameter_meta { - inputBed: {description: "The bed to merge", - category: "required"} - outputBed: {description: "The path to write the output to", - category: "advanced"} - dockerImage: { - description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + inputBed: {description: "The bed to merge.", category: "required"} + outputBed: {description: "The path to write the output to.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } diff --git a/biopet/bamstats.wdl b/biopet/bamstats.wdl index 7def9aec..af01bb2e 100644 --- a/biopet/bamstats.wdl +++ b/biopet/bamstats.wdl @@ -34,7 +34,7 @@ task Generate { String outputDir Reference? reference - String memory = "16G" + String memory = "10G" String javaXmx = "8G" } diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index 9004f917..b90c5f4c 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -31,7 +31,7 @@ task BaseCounter { String outputDir String prefix - String memory = "14G" + String memory = "5G" String javaXmx = "4G" } @@ -104,9 +104,10 @@ task ExtractAdaptersFastqc { Float? adapterCutoff Boolean? outputAsFasta - String memory = "40G" # This is ridiculous, but needed due to vmem monitoring on SGE. + String memory = "10G" String javaXmx = "8G" String dockerImage = "quay.io/biocontainers/biopet-extractadaptersfastqc:0.2--1" + Int timeMinutes = 5 } command { @@ -133,6 +134,7 @@ task ExtractAdaptersFastqc { runtime { memory: memory docker: dockerImage + time_minutes: timeMinutes } } @@ -143,7 +145,7 @@ task FastqSplitter { Array[String]+ outputPaths File? toolJar - String memory = "12G" + String memory = "5G" String javaXmx = "4G" String dockerImage = "quay.io/biocontainers/biopet-fastqsplitter:0.1--2" } @@ -175,7 +177,7 @@ task FastqSync { String out2path File? toolJar - String memory = "10G" + String memory = "5G" String javaXmx = "4G" } @@ -216,6 +218,7 @@ task ReorderGlobbedScatters { # The 3.7-slim container is 143 mb on the filesystem. 3.7 is 927 mb. # The slim container is sufficient for this small task. 
String dockerImage = "python:3.7-slim" + Int timeMinutes = 5 } command <<< @@ -243,12 +246,14 @@ task ReorderGlobbedScatters { runtime { docker: dockerImage + time_minutes = timeMinutes # 4 gigs of memory to be able to build the docker image in singularity memory: "4G" } parameter_meta { scatters: {description: "The files which should be ordered.", category: "required"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -337,7 +342,7 @@ task ValidateAnnotation { File? gtfFile Reference reference - String memory = "9G" + String memory = "4G" String javaXmx = "3G" String dockerImage = "quay.io/biocontainers/biopet-validateannotation:0.1--0" } @@ -363,7 +368,7 @@ task ValidateFastq { input { File read1 File? read2 - String memory = "9G" + String memory = "4G" String javaXmx = "3G" String dockerImage = "quay.io/biocontainers/biopet-validatefastq:0.1.1--1" } @@ -388,7 +393,7 @@ task ValidateVcf { input { IndexedVcfFile vcf Reference reference - String memory = "9G" + String memory = "4G" String javaXmx = "3G" String dockerImage = "quay.io/biocontainers/biopet-validatevcf:0.1--0" } @@ -432,7 +437,7 @@ task VcfStats { Array[String]+? sparkConfigValues String dockerImage = "quay.io/biocontainers/biopet-vcfstats:1.2--0" - String memory = "12G" + String memory = "5G" String javaXmx = "4G" } diff --git a/biopet/sampleconfig.wdl b/biopet/sampleconfig.wdl index 0fbd466a..50f26311 100644 --- a/biopet/sampleconfig.wdl +++ b/biopet/sampleconfig.wdl @@ -34,7 +34,7 @@ task SampleConfig { String? jsonOutputPath String? 
tsvOutputPath - String memory = "8G" + String memory = "18G" String javaXmx = "16G" } @@ -74,7 +74,7 @@ task SampleConfigCromwellArrays { Array[File]+ inputFiles String outputPath - String memory = "8G" + String memory = "5G" String javaXmx = "4G" } @@ -110,7 +110,7 @@ task CaseControl { String outputPath String controlTag = "control" - String memory = "8G" + String memory = "5G" String javaXmx = "4G" } diff --git a/biopet/seqstat.wdl b/biopet/seqstat.wdl index 6694a759..e3a55ec3 100644 --- a/biopet/seqstat.wdl +++ b/biopet/seqstat.wdl @@ -32,7 +32,7 @@ task Generate { String library String readgroup - String memory = "10G" + String memory = "5G" String javaXmx = "4G" } diff --git a/bowtie.wdl b/bowtie.wdl index 87427e7d..500afea6 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -105,8 +105,3 @@ task Bowtie { category: "advanced"} } } - -struct BowtieIndex { - File fasta - Array[File] indexFiles -} \ No newline at end of file diff --git a/clever.wdl b/clever.wdl index 2da9f4d2..3a6515f7 100644 --- a/clever.wdl +++ b/clever.wdl @@ -37,6 +37,7 @@ task Mateclever { Int threads = 10 String memory = "15G" + Int timeMinutes = 600 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } @@ -63,6 +64,7 @@ task Mateclever { runtime { cpu: threads memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -78,6 +80,7 @@ task Mateclever { outputPath: {description: "The location the output VCF file should be written.", category: "common"} threads: {description: "The the number of threads required to run a program", category: "advanced"} memory: {description: "The memory required to run the programs", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/collect-columns.wdl b/collect-columns.wdl index ed2a4577..e4e3a948 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -65,26 +65,16 @@ task CollectColumns { } parameter_meta { - inputTables: {description: "The tables from which columns should be taken.", - category: "required"} - outputPath: {description: "The path to which the output should be written.", - category: "required"} - featureColumn: {description: "Equivalent to the -f option of collect-columns.", - category: "advanced"} - valueColumn: {description: "Equivalent to the -c option of collect-columns.", - category: "advanced"} - separator: {description: "Equivalent to the -s option of collect-columns.", - category: "advanced"} - sampleNames: {description: "Equivalent to the -n option of collect-columns.", - category: "advanced"} - header: {description: "Equivalent to the -H flag of collect-columns.", - category: "advanced"} - additionalAttributes: {description: "Equivalent to the -a option of collect-columns.", - category: "advanced"} - referenceGtf: {description: "Equivalent to the -g option of collect-columns.", - category: "advanced"} - featureAttribute: {description: "Equivalent to the -F option of collect-columns.", - category: "advanced"} + inputTables: {description: "The tables from which columns should be taken.", category: "required"} + outputPath: {description: "The path to which the output should be written.", category: "required"} + featureColumn: {description: "Equivalent to the -f option of collect-columns.", category: "advanced"} + valueColumn: {description: "Equivalent to the -c option of collect-columns.", category: "advanced"} + separator: {description: "Equivalent to the -s option of collect-columns.", category: "advanced"} + sampleNames: {description: "Equivalent to the -n option of collect-columns.", category: "advanced"} + header: {description: 
"Equivalent to the -H flag of collect-columns.", category: "advanced"} + additionalAttributes: {description: "Equivalent to the -a option of collect-columns.", category: "advanced"} + referenceGtf: {description: "Equivalent to the -g option of collect-columns.", category: "advanced"} + featureAttribute: {description: "Equivalent to the -F option of collect-columns.", category: "advanced"} memoryGb: {description: "The maximum amount of memory the job will need in GB", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", diff --git a/common.wdl b/common.wdl index be60f8cf..88848df2 100644 --- a/common.wdl +++ b/common.wdl @@ -179,10 +179,10 @@ task StringArrayMd5 { } task TextToFile { - input { String text String outputFile = "out.txt" + Int timeMinutes = 1 String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -197,11 +197,13 @@ task TextToFile { parameter_meta { text: {description: "The text to print", category: "required"} outputFile: {description: "The name of the output file", category: "common"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } runtime { memory: "1G" + time_minutes: timeMinutes docker: dockerImage } } diff --git a/fastqc.wdl b/fastqc.wdl index 606c1bd4..e24b6ce4 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -120,6 +120,7 @@ task Fastqc { task GetConfiguration { input { + Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/fastqc:0.11.7--4" } @@ -142,13 +143,13 @@ task GetConfiguration { runtime { memory: "2G" # Needs more than 1 to pull the docker image + time_minute: timeMinutes docker: dockerImage } parameter_meta { - dockerImage: { - description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced" - } + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } diff --git a/gatk.wdl b/gatk.wdl index 7964b519..9fb80344 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -32,7 +32,7 @@ task AnnotateIntervals { File? segmentalDuplicationTrack Int featureQueryLookahead = 1000000 - String memory = "10G" + String memory = "3G" String javaXmx = "2G" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" @@ -224,7 +224,7 @@ task CalculateContamination { File tumorPileups File? 
normalPileups - String memory = "24G" + String memory = "13G" String javaXmx = "12G" Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" @@ -490,7 +490,7 @@ task CombineVariants { Array[File]+ variantIndexes String outputPath - String memory = "16G" + String memory = "13G" String javaXmx = "12G" Int timeMinutes = 180 String dockerImage = "broadinstitute/gatk3:3.8-1" @@ -662,7 +662,7 @@ task FilterMutectCalls { Int uniqueAltReadCount = 4 File mutect2Stats - String memory = "16G" + String memory = "14G" String javaXmx = "12G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" @@ -806,7 +806,7 @@ task GenomicsDBImport { genomicsDBWorkspacePath: {description: "Where the genomicsDB files should be stored", category: "advanced"} genomicsDBTarFile: {description: "Where the .tar file containing the genomicsDB should be stored", category: "advanced"} tmpDir: {description: "Alternate temporary directory in case there is not enough space. Must be mounted when using containers", - category: "advanced"} + category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} @@ -896,7 +896,7 @@ task GetPileupSummaries { File sitesForContaminationIndex String outputPrefix - String memory = "16G" + String memory = "14G" String javaXmx = "12G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" @@ -1030,7 +1030,7 @@ task LearnReadOrientationModel { input { Array[File]+ f1r2TarGz - String memory = "16G" + String memory = "14G" String javaXmx = "12G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" @@ -1192,7 +1192,7 @@ task MuTect2 { Array[File]+ intervals String outputStats = outputVcf + ".stats" - String memory = "6G" + String memory = "5G" String javaXmx = "4G" Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" @@ -1261,8 +1261,8 @@ task PlotDenoisedCopyRatios { File denoisedCopyRatios Int? minimumContigLength - String memory = "6G" - String javaXmx = "4G" + String memory = "4G" + String javaXmx = "3G" Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer doesn't seem to contain R. } @@ -1321,8 +1321,8 @@ task PlotModeledSegments { File allelicCounts Int? minimumContigLength - String memory = "6G" - String javaXmx = "4G" + String memory = "4G" + String javaXmx = "3G" Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer doesn't seem to contain R. } @@ -1379,8 +1379,8 @@ task PreprocessIntervals { Int padding = if defined(intervals) then 250 else 0 String intervalMergingRule = "OVERLAPPING_ONLY" - String memory = "6G" - String javaXmx = "5G" + String memory = "4G" + String javaXmx = "3G" Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 6) String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -1437,7 +1437,7 @@ task SelectVariants { String outputPath = "output.vcf.gz" String? 
selectTypeToInclude Array[File] intervals = [] - String memory = "6G" + String memory = "5G" String javaXmx = "4G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" @@ -1556,7 +1556,7 @@ task VariantFiltration { Array[String]+ filterArguments Array[File] intervals = [] - String memory = "6G" + String memory = "5G" String javaXmx = "4G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" From c33f00ed95215f86f8b074fa8d068ac52395919c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 6 May 2020 16:31:51 +0200 Subject: [PATCH 0366/1208] various changes --- gatk.wdl | 42 +++++++++++++++++----------------- picard.wdl | 63 ++++++++++++++++++++++++--------------------------- rtg.wdl | 14 ++++++++---- samtools.wdl | 15 ++++++++++++ umi-tools.wdl | 8 ++++--- vardict.wdl | 4 ++-- 6 files changed, 82 insertions(+), 64 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 9fb80344..8e90f88a 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -41,7 +41,7 @@ task AnnotateIntervals { command { set -e mkdir -p "$(dirname ~{annotatedIntervalsPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ AnnotateIntervals \ -R ~{referenceFasta} \ -L ~{intervals} \ @@ -102,7 +102,7 @@ task ApplyBQSR { command { set -e mkdir -p "$(dirname ~{outputBamPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ ApplyBQSR \ --create-output-bam-md5 \ --add-output-sam-program-record \ @@ -232,7 +232,7 @@ task CalculateContamination { command { set -e - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ CalculateContamination \ -I ~{tumorPileups} \ ~{"-matched " + normalPileups} \ @@ -277,7 +277,7 @@ task CallCopyRatioSegments { command { set -e mkdir -p "$(dirname ~{outputPrefix})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ 
CallCopyRatioSegments \ -I ~{copyRatioSegments} \ -O ~{outputPrefix}.called.seg @@ -326,7 +326,7 @@ task CollectAllelicCounts { command { set -e mkdir -p "$(dirname ~{allelicCountsPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ CollectAllelicCounts \ -L ~{commonVariantSites} \ -I ~{inputBam} \ @@ -382,7 +382,7 @@ task CollectReadCounts { command { set -e mkdir -p "$(dirname ~{countsPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ CollectReadCounts \ -L ~{intervals} \ -I ~{inputBam} \ @@ -511,7 +511,7 @@ task CombineVariants { printf -- "-V:%s %s " "${ids[i]}" "${vars[i]}" done ') - java -Xmx~{javaXmx} -jar /usr/GenomeAnalysisTK.jar \ + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 -jar /usr/GenomeAnalysisTK.jar \ -T CombineVariants \ -R ~{referenceFasta} \ --genotypemergeoption ~{genotypeMergeOption} \ @@ -566,7 +566,7 @@ task CreateReadCountPanelOfNormals { command { set -e mkdir -p "$(dirname ~{PONpath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ CreateReadCountPanelOfNormals \ -I ~{sep=" -I " readCountsFiles} \ ~{"--annotated-intervals " + annotatedIntervals} \ @@ -613,7 +613,7 @@ task DenoiseReadCounts { command { set -e mkdir -p "$(dirname ~{outputPrefix})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ DenoiseReadCounts \ -I ~{readCounts} \ ~{"--count-panel-of-normals " + PON} \ @@ -671,7 +671,7 @@ task FilterMutectCalls { command { set -e mkdir -p "$(dirname ~{outputVcf})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ FilterMutectCalls \ -R ~{referenceFasta} \ -V ~{unfilteredVcf} \ @@ -780,7 +780,7 @@ task GenomicsDBImport { command { set -e mkdir -p "$(dirname ~{genomicsDBWorkspacePath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options 
'-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ GenomicsDBImport \ -V ~{sep=" -V " gvcfFiles} \ --genomicsdb-workspace-path ~{genomicsDBWorkspacePath} \ @@ -904,7 +904,7 @@ task GetPileupSummaries { command { set -e - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ GetPileupSummaries \ -I ~{sampleBam} \ -V ~{variantsForContamination} \ @@ -1038,7 +1038,7 @@ task LearnReadOrientationModel { command { set -e - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ LearnReadOrientationModel \ -I ~{sep=" -I " f1r2TarGz} \ -O "artifact-priors.tar.gz" @@ -1077,7 +1077,7 @@ task MergeStats { command { set -e - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ MergeMutectStats \ -stats ~{sep=" -stats " stats} \ -O "merged.stats" @@ -1125,7 +1125,7 @@ task ModelSegments { command { set -e mkdir -p ~{outputDir} - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ ModelSegments \ --denoised-copy-ratios ~{denoisedCopyRatios} \ --allelic-counts ~{allelicCounts} \ @@ -1201,7 +1201,7 @@ task MuTect2 { command { set -e mkdir -p "$(dirname ~{outputVcf})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ Mutect2 \ -R ~{referenceFasta} \ -I ~{sep=" -I " inputBams} \ @@ -1270,7 +1270,7 @@ task PlotDenoisedCopyRatios { command { set -e mkdir -p ~{outputDir} - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ PlotDenoisedCopyRatios \ --standardized-copy-ratios ~{standardizedCopyRatios} \ --denoised-copy-ratios ~{denoisedCopyRatios} \ @@ -1330,7 +1330,7 @@ task PlotModeledSegments { command { set -e mkdir -p ~{outputDir} - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ PlotModeledSegments \ --denoised-copy-ratios ~{denoisedCopyRatios} 
\ --allelic-counts ~{allelicCounts} \ @@ -1388,7 +1388,7 @@ task PreprocessIntervals { command { set -e mkdir -p "$(dirname ~{outputIntervalList})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ PreprocessIntervals \ -R ~{referenceFasta} \ --sequence-dictionary ~{referenceFastaDict} \ @@ -1446,7 +1446,7 @@ task SelectVariants { command { set -e mkdir -p "$(dirname ~{outputPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ SelectVariants \ -R ~{referenceFasta} \ -V ~{inputVcf} \ @@ -1565,7 +1565,7 @@ task VariantFiltration { command { set -e mkdir -p "$(dirname ~{outputPath})" - gatk --java-options -Xmx~{javaXmx} \ + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ VariantFiltration \ -I ~{inputVcf} \ -R ~{referenceFasta} \ diff --git a/picard.wdl b/picard.wdl index 494ccf98..c9f9b835 100644 --- a/picard.wdl +++ b/picard.wdl @@ -26,8 +26,8 @@ task BedToIntervalList { File dict String outputPath = "regions.interval_list" - String memory = "5G" - String javaXmx = "4G" + String memory = "4G" + String javaXmx = "3G" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -56,8 +56,7 @@ task BedToIntervalList { # inputs bedFile: {description: "A bed file.", category: "required"} dict: {description: "A sequence dict file.", category: "required"} - outputPath: {description: "The location the output interval list should be written to.", - category: "advanced"} + outputPath: {description: "The location the output interval list should be written to.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} @@ -162,28 +161,23 @@ task CollectMultipleMetrics { parameter_meta { # inputs - inputBam: {description: "The input BAM file for which metrics will be collected.", - category: "required"} + inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - basename: {description: "The basename/prefix of the output files (may include directories).", - category: "required"} + basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} collectAlignmentSummaryMetrics: {description: "Equivalent to the `PROGRAM=CollectAlignmentSummaryMetrics` argument.", category: "advanced"} collectInsertSizeMetrics: {description: "Equivalent to the `PROGRAM=CollectInsertSizeMetrics` argument.", category: "advanced"} qualityScoreDistribution: {description: "Equivalent to the `PROGRAM=QualityScoreDistribution` argument.", category: "advanced"} - meanQualityByCycle: {description: "Equivalent to the `PROGRAM=MeanQualityByCycle` argument.", - category: "advanced"} + meanQualityByCycle: {description: "Equivalent to the `PROGRAM=MeanQualityByCycle` argument.", category: "advanced"} collectBaseDistributionByCycle: {description: "Equivalent to the `PROGRAM=CollectBaseDistributionByCycle` argument.", category: "advanced"} - collectGcBiasMetrics: {description: "Equivalent to the 
`PROGRAM=CollectGcBiasMetrics` argument.", - category: "advanced"} + collectGcBiasMetrics: {description: "Equivalent to the `PROGRAM=CollectGcBiasMetrics` argument.", category: "advanced"} collectSequencingArtifactMetrics: {description: "Equivalent to the `PROGRAM=CollectSequencingArtifactMetrics` argument.", category: "advanced"} collectQualityYieldMetrics: {description: "Equivalent to the `PROGRAM=CollectQualityYieldMetrics` argument.", @@ -237,12 +231,10 @@ task CollectRnaSeqMetrics { parameter_meta { # inputs - inputBam: {description: "The input BAM file for which metrics will be collected.", - category: "required"} + inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} refRefflat: {description: "A refflat file containing gene annotations.", catehory: "required"} - basename: {description: "The basename/prefix of the output files (may include directories).", - category: "required"} + basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} strandSpecificity: {description: "Equivalent to the `STRAND_SPECIFICITY` option of picard's CollectRnaSeqMetrics.", category: "common"} @@ -266,8 +258,8 @@ task CollectTargetedPcrMetrics { Array[File]+ targetIntervals String basename - String memory = "5G" - String javaXmx = "4G" + String memory = "4G" + String javaXmx = "3G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -300,11 +292,9 @@ task CollectTargetedPcrMetrics { parameter_meta { # inputs - inputBam: {description: "The input BAM file for which metrics will be collected.", - category: "required"} + inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} - referenceFasta: 
{description: "The reference fasta file which was also used for mapping.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} @@ -312,8 +302,7 @@ task CollectTargetedPcrMetrics { category: "required"} targetIntervals: {description: "An interval list describing the coordinates of the targets sequenced.", category: "required"} - basename: {description: "The basename/prefix of the output files (may include directories).", - category: "required"} + basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", @@ -331,8 +320,8 @@ task GatherBamFiles { Array[File]+ inputBamsIndex String outputBamPath - String memory = "5G" - String javaXmx = "4G" + String memory = "4G" + String javaXmx = "3G" Int timeMinutes = 1 + ceil(size(inputBams, "G") * 0.5) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -381,8 +370,9 @@ task GatherVcfs { Array[File]+ inputVcfIndexes String outputVcfPath = "out.vcf.gz" - String memory = "12G" + String memory = "5G" String javaXmx = "4G" + Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -402,6 +392,7 @@ task GatherVcfs { runtime { docker: dockerImage memory: memory + time_minutes: timeMinutes } parameter_meta { @@ -413,6 +404,7 @@ task GatherVcfs { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -428,7 +420,7 @@ task MarkDuplicates { String memory = "10G" String javaXmx = "8G" - Int timeMinutes = 1 + ceil(size(inputBams, "G")* 8) + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" # The program default for READ_NAME_REGEX is appropriate in nearly every case. @@ -547,7 +539,7 @@ task SamToFastq { File inputBamIndex Boolean paired = true - String memory = "48G" + String memory = "18G" String javaXmx = "16G" # High memory default to avoid crashes. String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" File? 
NONE @@ -584,8 +576,8 @@ task ScatterIntervalList { File interval_list Int scatter_count - String memory = "12G" - String javaXmx = "4G" + String memory = "4G" + String javaXmx = "3G" String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -667,8 +659,9 @@ task RenameSample { File inputVcf String outputPath = "./picard/renamed.vcf" String newSampleName - String memory = "24G" + String memory = "10G" String javaXmx = "8G" + Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) String dockerImage = "quay.io/biocontainers/picard:2.19.0--0" } @@ -688,6 +681,7 @@ task RenameSample { runtime { docker: dockerImage + time_minutes: timeMinutes memory: memory } @@ -698,6 +692,7 @@ task RenameSample { newSampleName: {description: "A string to replace the old sample name.", category: "required"} memory: {description: "The memory required to run the programs", category: "advanced"} javaXmx: {description: "The max. memory allocated for JAVA", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/rtg.wdl b/rtg.wdl index 8fd53ca4..03a3f5dc 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -27,7 +27,8 @@ task Format { Array[File]+ inputFiles String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" String rtgMem = "8G" - String memory = "16G" + String memory = "10G" + Int timeMinutes = 1 + ceil(size(inputFiles) * 2) } command { @@ -45,15 +46,17 @@ task Format { runtime { docker: dockerImage memory: memory + time_minutes: timeMinutes } parameter_meta { - format: {description: "Format of input. Allowed values are [fasta, fastq, fastq-interleaved, sam-se, sam-pe] (Default is fasta)", + format: {description: "Format of input. 
Allowed values are [fasta, fastq, fastq-interleaved, sam-se, sam-pe].", category: "advanced"} outputPath: {description: "Where the output should be placed.", category: "advanced"} inputFiles: {description: "input sequence files. May be specified 1 or more times.", category: "required"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} rtgMem: {description: "The amount of memory rtg will allocate to the JVM", category: "advanced"} } @@ -77,7 +80,8 @@ task VcfEval { String outputMode = "split" Int threads = 1 # tool default is number of cores in the system 😱 String rtgMem = "8G" - String memory = "16G" + String memory = "10G" + Int timeMinutes = 1 + ceil(size([baseline, calls], "G") * 5) String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" } @@ -131,6 +135,7 @@ task VcfEval { docker: dockerImage cpu: threads memory: memory + time_minutes: timeMinutes } parameter_meta { @@ -157,6 +162,7 @@ task VcfEval { threads: {description: "Number of threads. Default is 1", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} rtgMem: {description: "The amount of memory rtg will allocate to the JVM", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} } diff --git a/samtools.wdl b/samtools.wdl index bdf811a0..b1c74857 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -115,6 +115,7 @@ task Merge { String outputBamPath = "merged.bam" Boolean force = true + Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } String indexPath = sub(outputBamPath, "\.bam$",".bai") @@ -133,6 +134,7 @@ task Merge { runtime { docker: dockerImage + time_minutes: timeMinutes } parameter_meta { @@ -140,6 +142,7 @@ task Merge { bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -150,6 +153,7 @@ task SortByName { File bamFile String outputBamPath = "namesorted.bam" + Int timeMinutes = 1 + ceil(size(bamFile, "G") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } @@ -165,12 +169,14 @@ task SortByName { runtime { docker: dockerImage + time_minutes: timeMinutes } parameter_meta { # inputs bamFile: {description: "The BAM file to get sorted.", category: "required"} outputBamPath: {description: "The location the sorted BAM file should be written to.", category: "common"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -181,6 +187,7 @@ task Markdup { File inputBam String outputBamPath + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } @@ -196,12 +203,14 @@ task Markdup { runtime { docker: dockerImage + time_minutes: timeMinutes } parameter_meta { # inputs inputBam: {description: "The BAM file to be processed.", category: "required"} outputBamPath: {description: "The location of the output BAM file.", category: "required"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -299,6 +308,7 @@ task Fastq { Int threads = 1 String memory = "1G" + Int timeMinutes = 1 + ceil(size(inputBam) * 2) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } @@ -327,6 +337,7 @@ task Fastq { cpu: threads memory: memory docker: dockerImage + time_minutes: timeMinutes } parameter_meta { @@ -342,6 +353,7 @@ task Fastq { outputQuality: {description: "Equivalent to samtools fastq's `-O` flag.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -352,6 +364,7 @@ task Tabix { File inputFile String outputFilePath = "indexed.vcf.gz" String type = "vcf" + Int timeMinutes = 1 + ceil(size(inputFile, "G") * 2) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } # FIXME: It is better to do the indexing on VCF creation. Not in a separate task. With file localization this gets hairy fast. @@ -371,6 +384,7 @@ task Tabix { } runtime { + time_minutes: timeMinutes docker: dockerImage } @@ -380,6 +394,7 @@ task Tabix { outputFilePath: {description: "The location where the file should be written to. The index will appear alongside this link to the file.", category: "common"} type: {description: "The type of file (eg. vcf or bed) to be indexed.", category: "common"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/umi-tools.wdl b/umi-tools.wdl index bd09853a..c5f3b145 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -29,7 +29,7 @@ task Extract { Boolean threePrime = false String read1Output = "umi_extracted_R1.fastq.gz" String? read2Output = "umi_extracted_R2.fastq.gz" - + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" } @@ -51,6 +51,7 @@ task Extract { runtime { docker: dockerImage + time_minutes: timeMinutes } parameter_meta { @@ -61,6 +62,7 @@ task Extract { threePrime: {description: "Whether or not the UMI's are at the reads' 3' end. If false the UMIs are extracted from the 5' end.", category: "advanced"} read1Output: {description: "The location to write the first/single-end output fastq file to.", category: "advanced"} read2Output: {description: "The location to write the second-end output fastq file to.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -75,8 +77,8 @@ task Dedup { String? 
statsPrefix Boolean paired = true - String memory = "20G" - Int timeMinutes = 600 + ceil(size(inputBam, "G") * 60) + String memory = "25G" + Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" diff --git a/vardict.wdl b/vardict.wdl index ffd05547..92beb32e 100644 --- a/vardict.wdl +++ b/vardict.wdl @@ -48,7 +48,7 @@ task VarDict { Float minimumAlleleFrequency = 0.02 Int threads = 1 - String memory = "20G" + String memory = "18G" String javaXmx = "16G" Int timeMinutes = 300 String dockerImage = "quay.io/biocontainers/vardict-java:1.5.8--1" @@ -56,7 +56,7 @@ task VarDict { command { set -e -o pipefail - export JAVA_OPTS="-Xmx~{javaXmx}" + export JAVA_OPTS="-Xmx~{javaXmx} -XX:ParallelGCThreads=1" vardict-java \ ~{"-th " + threads} \ -G ~{referenceFasta} \ From 1a90d56fb7b6ceeaea3bf7e42a1b3f4f549d0da2 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 7 May 2020 10:34:04 +0200 Subject: [PATCH 0367/1208] fix samtools sort --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 1c2f5d78..e123e635 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -195,7 +195,7 @@ task Sort { String memory = "2G" String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" - Int timeMinutes = 1 + ceil(size(bamFile, "G") * 2) + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) Int? 
threads } From ac71e415eb25857073bce330b7b8b43ec38ddd01 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 7 May 2020 11:25:31 +0200 Subject: [PATCH 0368/1208] fix task order samtools and remove duplicate task --- samtools.wdl | 346 +++++++++++++++++++++++---------------------------- 1 file changed, 156 insertions(+), 190 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index e123e635..5648eb1c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -98,297 +98,301 @@ task Faidx { } } -task Index { +task Fastq { input { - File bamFile - String? outputBamPath - String memory = "2G" - Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) + File inputBam + String outputRead1 + String? outputRead2 + String? outputRead0 + Int? includeFilter + Int? excludeFilter + Int? excludeSpecificFilter + Boolean appendReadNumber = false + Boolean outputQuality = false + Int? compressionLevel + + Int threads = 1 + String memory = "1G" + Int timeMinutes = 1 + ceil(size(inputBam) * 2) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } - # Select_first is needed, otherwise womtool validate fails. - String outputPath = select_first([outputBamPath, basename(bamFile)]) - String bamIndexPath = sub(outputPath, "\.bam$", ".bai") - command { - bash -c ' - set -e - # Make sure outputBamPath does not exist. - if [ ! -f ~{outputPath} ] - then - mkdir -p "$(dirname ~{outputPath})" - ln ~{bamFile} ~{outputPath} - fi - samtools index ~{outputPath} ~{bamIndexPath} - ' + samtools fastq \ + ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ + ~{"-2 " + outputRead2} \ + ~{"-0 " + outputRead0} \ + ~{"-f " + includeFilter} \ + ~{"-F " + excludeFilter} \ + ~{"-G " + excludeSpecificFilter} \ + ~{true="-N" false="-n" appendReadNumber} \ + ~{true="-O" false="" outputQuality} \ + ~{"-c " + compressionLevel} \ + ~{"--threads " + threads} \ + ~{inputBam} } output { - File indexedBam = outputPath - File index = bamIndexPath + File read1 = outputRead1 + File? read2 = outputRead2 + File? 
read0 = outputRead0 } runtime { + cpu: threads memory: memory - time_minutes: timeMinutes docker: dockerImage + time_minutes: timeMinutes } parameter_meta { # inputs - bamFile: {description: "The BAM file for which an index should be made.", category: "required"} - outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", - category: "common"} - memory: {description: "The amount of memory needed for the job.", category: "advanced"} + inputBam: {description: "The bam file to process.", category: "required"} + outputRead1: {description: "The location the reads (first reads for pairs, in case of paired-end sequencing) should be written to.", category: "required"} + outputRead2: {description: "The location the second reads from pairs should be written to.", category: "common"} + outputRead0: {description: "The location the unpaired reads should be written to (in case of paired-end sequenicng).", category: "advanced"} + includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`", category: "advanced"} + excludeFilter: {description: "Exclude reads with ONE OR MORE of these flags. Corresponds to `-F`", category: "advanced"} + excludeSpecificFilter: {description: "Exclude reads with ALL of these flags. Corresponds to `-G`", category: "advanced"} + appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. Corresponds to `-n/N`", category: "advanced"} + outputQuality: {description: "Equivalent to samtools fastq's `-O` flag.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -task Merge { +task FilterShortReadsBam { input { - Array[File]+ bamFiles - String outputBamPath = "merged.bam" - Boolean force = true - - Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) + File bamFile + String outputPathBam + String memory = "1G" + Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } - String indexPath = sub(outputBamPath, "\.bam$",".bai") + + String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") command { set -e - mkdir -p "$(dirname ~{outputBamPath})" - samtools merge ~{true="-f" false="" force} ~{outputBamPath} ~{sep=' ' bamFiles} - samtools index ~{outputBamPath} ~{indexPath} + mkdir -p "$(dirname ~{outputPathBam})" + samtools view -h ~{bamFile} | \ + awk 'length($10) > 30 || $1 ~/^@/' | \ + samtools view -bS -> ~{outputPathBam} + samtools index ~{outputPathBam} ~{outputPathBamIndex} } output { - File outputBam = outputBamPath - File outputBamIndex = indexPath + File filteredBam = outputPathBam + File filteredBamIndex = outputPathBamIndex } runtime { - docker: dockerImage + memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - # inputs - bamFiles: {description: "The BAM files to merge.", category: "required"} - outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} - force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} + bamFile: {description: "The bam file to process.", category: "required"} + outputPathBam: {description: "The filtered bam file.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -task Sort { +task Flagstat { input { File inputBam String outputPath - Boolean sortByName = false - Int compressionLevel = 1 - String memory = "2G" - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) - Int? threads + String memory = "1G" + Int timeMinutes = 1 + ceil(size(inputBam, "G")) + String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } command { set -e mkdir -p "$(dirname ~{outputPath})" - samtools sort \ - -l ~{compressionLevel} \ - ~{true="-n" false="" sortByName} \ - ~{"--threads " + threads} \ - -o ~{outputPath} \ - ~{inputBam} + samtools flagstat ~{inputBam} > ~{outputPath} } output { - File outputSortedBam = outputPath + File flagstat = outputPath } runtime { - cpu: 1 + select_first([threads, 0]) memory: memory - docker: dockerImage time_minutes: timeMinutes + docker: dockerImage } parameter_meta { # inputs - inputBam: {description: "The input SAM file.", category: "required"} - outputPath: {description: "Output directory path + output file.", category: "required"} - sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} - compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} + inputBam: {description: "The BAM file for which statistics should be retrieved.", category: "required"} + outputPath: {description: "The location the ouput should be written to.", category: "required"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - # outputs - outputSortedBam: {description: "Sorted BAM file."} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } -task Markdup { +task Index { input { - File inputBam - String outputBamPath - - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + File bamFile + String? outputBamPath + String memory = "2G" + Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } + # Select_first is needed, otherwise womtool validate fails. + String outputPath = select_first([outputBamPath, basename(bamFile)]) + String bamIndexPath = sub(outputPath, "\.bam$", ".bai") + command { + bash -c ' set -e - mkdir -p "$(dirname ~{outputBamPath})" - samtools markdup ~{inputBam} ~{outputBamPath} + # Make sure outputBamPath does not exist. + if [ ! 
-f ~{outputPath} ] + then + mkdir -p "$(dirname ~{outputPath})" + ln ~{bamFile} ~{outputPath} + fi + samtools index ~{outputPath} ~{bamIndexPath} + ' } output { - File outputBam = outputBamPath + File indexedBam = outputPath + File index = bamIndexPath } runtime { - docker: dockerImage + memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { # inputs - inputBam: {description: "The BAM file to be processed.", category: "required"} - outputBamPath: {description: "The location of the output BAM file.", category: "required"} + bamFile: {description: "The BAM file for which an index should be made.", category: "required"} + outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", + category: "common"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -task FilterShortReadsBam { +task Markdup { input { - File bamFile - String outputPathBam - String memory = "1G" - Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) + File inputBam + String outputBamPath + + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } - String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") - command { set -e - mkdir -p "$(dirname ~{outputPathBam})" - samtools view -h ~{bamFile} | \ - awk 'length($10) > 30 || $1 ~/^@/' | \ - samtools view -bS -> ~{outputPathBam} - samtools index ~{outputPathBam} ~{outputPathBamIndex} + mkdir -p "$(dirname ~{outputBamPath})" + samtools markdup ~{inputBam} ~{outputBamPath} } output { - File filteredBam = outputPathBam - File filteredBamIndex = outputPathBamIndex + File outputBam = outputBamPath } runtime { - memory: memory - time_minutes: timeMinutes docker: dockerImage + time_minutes: timeMinutes } parameter_meta { - bamFile: {description: "The bam file to process.", category: "required"} - outputPathBam: {description: "The filtered bam file.", category: "common"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} + # inputs + inputBam: {description: "The BAM file to be processed.", category: "required"} + outputBamPath: {description: "The location of the output BAM file.", category: "required"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} } } -task Flagstat { +task Merge { input { - File inputBam - String outputPath + Array[File]+ bamFiles + String outputBamPath = "merged.bam" + Boolean force = true - String memory = "1G" - Int timeMinutes = 1 + ceil(size(inputBam, "G")) + Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } + String indexPath = sub(outputBamPath, "\.bam$",".bai") command { set -e - mkdir -p "$(dirname ~{outputPath})" - samtools flagstat ~{inputBam} > ~{outputPath} + mkdir -p "$(dirname ~{outputBamPath})" + samtools merge ~{true="-f" false="" force} ~{outputBamPath} ~{sep=' ' bamFiles} + samtools index ~{outputBamPath} ~{indexPath} } output { - File flagstat = outputPath + File outputBam = outputBamPath + File outputBamIndex = indexPath } runtime { - memory: memory - time_minutes: timeMinutes docker: dockerImage + time_minutes: timeMinutes } parameter_meta { # inputs - inputBam: {description: "The BAM file for which statistics should be retrieved.", category: "required"} - outputPath: {description: "The location the ouput should be written to.", category: "required"} - memory: {description: "The amount of memory needed for the job.", category: "advanced"} + bamFiles: {description: "The BAM files to merge.", category: "required"} + outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} + force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -task Fastq { +task Sort { input { File inputBam - String outputRead1 - String? outputRead2 - String? 
outputRead0 - Int? includeFilter - Int? excludeFilter - Int? excludeSpecificFilter - Boolean appendReadNumber = false - Boolean outputQuality = false - Int? compressionLevel + String outputPath + Boolean sortByName = false + Int compressionLevel = 1 - Int threads = 1 - String memory = "1G" - Int timeMinutes = 1 + ceil(size(inputBam) * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + String memory = "2G" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + Int? threads } command { - samtools fastq \ - ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ - ~{"-2 " + outputRead2} \ - ~{"-0 " + outputRead0} \ - ~{"-f " + includeFilter} \ - ~{"-F " + excludeFilter} \ - ~{"-G " + excludeSpecificFilter} \ - ~{true="-N" false="-n" appendReadNumber} \ - ~{true="-O" false="" outputQuality} \ - ~{"-c " + compressionLevel} \ + set -e + mkdir -p "$(dirname ~{outputPath})" + samtools sort \ + -l ~{compressionLevel} \ + ~{true="-n" false="" sortByName} \ ~{"--threads " + threads} \ + -o ~{outputPath} \ ~{inputBam} } output { - File read1 = outputRead1 - File? read2 = outputRead2 - File? 
read0 = outputRead0 + File outputSortedBam = outputPath } runtime { - cpu: threads + cpu: 1 + select_first([threads, 0]) memory: memory docker: dockerImage time_minutes: timeMinutes @@ -396,20 +400,16 @@ task Fastq { parameter_meta { # inputs - inputBam: {description: "The bam file to process.", category: "required"} - outputRead1: {description: "The location the reads (first reads for pairs, in case of paired-end sequencing) should be written to.", category: "required"} - outputRead2: {description: "The location the second reads from pairs should be written to.", category: "common"} - outputRead0: {description: "The location the unpaired reads should be written to (in case of paired-end sequenicng).", category: "advanced"} - includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`", category: "advanced"} - excludeFilter: {description: "Exclude reads with ONE OR MORE of these flags. Corresponds to `-F`", category: "advanced"} - excludeSpecificFilter: {description: "Exclude reads with ALL of these flags. Corresponds to `-G`", category: "advanced"} - appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. 
Corresponds to `-n/N`", category: "advanced"} - outputQuality: {description: "Equivalent to samtools fastq's `-O` flag.", category: "advanced"} - threads: {description: "The number of threads to use.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} + inputBam: {description: "The input SAM file.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "required"} + sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + # outputs + outputSortedBam: {description: "Sorted BAM file."} } } @@ -518,38 +518,4 @@ task View { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} - -task FilterShortReadsBam { - input { - File bamFile - String outputPathBam - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" - } - - String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") - - command { - set -e - mkdir -p "$(dirname ~{outputPathBam})" - samtools view -h ~{bamFile} | \ - awk 'length($10) > 30 || $1 ~/^@/' | \ - samtools view -bS -> ~{outputPathBam} - samtools index ~{outputPathBam} ~{outputPathBamIndex} - } - - output { - File filteredBam = outputPathBam - File filteredBamIndex = outputPathBamIndex - } - - runtime { - docker: dockerImage - } - - parameter_meta { - bamFile: {description: "The bam file to process.", category: "required"} - outputPathBam: {description: "The filtered bam file.", category: "common"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - } -} +} \ No newline at end of file From 859b722f2c671bd03c21a1dc563f8adde0199c57 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 May 2020 14:45:27 +0200 Subject: [PATCH 0369/1208] add options specific for RNA haplotype calling --- gatk.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index b730cbee..33a43520 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -910,6 +910,8 @@ task HaplotypeCaller { String? 
outputMode Boolean gvcf = false String emitRefConfidence = if gvcf then "GVCF" else "NONE" + Boolean dontUseSoftClippedBases = false + Float standardMinConfidenceThresholdForCalling String memory = "12G" String javaXmx = "4G" @@ -931,7 +933,9 @@ task HaplotypeCaller { ~{"--pedigree " + pedigree} \ ~{"--contamination-fraction-per-sample-file " + contamination} \ ~{"--output-mode " + outputMode} \ - --emit-ref-confidence ~{emitRefConfidence} + --emit-ref-confidence ~{emitRefConfidence} \ + ~{true="--dont-use-soft-clipped-bases" false="" dontUseSoftClippedBases} \ + ~{"--standard-min-confidence-threshold-for-calling " + standardMinConfidenceThresholdForCalling} } output { From 5ca28720b68d844baeca013a19a5e14a45b25a85 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 May 2020 16:14:19 +0200 Subject: [PATCH 0370/1208] make minconfidence treshold optional --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 33a43520..6c28ab68 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -911,7 +911,7 @@ task HaplotypeCaller { Boolean gvcf = false String emitRefConfidence = if gvcf then "GVCF" else "NONE" Boolean dontUseSoftClippedBases = false - Float standardMinConfidenceThresholdForCalling + Float? 
standardMinConfidenceThresholdForCalling String memory = "12G" String javaXmx = "4G" From 6b8a7551655010f6680c93806ea35cc4521b2a8c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 11 May 2020 11:09:00 +0200 Subject: [PATCH 0371/1208] time_minutes --- centrifuge.wdl | 12 ++++++++++++ minimap2.wdl | 6 ++++++ survivor.wdl | 3 +++ talon.wdl | 24 ++++++++++++++++++++++++ transcriptclean.wdl | 9 +++++++++ vt.wdl | 3 +++ 6 files changed, 57 insertions(+) diff --git a/centrifuge.wdl b/centrifuge.wdl index 1fbc7be1..f2b26043 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -37,6 +37,7 @@ task Build { Int threads = 5 String memory = "20G" + Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -64,6 +65,7 @@ task Build { runtime { cpu: threads memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -82,6 +84,7 @@ task Build { sizeTable: {description: "List of taxonomic IDs and lengths of the sequences belonging to the same taxonomic IDs.", category: "common"} threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -184,6 +187,7 @@ task Inspect { Int? 
across String memory = "4G" + Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -210,6 +214,7 @@ task Inspect { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -220,6 +225,7 @@ task Inspect { outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} across: {description: "When printing FASTA output, output a newline character every bases.", category: "common"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -307,6 +313,7 @@ task Kreport { Int? minimumLength String memory = "4G" + Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -335,6 +342,7 @@ task Kreport { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -349,6 +357,7 @@ task Kreport { minimumScore: {description: "Require a minimum score for reads to be counted.", category: "advanced"} minimumLength: {description: "Require a minimum alignment length to the read.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -362,6 +371,7 @@ task KTimportTaxonomy { String outputPrefix String memory = "4G" + Int timeMinutes = 1 String dockerImage = "biocontainers/krona:v2.7.1_cv1" } @@ -379,6 +389,7 @@ task KTimportTaxonomy { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -387,6 +398,7 @@ task KTimportTaxonomy { inputFile: {description: "File with Centrifuge classification results.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/minimap2.wdl b/minimap2.wdl index fd28d4a9..04b02bf2 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -32,6 +32,7 @@ task Indexing { Int cores = 1 String memory = "4G" + Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/minimap2:2.17--h84994c4_0" } @@ -55,6 +56,7 @@ task Indexing { runtime { cpu: cores memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -68,6 +70,7 @@ task Indexing { splitIndex: {description: "Split index for every ~NUM input bases.", category: "advanced"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # output @@ -96,6 +99,7 @@ task Mapping { Int cores = 4 String memory = "30G" + Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) String dockerImage = "quay.io/biocontainers/minimap2:2.17--h84994c4_0" } @@ -128,6 +132,7 @@ task Mapping { runtime { cpu: cores memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -149,6 +154,7 @@ task Mapping { queryFile: {description: "Input fasta file.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # output diff --git a/survivor.wdl b/survivor.wdl index ded11d75..e5ac7b5b 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -33,6 +33,7 @@ task Merge { Int minSize = 30 String outputPath = "./survivor/merged.vcf" String memory = "24G" + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" } @@ -57,6 +58,7 @@ task Merge { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -71,6 +73,7 @@ task Merge { minSize: {description: "The mimimum size of SV to be merged", category: "advanced"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The memory required to run the programs", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/talon.wdl b/talon.wdl index 6ddb841e..c6402fe4 100644 --- a/talon.wdl +++ b/talon.wdl @@ -31,6 +31,7 @@ task CreateAbundanceFileFromDatabase { File? datasetsFile String memory = "4G" + Int timeMinutes = 1 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -52,6 +53,7 @@ task CreateAbundanceFileFromDatabase { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -64,6 +66,7 @@ task CreateAbundanceFileFromDatabase { whitelistFile: {description: "Whitelist file of transcripts to include in the output.", category: "advanced"} datasetsFile: {description: "A file indicating which datasets should be included.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -84,6 +87,7 @@ task CreateGtfFromDatabase { File? datasetFile String memory = "4G" + Int timeMinutes = 1 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -106,6 +110,7 @@ task CreateGtfFromDatabase { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -119,6 +124,7 @@ task CreateGtfFromDatabase { whitelistFile: {description: "Whitelist file of transcripts to include in the output.", category: "advanced"} datasetFile: {description: "A file indicating which datasets should be included.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -135,6 +141,7 @@ task FilterTalonTranscripts { File? pairingsFile String memory = "4G" + Int timeMinutes = 1 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -154,6 +161,7 @@ task FilterTalonTranscripts { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -164,6 +172,7 @@ task FilterTalonTranscripts { outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} pairingsFile: {description: "A file indicating which datasets should be considered together.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -180,6 +189,7 @@ task GetReadAnnotations { File? datasetFile String memory = "4G" + Int timeMinutes = 1 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -199,6 +209,7 @@ task GetReadAnnotations { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -209,6 +220,7 @@ task GetReadAnnotations { outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} datasetFile: {description: "A file indicating which datasets should be included.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -228,6 +240,7 @@ task InitializeTalonDatabase { String outputPrefix String memory = "10G" + Int timeMinutes = 1 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -251,6 +264,7 @@ task InitializeTalonDatabase { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -265,6 +279,7 @@ task InitializeTalonDatabase { cutoff3p: {description: "Maximum allowable distance (bp) at the 3' end during annotation.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -277,6 +292,7 @@ task ReformatGtf { File GTFfile String memory = "4G" + Int timeMinutes = 1 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -292,6 +308,7 @@ task ReformatGtf { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -299,6 +316,7 @@ task ReformatGtf { # inputs GTFfile: {description: "GTF annotation containing genes, transcripts, and edges.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -315,6 +333,7 @@ task SummarizeDatasets { File? 
datasetGroupsCSV String memory = "4G" + Int timeMinutes = 1 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -334,6 +353,7 @@ task SummarizeDatasets { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -344,6 +364,7 @@ task SummarizeDatasets { outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} datasetGroupsCSV: {description: "File of comma-delimited dataset groups to process together.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -364,6 +385,7 @@ task Talon { Int cores = 4 String memory = "25G" + Int timeMinutes = 2880 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -399,6 +421,7 @@ task Talon { runtime { cpu: cores memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -414,6 +437,7 @@ task Talon { outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/transcriptclean.wdl b/transcriptclean.wdl index 68bcbf24..8c62190f 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -28,6 +28,7 @@ task GetSJsFromGtf { Int minIntronSize = 21 String memory = "8G" + Int timeMinutes = 1 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } @@ -47,6 +48,7 @@ task GetSJsFromGtf { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -57,6 +59,7 @@ task GetSJsFromGtf { minIntronSize: {description: "Minimum size of intron to consider a junction.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -70,6 +73,7 @@ task GetTranscriptCleanStats { String outputPrefix String memory = "4G" + Int timeMinutes = 1 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } @@ -87,6 +91,7 @@ task GetTranscriptCleanStats { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -95,6 +100,7 @@ task GetTranscriptCleanStats { transcriptCleanSAMfile: {description: "Output SAM file from TranscriptClean", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -124,6 +130,7 @@ task TranscriptClean { Int cores = 1 String memory = "25G" + Int timeMinutes = 2880 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } @@ -159,6 +166,7 @@ task TranscriptClean { runtime { cpu: cores memory: memory + time_minute: timeMinutes docker: dockerImage } @@ -181,6 +189,7 @@ task TranscriptClean { variantFile: {description: "VCF formatted file of variants.", category: "common"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/vt.wdl b/vt.wdl index 54599db0..d4c134b9 100644 --- a/vt.wdl +++ b/vt.wdl @@ -29,6 +29,7 @@ task Normalize { String outputPath = "./vt/normalized_decomposed.vcf" String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" String memory = "4G" + Int timeMinutes = 30 } command { @@ -43,6 +44,7 @@ task Normalize { runtime { memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -54,6 +56,7 @@ task Normalize { referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} memory: {description: "The memory required to run the programs", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From 73867fd5b045fdd1068f3b968a5e5927ee5a86ef Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 11 May 2020 11:27:09 +0200 Subject: [PATCH 0372/1208] Record change in changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e4dea7c7..153fa69c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ that users understand how the changes affect the new version. version 3.2.0-develop --------------------------- ++ GATK.HaplotypeCaller: Add `--dont-use-soft-clipped-bases` and + `--standard-min-confidence-threshold-for-calling` options. These are + required for RNA seq variant calling according to GATK best practices. + Samtools: Fix quotations in sort command. + Samtools SortByName is now called Sort. + Generalize sort task to now also sort by position, instead of just read name. From 6693dcfa829bb120c7d9c40450b8f34a615b300f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 11 May 2020 11:27:21 +0200 Subject: [PATCH 0373/1208] Add extra parameter_meta options for haplotypecaller --- gatk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index 6c28ab68..586c25d0 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -966,6 +966,8 @@ task HaplotypeCaller { category: "advanced"} emitRefConfidence: {description: "Whether to include reference calls. Three modes: 'NONE', 'BP_RESOLUTION' and 'GVCF'", category: "advanced"} + dontUseSoftClippedBases: {description: "Do not use soft-clipped bases. 
Should be 'true' for RNA variant calling.", category: "common"} + standardMinConfidenceThresholdForCalling: {description: "Confidence threshold used for calling variants.", category: "advanced"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} pedigree: {description: "Pedigree file for determining the population \"founders\"", category: "common"} From 3b8ca6a86272af3ae2603ede47305e5416785093 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 13 May 2020 13:21:46 +0200 Subject: [PATCH 0374/1208] time_minutes --- ccs.wdl | 3 +++ isoseq3.wdl | 3 +++ lima.wdl | 3 +++ talon.wdl | 14 +++++++------- transcriptclean.wdl | 4 ++-- 5 files changed, 18 insertions(+), 9 deletions(-) diff --git a/ccs.wdl b/ccs.wdl index 3a8f8879..d428053f 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -33,6 +33,7 @@ task CCS { Int cores = 2 String memory = "2G" + Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/pbccs:4.2.0--0" } @@ -63,6 +64,7 @@ task CCS { runtime { cpu: cores memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -78,6 +80,7 @@ task CCS { outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/isoseq3.wdl b/isoseq3.wdl index 10d87bbc..d241027c 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -32,6 +32,7 @@ task Refine { Int cores = 2 String memory = "2G" + Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" } @@ -61,6 +62,7 @@ task Refine { runtime { cpu: cores memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -75,6 +77,7 @@ task Refine { outputNamePrefix: {description: "Basename of the output files.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/lima.wdl b/lima.wdl index ba8a5407..77bcf320 100644 --- a/lima.wdl +++ b/lima.wdl @@ -50,6 +50,7 @@ task Lima { Int cores = 2 String memory = "2G" + Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/lima:1.11.0--0" } @@ -110,6 +111,7 @@ task Lima { runtime { cpu: cores memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -142,6 +144,7 @@ task Lima { outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/talon.wdl b/talon.wdl index c6402fe4..b2ae3a62 100644 --- a/talon.wdl +++ b/talon.wdl @@ -31,7 +31,7 @@ task CreateAbundanceFileFromDatabase { File? datasetsFile String memory = "4G" - Int timeMinutes = 1 + Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -87,7 +87,7 @@ task CreateGtfFromDatabase { File? datasetFile String memory = "4G" - Int timeMinutes = 1 + Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -141,7 +141,7 @@ task FilterTalonTranscripts { File? pairingsFile String memory = "4G" - Int timeMinutes = 1 + Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -189,7 +189,7 @@ task GetReadAnnotations { File? datasetFile String memory = "4G" - Int timeMinutes = 1 + Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -240,7 +240,7 @@ task InitializeTalonDatabase { String outputPrefix String memory = "10G" - Int timeMinutes = 1 + Int timeMinutes = 60 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -292,7 +292,7 @@ task ReformatGtf { File GTFfile String memory = "4G" - Int timeMinutes = 1 + Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } @@ -333,7 +333,7 @@ task SummarizeDatasets { File? 
datasetGroupsCSV String memory = "4G" - Int timeMinutes = 1 + Int timeMinutes = 50 String dockerImage = "biocontainers/talon:v4.4.2_cv1" } diff --git a/transcriptclean.wdl b/transcriptclean.wdl index 8c62190f..15da1f58 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -28,7 +28,7 @@ task GetSJsFromGtf { Int minIntronSize = 21 String memory = "8G" - Int timeMinutes = 1 + Int timeMinutes = 30 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } @@ -73,7 +73,7 @@ task GetTranscriptCleanStats { String outputPrefix String memory = "4G" - Int timeMinutes = 1 + Int timeMinutes = 30 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } From e39ea045beac06432a338f885972946938f479a4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 14 May 2020 10:27:35 +0200 Subject: [PATCH 0375/1208] adjust time_minuteso --- ccs.wdl | 2 +- isoseq3.wdl | 2 +- lima.wdl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ccs.wdl b/ccs.wdl index d428053f..1762ac75 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -33,7 +33,7 @@ task CCS { Int cores = 2 String memory = "2G" - Int timeMinutes = 120 + Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/pbccs:4.2.0--0" } diff --git a/isoseq3.wdl b/isoseq3.wdl index d241027c..9e0dfdb2 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -32,7 +32,7 @@ task Refine { Int cores = 2 String memory = "2G" - Int timeMinutes = 180 + Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" } diff --git a/lima.wdl b/lima.wdl index 77bcf320..ddd37da4 100644 --- a/lima.wdl +++ b/lima.wdl @@ -50,7 +50,7 @@ task Lima { Int cores = 2 String memory = "2G" - Int timeMinutes = 1440 + Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/lima:1.11.0--0" } From a4319c3e3a89af6f38310f1560b3c6e6cc6932f4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 14 May 2020 10:37:42 +0200 Subject: [PATCH 0376/1208] update changelog --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) 
diff --git a/CHANGELOG.md b/CHANGELOG.md index 153fa69c..a167222c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,14 @@ that users understand how the changes affect the new version. version 3.2.0-develop --------------------------- ++ Adjusted the memory settings of many tools, especially java tools. + The should now more accurately represent actual memory usage (as + opposed to virtual memory). ++ Added `-XX:ParallelGCThreads=1` to the java options of java tasks. ++ Added `timeMinutes` input to many tasks, this indicates a maximum + number of minutes that the job will run. The associated runtime + attribute is `time_minutes` which can be used to inform + a scheduler (eg. slurm) of the run time of the job. + GATK.HaplotypeCaller: Add `--dont-use-soft-clipped-bases` and `--standard-min-confidence-threshold-for-calling` options. These are required for RNA seq variant calling according to GATK best practices. From fa1901c451dc3465e94d1b3b36be26a7260203b7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 14 May 2020 16:47:17 +0200 Subject: [PATCH 0377/1208] adress comments --- CHANGELOG.md | 4 +++- bedtools.wdl | 9 +++++--- biopet/bamstats.wdl | 2 +- biopet/biopet.wdl | 51 +---------------------------------------- biopet/sampleconfig.wdl | 2 +- biowdl.wdl | 2 +- bowtie.wdl | 2 +- bwa.wdl | 2 +- common.wdl | 2 +- gatk.wdl | 24 +++++++++---------- hisat2.wdl | 2 +- picard.wdl | 12 +++++----- rtg.wdl | 4 ++-- star.wdl | 2 +- stringtie.wdl | 2 +- 15 files changed, 39 insertions(+), 83 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 05e79ac3..540fbbf0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,10 @@ that users understand how the changes affect the new version. version 3.2.0-develop --------------------------- ++ The struct `BowtieIndex` was removed, as it has become obsolete. ++ The task `ReorderGlobbedScatters` was removed, as it has become obsolete. + Adjusted the memory settings of many tools, especially java tools. 
- The should now more accurately represent actual memory usage (as + They should now more accurately represent actual memory usage (as opposed to virtual memory). + Added `-XX:ParallelGCThreads=1` to the java options of java tasks. + Added `timeMinutes` input to many tasks, this indicates a maximum diff --git a/bedtools.wdl b/bedtools.wdl index a64cef1a..c228d6c6 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -25,7 +25,7 @@ task Complement { File faidx File inputBed String outputBed = basename(inputBed, "\.bed") + ".complement.bed" - String memory = "2G" + String memory = "~{512 + ceil(size([inputBed, faidx], "M"))}M" Int timeMinutes = 1 + ceil(size([inputBed, faidx], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -66,6 +66,7 @@ task Merge { input { File inputBed String outputBed = "merged.bed" + String memory = "~{512 + ceil(size(inputBed, "M"))}M" Int timeMinutes = 1 + ceil(size(inputBed, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -79,6 +80,7 @@ task Merge { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -86,6 +88,7 @@ task Merge { parameter_meta { inputBed: {description: "The bed to merge.", category: "required"} outputBed: {description: "The path to write the output to.", category: "advanced"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -97,7 +100,7 @@ task MergeBedFiles { input { Array[File]+ bedFiles String outputBed = "merged.bed" - String memory = "2G" + String memory = "~{512 + ceil(size(bedFiles, "M"))}M" Int timeMinutes = 1 + ceil(size(bedFiles, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -174,7 +177,7 @@ task Intersect { # Giving a faidx file will set the sorted option. File? faidx String outputBed = "intersect.bed" - String memory = "2G" + String memory = "~{512 + ceil(size([regionsA, regionsB], "M"))}M" Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } diff --git a/biopet/bamstats.wdl b/biopet/bamstats.wdl index af01bb2e..d71355d3 100644 --- a/biopet/bamstats.wdl +++ b/biopet/bamstats.wdl @@ -34,7 +34,7 @@ task Generate { String outputDir Reference? reference - String memory = "10G" + String memory = "9G" String javaXmx = "8G" } diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index b90c5f4c..d56ed574 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -104,7 +104,7 @@ task ExtractAdaptersFastqc { Float? adapterCutoff Boolean? outputAsFasta - String memory = "10G" + String memory = "9G" String javaXmx = "8G" String dockerImage = "quay.io/biocontainers/biopet-extractadaptersfastqc:0.2--1" Int timeMinutes = 5 @@ -210,55 +210,6 @@ task FastqSync { } } -task ReorderGlobbedScatters { - input { - Array[File]+ scatters - - # Should not be changed from the main pipeline. As it should not influence results. - # The 3.7-slim container is 143 mb on the filesystem. 3.7 is 927 mb. - # The slim container is sufficient for this small task. - String dockerImage = "python:3.7-slim" - Int timeMinutes = 5 - } - - command <<< - set -e - # Copy all the scatter files to the CWD so the output matches paths in - # the cwd. - for file in ~{sep=" " scatters} - do cp $file . 
- done - python << CODE - from os.path import basename - scatters = ['~{sep="','" scatters}'] - splitext = [basename(x).split(".") for x in scatters] - splitnum = [x.split("-") + [y] for x,y in splitext] - ordered = sorted(splitnum, key=lambda x: int(x[1])) - merged = ["{}-{}.{}".format(x[0],x[1],x[2]) for x in ordered] - for x in merged: - print(x) - CODE - >>> - - output { - Array[File] reorderedScatters = read_lines(stdout()) - } - - runtime { - docker: dockerImage - time_minutes = timeMinutes - # 4 gigs of memory to be able to build the docker image in singularity - memory: "4G" - } - - parameter_meta { - scatters: {description: "The files which should be ordered.", category: "required"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} - task ScatterRegions { input { File referenceFasta diff --git a/biopet/sampleconfig.wdl b/biopet/sampleconfig.wdl index 50f26311..2b36952b 100644 --- a/biopet/sampleconfig.wdl +++ b/biopet/sampleconfig.wdl @@ -34,7 +34,7 @@ task SampleConfig { String? jsonOutputPath String? 
tsvOutputPath - String memory = "18G" + String memory = "17G" String javaXmx = "16G" } diff --git a/biowdl.wdl b/biowdl.wdl index 7661a592..838755d9 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -52,7 +52,7 @@ task InputConverter { } runtime { - memory: "2G" + memory: "128M" time_minutes: timeMinutes docker: dockerImage } diff --git a/bowtie.wdl b/bowtie.wdl index 500afea6..b3f3ceae 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -38,7 +38,7 @@ task Bowtie { Int threads = 1 Int timeMinutes = 1 + ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads) - String memory = "10G" + String memory = "~{5 + ceil(size(indexFiles, "G"))}G" String picardXmx = "4G" # Image contains bowtie=1.2.2 and picard=2.9.2 String dockerImage = "quay.io/biocontainers/mulled-v2-bfe71839265127576d3cd749c056e7b168308d56:1d8bec77b352cdcf3e9ff3d20af238b33ed96eae-0" diff --git a/bwa.wdl b/bwa.wdl index a39eb3e9..01dae9b4 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -29,7 +29,7 @@ task Mem { String? readgroup Int threads = 4 - String memory = "20G" + String memory = "~{5 + ceil(size(indexFiles, "G"))}G" String picardXmx = "4G" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 200 / threads) # A mulled container is needed to have both picard and bwa in one container. diff --git a/common.wdl b/common.wdl index 88848df2..f8325523 100644 --- a/common.wdl +++ b/common.wdl @@ -214,7 +214,7 @@ task YamlToJson { String outputJson = basename(yaml, "\.ya?ml$") + ".json" Int timeMinutes = 1 - String memory = "1G" + String memory = "128M" # biowdl-input-converter has python and pyyaml. 
String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" } diff --git a/gatk.wdl b/gatk.wdl index ff30b2ba..edafc4d4 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -317,7 +317,7 @@ task CollectAllelicCounts { File referenceFastaDict File referenceFastaFai - String memory = "12G" + String memory = "11G" String javaXmx = "10G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" @@ -373,8 +373,8 @@ task CollectReadCounts { File referenceFastaFai String intervalMergingRule = "OVERLAPPING_ONLY" - String memory = "5G" - String javaXmx = "4G" + String memory = "8G" + String javaXmx = "7G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 5) String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -557,8 +557,8 @@ task CreateReadCountPanelOfNormals { Array[File]+ readCountsFiles File? annotatedIntervals - String memory = "5G" - String javaXmx = "4G" + String memory = "8G" + String javaXmx = "7G" Int timeMinutes = 5 String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer causes a spark related error for some reason... } @@ -604,7 +604,7 @@ task DenoiseReadCounts { File readCounts String outputPrefix - String memory = "6G" + String memory = "5G" String javaXmx = "4G" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" @@ -662,7 +662,7 @@ task FilterMutectCalls { Int uniqueAltReadCount = 4 File mutect2Stats - String memory = "14G" + String memory = "13G" String javaXmx = "12G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" @@ -771,7 +771,7 @@ task GenomicsDBImport { String genomicsDBWorkspacePath = "genomics_db" String genomicsDBTarFile = "genomics_db.tar.gz" String? 
tmpDir - String memory = "6G" + String memory = "5G" String javaXmx = "4G" Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" @@ -896,7 +896,7 @@ task GetPileupSummaries { File sitesForContaminationIndex String outputPrefix - String memory = "14G" + String memory = "13G" String javaXmx = "12G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" @@ -1036,7 +1036,7 @@ task LearnReadOrientationModel { input { Array[File]+ f1r2TarGz - String memory = "14G" + String memory = "13G" String javaXmx = "12G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" @@ -1075,7 +1075,7 @@ task MergeStats { input { Array[File]+ stats - String memory = "16G" + String memory = "15G" String javaXmx = "14G" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" @@ -1122,7 +1122,7 @@ task ModelSegments { else 30 Int maximumNumberOfSmoothingIterations = 10 - String memory = "12G" + String memory = "11G" String javaXmx = "10G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" diff --git a/hisat2.wdl b/hisat2.wdl index 3ea18ee8..7d638f1f 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -33,7 +33,7 @@ task Hisat2 { Boolean downstreamTranscriptomeAssembly = true Int threads = 4 - String memory = "48G" + String memory = "~{threads + 5 + ceil(size(indexFiles, "G"))}G" Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / threads) # quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1 # is a combination of hisat2 and samtools diff --git a/picard.wdl b/picard.wdl index a63c1ba2..9d401631 100644 --- a/picard.wdl +++ b/picard.wdl @@ -84,7 +84,7 @@ task CollectMultipleMetrics { Boolean collectSequencingArtifactMetrics = true Boolean collectQualityYieldMetrics = true - String memory = "10G" + String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = 
"quay.io/biocontainers/picard:2.20.5--0" @@ -200,7 +200,7 @@ task CollectRnaSeqMetrics { String basename String strandSpecificity = "NONE" - String memory = "10G" + String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" @@ -460,7 +460,7 @@ task MarkDuplicates { String outputBamPath String metricsPath - String memory = "10G" + String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" @@ -581,7 +581,7 @@ task SamToFastq { File inputBamIndex Boolean paired = true - String memory = "18G" + String memory = "17G" String javaXmx = "16G" # High memory default to avoid crashes. String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" File? NONE @@ -653,7 +653,7 @@ task SortVcf { String outputVcfPath File? dict - String memory = "10G" + String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" @@ -701,7 +701,7 @@ task RenameSample { File inputVcf String outputPath = "./picard/renamed.vcf" String newSampleName - String memory = "10G" + String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) String dockerImage = "quay.io/biocontainers/picard:2.19.0--0" diff --git a/rtg.wdl b/rtg.wdl index 03a3f5dc..104a5ef9 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -27,7 +27,7 @@ task Format { Array[File]+ inputFiles String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" String rtgMem = "8G" - String memory = "10G" + String memory = "9G" Int timeMinutes = 1 + ceil(size(inputFiles) * 2) } @@ -80,7 +80,7 @@ task VcfEval { String outputMode = "split" Int threads = 1 # tool default is number of cores in the system 😱 String rtgMem = "8G" - String memory = "10G" + String memory = "9G" Int timeMinutes = 1 + ceil(size([baseline, calls], "G") * 5) String 
dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" } diff --git a/star.wdl b/star.wdl index 7824c764..11fde466 100644 --- a/star.wdl +++ b/star.wdl @@ -101,7 +101,7 @@ task Star { Int? limitBAMsortRAM Int runThreadN = 4 - String memory = "48G" + String memory = "~{5 + ceil(size(indexFiles, "G"))}G" Int timeMinutes = 1 + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } diff --git a/stringtie.wdl b/stringtie.wdl index f1d994b3..5ed62dea 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -32,7 +32,7 @@ task Stringtie { String? geneAbundanceFile Int threads = 1 - String memory = "10G" + String memory = "2G" Int timeMinutes = 1 + ceil(size(bam, "G") * 60 / threads) String dockerImage = "quay.io/biocontainers/stringtie:1.3.4--py35_0" } From c535d259e32ffb2ed409585c8d9bb1d2a61a0008 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 15 May 2020 10:23:58 +0200 Subject: [PATCH 0378/1208] fix variable used for time_minutes estimation in bwa --- bwa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index 01dae9b4..3dd7883b 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -29,7 +29,7 @@ task Mem { String? readgroup Int threads = 4 - String memory = "~{5 + ceil(size(indexFiles, "G"))}G" + String memory = "~{5 + ceil(size(bwaIndex.indexFiles, "G"))}G" String picardXmx = "4G" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 200 / threads) # A mulled container is needed to have both picard and bwa in one container. 
From c3dbda9b0af69681b0ab21b44fd4c23ec06f9745 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 18 May 2020 09:12:12 +0200 Subject: [PATCH 0379/1208] MultiQC should work directly with report files --- multiqc.wdl | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/multiqc.wdl b/multiqc.wdl index db1dd21e..85ce58df 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -23,8 +23,8 @@ version 1.0 task MultiQC { input { # Use a string here so cromwell does not relocate an entire analysis directory - String analysisDirectory - Array[File] dependencies = [] # This must be used in order to run multiqc after these tasks. + Array[File] reports + String reportDir = "reports" Boolean force = false Boolean dirs = false Int? dirsDepth @@ -62,6 +62,23 @@ task MultiQC { } command { + # Below code requires python 3.6 or higher. + # This makes sure all report files are in a report directory that MultiQC can investigate. + python3 < Date: Mon, 18 May 2020 09:21:12 +0200 Subject: [PATCH 0380/1208] fix parameter_meta' --- multiqc.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/multiqc.wdl b/multiqc.wdl index 85ce58df..dec91f7c 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -129,8 +129,7 @@ task MultiQC { } parameter_meta { - analysisDirectory: {description: "The directory to run MultiQC on.", category: "required"} - dependencies: {description: "This must be used in order to run multiqc after these tasks.", category: "internal_use_only"} + reports: {description: "Reports which multiqc should run on.", category: "required"} force: {description: "Equivalent to MultiQC's `--force` flag.", category: "advanced"} dirs: {description: "Equivalent to MultiQC's `--dirs` flag.", category: "advanced"} dirsDepth: {description: "Equivalent to MultiQC's `--dirs-depth` option.", category: "advanced"} From 44a372eda41c9b37e3e28d2aa4d67bf5340827b4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 18 May 2020 10:12:03 +0200 Subject: 
[PATCH 0381/1208] add reportfile output to star --- star.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/star.wdl b/star.wdl index 4c407331..5d0d6b6c 100644 --- a/star.wdl +++ b/star.wdl @@ -127,6 +127,7 @@ task Star { output { File bamFile = outFileNamePrefix + "Aligned." + samOutputNames[outSAMtype] + File logFinalOut = outFileNamePrefix + "Log.final.out" } runtime { From e1806e2ea3946b6be28df6a39a234dc2a5a691c1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 18 May 2020 11:03:40 +0200 Subject: [PATCH 0382/1208] add summary file to hisat2 task --- hisat2.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hisat2.wdl b/hisat2.wdl index bc6be2e8..aafa3331 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -31,6 +31,7 @@ task Hisat2 { String readgroup String platform = "illumina" Boolean downstreamTranscriptomeAssembly = true + String summaryFilePath = basename(outputBam, ".bam") + ".summary.txt" Int threads = 1 String memory = "48G" @@ -55,6 +56,7 @@ task Hisat2 { --rg 'LB:~{library}' \ --rg 'PL:~{platform}' \ ~{true="--dta" false="" downstreamTranscriptomeAssembly} \ + --summary-file ~{summaryFilePath} \ | samtools sort > ~{outputBam} samtools index ~{outputBam} ~{bamIndexPath} } @@ -62,6 +64,7 @@ task Hisat2 { output { File bamFile = outputBam File bamIndex = bamIndexPath + File summaryFile = summaryFilePath } runtime { From cf652ee34d5c39e5b841dcc048c457dad332624a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 18 May 2020 11:15:49 +0200 Subject: [PATCH 0383/1208] hisat2.wdl --- hisat2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hisat2.wdl b/hisat2.wdl index aafa3331..b65b2da9 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -56,7 +56,7 @@ task Hisat2 { --rg 'LB:~{library}' \ --rg 'PL:~{platform}' \ ~{true="--dta" false="" downstreamTranscriptomeAssembly} \ - --summary-file ~{summaryFilePath} \ + --new-summary ~{summaryFilePath} \ | samtools sort > ~{outputBam} samtools index ~{outputBam} ~{bamIndexPath} } From 
17cdbd13a8ea3288eb0e334cbb81a694f64b659e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 18 May 2020 13:30:15 +0200 Subject: [PATCH 0384/1208] make pdf files optional --- picard.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/picard.wdl b/picard.wdl index 5393cd3a..e9275504 100644 --- a/picard.wdl +++ b/picard.wdl @@ -114,7 +114,7 @@ task CollectMultipleMetrics { File baitBiasDetail = basename + ".bait_bias_detail_metrics" File baitBiasSummary = basename + ".bait_bias_summary_metrics" File baseDistributionByCycle = basename + ".base_distribution_by_cycle_metrics" - File baseDistributionByCyclePdf = basename + ".base_distribution_by_cycle.pdf" + File? baseDistributionByCyclePdf = basename + ".base_distribution_by_cycle.pdf" File errorSummary = basename + ".error_summary_metrics" File gcBiasDetail = basename + ".gc_bias.detail_metrics" File gcBiasPdf = basename + ".gc_bias.pdf" @@ -124,9 +124,9 @@ task CollectMultipleMetrics { File preAdapterDetail = basename + ".pre_adapter_detail_metrics" File preAdapterSummary = basename + ".pre_adapter_summary_metrics" File qualityByCycle = basename + ".quality_by_cycle_metrics" - File qualityByCyclePdf = basename + ".quality_by_cycle.pdf" + File? qualityByCyclePdf = basename + ".quality_by_cycle.pdf" File qualityDistribution = basename + ".quality_distribution_metrics" - File qualityDistributionPdf = basename + ".quality_distribution.pdf" + File? qualityDistributionPdf = basename + ".quality_distribution.pdf" File qualityYield = basename + ".quality_yield_metrics" # Using a glob is easier. But will lead to very ugly output directories. 
Array[File] allStats = select_all([ From e055553df027fb2047570e68c6aaf38f9a93faf3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 18 May 2020 15:06:13 +0200 Subject: [PATCH 0385/1208] fix missing parameter_meta --- picard.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/picard.wdl b/picard.wdl index 9d401631..057b1919 100644 --- a/picard.wdl +++ b/picard.wdl @@ -60,6 +60,7 @@ task BedToIntervalList { memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From f5f38a7b67e35ff291de2de8739ae7d941871c3a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 18 May 2020 15:12:09 +0200 Subject: [PATCH 0386/1208] add allFiles output to gffcompare --- gffcompare.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/gffcompare.wdl b/gffcompare.wdl index ca2b1669..bf4be325 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -107,6 +107,7 @@ task GffCompare { File? 
missedIntrons = if debugMode then totalPrefix + ".missed_introns.gtf" else noneFile + Array[File] allFiles = select_all([annotated, loci, stats, tracking, redundant, missedIntrons]) } runtime { From efd99dda531a5f0e1381d9217a2c5853c16e967b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 18 May 2020 15:31:08 +0200 Subject: [PATCH 0387/1208] repair hisat2 --- hisat2.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hisat2.wdl b/hisat2.wdl index b65b2da9..b662dcb7 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -56,7 +56,8 @@ task Hisat2 { --rg 'LB:~{library}' \ --rg 'PL:~{platform}' \ ~{true="--dta" false="" downstreamTranscriptomeAssembly} \ - --new-summary ~{summaryFilePath} \ + --new-summary \ + --summary-file ~{summaryFilePath} \ | samtools sort > ~{outputBam} samtools index ~{outputBam} ~{bamIndexPath} } From dbe93a9e8c275996cb3f2a846ce56b607926eb88 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 19 May 2020 10:06:56 +0200 Subject: [PATCH 0388/1208] Make a number of outputs optional in Picard. --- CHANGELOG.md | 3 +++ picard.wdl | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 540fbbf0..010d81aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ that users understand how the changes affect the new version. version 3.2.0-develop --------------------------- ++ Picard: Make a number of outputs in `CollectMultipleMetrics` optional. BamMetrics + will fail if `CollectAlignmentSummaryMetrics` & `MeanQualityByCycle` in this + task are not optional. + The struct `BowtieIndex` was removed, as it has become obsolete. + The task `ReorderGlobbedScatters` was removed, as it has become obsolete. + Adjusted the memory settings of many tools, especially java tools. 
diff --git a/picard.wdl b/picard.wdl index 057b1919..c17029e2 100644 --- a/picard.wdl +++ b/picard.wdl @@ -113,7 +113,7 @@ task CollectMultipleMetrics { } output { - File alignmentSummary = basename + ".alignment_summary_metrics" + File? alignmentSummary = basename + ".alignment_summary_metrics" File baitBiasDetail = basename + ".bait_bias_detail_metrics" File baitBiasSummary = basename + ".bait_bias_summary_metrics" File baseDistributionByCycle = basename + ".base_distribution_by_cycle_metrics" @@ -126,10 +126,10 @@ task CollectMultipleMetrics { File? insertSize = basename + ".insert_size_metrics" File preAdapterDetail = basename + ".pre_adapter_detail_metrics" File preAdapterSummary = basename + ".pre_adapter_summary_metrics" - File qualityByCycle = basename + ".quality_by_cycle_metrics" - File qualityByCyclePdf = basename + ".quality_by_cycle.pdf" + File? qualityByCycle = basename + ".quality_by_cycle_metrics" + File? qualityByCyclePdf = basename + ".quality_by_cycle.pdf" File qualityDistribution = basename + ".quality_distribution_metrics" - File qualityDistributionPdf = basename + ".quality_distribution.pdf" + File? qualityDistributionPdf = basename + ".quality_distribution.pdf" File qualityYield = basename + ".quality_yield_metrics" # Using a glob is easier. But will lead to very ugly output directories. Array[File] allStats = select_all([ From 42c6d473d8b67c0f0d3387b6a62438780268ad2d Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 19 May 2020 10:16:23 +0200 Subject: [PATCH 0389/1208] Update CHANGELOG text. --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 010d81aa..d4719dcd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,8 +12,8 @@ that users understand how the changes affect the new version. version 3.2.0-develop --------------------------- + Picard: Make a number of outputs in `CollectMultipleMetrics` optional. 
BamMetrics - will fail if `CollectAlignmentSummaryMetrics` & `MeanQualityByCycle` in this - task are not optional. + will fail if `CollectAlignmentSummaryMetrics` & `MeanQualityByCycle` are set + to false and their outputs are not optional. + The struct `BowtieIndex` was removed, as it has become obsolete. + The task `ReorderGlobbedScatters` was removed, as it has become obsolete. + Adjusted the memory settings of many tools, especially java tools. From 28cb8e8d2584c3ee95be609daf0ac50d2ac547e2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 19 May 2020 10:23:55 +0200 Subject: [PATCH 0390/1208] Set all outputs to optional. --- CHANGELOG.md | 5 ++--- picard.wdl | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d4719dcd..65a37944 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,9 +11,8 @@ that users understand how the changes affect the new version. version 3.2.0-develop --------------------------- -+ Picard: Make a number of outputs in `CollectMultipleMetrics` optional. BamMetrics - will fail if `CollectAlignmentSummaryMetrics` & `MeanQualityByCycle` are set - to false and their outputs are not optional. ++ Picard: Make all outputs in `CollectMultipleMetrics`. This will make sure the + task will not fail if one of the metrics is set to false. + The struct `BowtieIndex` was removed, as it has become obsolete. + The task `ReorderGlobbedScatters` was removed, as it has become obsolete. + Adjusted the memory settings of many tools, especially java tools. diff --git a/picard.wdl b/picard.wdl index c17029e2..136081be 100644 --- a/picard.wdl +++ b/picard.wdl @@ -114,25 +114,25 @@ task CollectMultipleMetrics { output { File? 
alignmentSummary = basename + ".alignment_summary_metrics" - File baitBiasDetail = basename + ".bait_bias_detail_metrics" - File baitBiasSummary = basename + ".bait_bias_summary_metrics" - File baseDistributionByCycle = basename + ".base_distribution_by_cycle_metrics" - File baseDistributionByCyclePdf = basename + ".base_distribution_by_cycle.pdf" - File errorSummary = basename + ".error_summary_metrics" - File gcBiasDetail = basename + ".gc_bias.detail_metrics" - File gcBiasPdf = basename + ".gc_bias.pdf" - File gcBiasSummary = basename + ".gc_bias.summary_metrics" + File? baitBiasDetail = basename + ".bait_bias_detail_metrics" + File? baitBiasSummary = basename + ".bait_bias_summary_metrics" + File? baseDistributionByCycle = basename + ".base_distribution_by_cycle_metrics" + File? baseDistributionByCyclePdf = basename + ".base_distribution_by_cycle.pdf" + File? errorSummary = basename + ".error_summary_metrics" + File? gcBiasDetail = basename + ".gc_bias.detail_metrics" + File? gcBiasPdf = basename + ".gc_bias.pdf" + File? gcBiasSummary = basename + ".gc_bias.summary_metrics" File? insertSizeHistogramPdf = basename + ".insert_size_histogram.pdf" File? insertSize = basename + ".insert_size_metrics" - File preAdapterDetail = basename + ".pre_adapter_detail_metrics" - File preAdapterSummary = basename + ".pre_adapter_summary_metrics" + File? preAdapterDetail = basename + ".pre_adapter_detail_metrics" + File? preAdapterSummary = basename + ".pre_adapter_summary_metrics" File? qualityByCycle = basename + ".quality_by_cycle_metrics" File? qualityByCyclePdf = basename + ".quality_by_cycle.pdf" - File qualityDistribution = basename + ".quality_distribution_metrics" + File? qualityDistribution = basename + ".quality_distribution_metrics" File? qualityDistributionPdf = basename + ".quality_distribution.pdf" - File qualityYield = basename + ".quality_yield_metrics" + File? qualityYield = basename + ".quality_yield_metrics" # Using a glob is easier. 
But will lead to very ugly output directories. - Array[File] allStats = select_all([ + Array[File]? allStats = select_all([ alignmentSummary, baitBiasDetail, baitBiasSummary, From ab21de84c00609d3549ee8dc2278d929ff2c160b Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 19 May 2020 10:24:54 +0200 Subject: [PATCH 0391/1208] Update CHANGELOG. --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65a37944..dfa50280 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 3.2.0-develop --------------------------- -+ Picard: Make all outputs in `CollectMultipleMetrics`. This will make sure the ++ Picard: Make all outputs in `CollectMultipleMetrics` optional. This will make sure the task will not fail if one of the metrics is set to false. + The struct `BowtieIndex` was removed, as it has become obsolete. + The task `ReorderGlobbedScatters` was removed, as it has become obsolete. From 48aef36c6ac5a0db0e88bca6ba9702b942c30136 Mon Sep 17 00:00:00 2001 From: Jasper Date: Tue, 19 May 2020 10:35:58 +0200 Subject: [PATCH 0392/1208] Update picard.wdl Co-authored-by: DavyCats --- picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index 136081be..3103ad9b 100644 --- a/picard.wdl +++ b/picard.wdl @@ -132,7 +132,7 @@ task CollectMultipleMetrics { File? qualityDistributionPdf = basename + ".quality_distribution.pdf" File? qualityYield = basename + ".quality_yield_metrics" # Using a glob is easier. But will lead to very ugly output directories. - Array[File]? 
allStats = select_all([ + Array[File] allStats = select_all([ alignmentSummary, baitBiasDetail, baitBiasSummary, From f8198099cfa46fdc971764cb6a5bf30496291303 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 19 May 2020 13:16:44 +0200 Subject: [PATCH 0393/1208] fix missing paramter_meta --- biopet/biopet.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index d56ed574..cc8e1bc6 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -280,6 +280,7 @@ task ScatterRegions { bamIndex: {description: "The index for the bamfile given through bamFile.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From 408f06c6ed8cf2780f28a28cdc802b02b432f0d9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 19 May 2020 15:40:55 +0200 Subject: [PATCH 0394/1208] Update changelog with changes and to a newer version --- CHANGELOG.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dfa50280..ccd1cedc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,13 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 3.2.0-develop +version 4.0.0-develop --------------------------- ++ Added a log output for STAR. ++ Added report output to Hisat2. ++ Change MultiQC inputs. It now accepts an array of reports files. It does not + need access to a folder with the reports anymore. 
MultiQC can now be used + as a normal WDL task without hacks. + Picard: Make all outputs in `CollectMultipleMetrics` optional. This will make sure the task will not fail if one of the metrics is set to false. + The struct `BowtieIndex` was removed, as it has become obsolete. From d3832132227b8a2197ac0d0b602281223f44c82d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 19 May 2020 16:04:43 +0200 Subject: [PATCH 0395/1208] cleanup multiqc. Add comments on how it works --- multiqc.wdl | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/multiqc.wdl b/multiqc.wdl index dec91f7c..b50122e0 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -24,7 +24,6 @@ task MultiQC { input { # Use a string here so cromwell does not relocate an entire analysis directory Array[File] reports - String reportDir = "reports" Boolean force = false Boolean dirs = false Int? dirsDepth @@ -37,13 +36,11 @@ task MultiQC { String? tag String? ignore String? ignoreSamples - Boolean ignoreSymlinks = false File? sampleNames File? fileList Array[String]+? exclude Array[String]+? module Boolean dataDir = false - Boolean noDataDir = false String? dataFormat Boolean zipDataDir = false Boolean export = false @@ -54,16 +51,27 @@ task MultiQC { Boolean megaQCUpload = false # This must be actively enabled in my opinion. The tools default is to upload. File? config # A directory String? clConfig - Array[Boolean] finished = [] # An array of booleans that can be used to let multiqc wait on stuff. - + String memory = "4G" String dockerImage = "quay.io/biocontainers/multiqc:1.7--py_1" } + # This is where the reports end up. It does not need to be changed by the + # user. It is full of symbolic links, so it is not of any use to the user + # anyway. + String reportDir = "reports" + + # Below code requires python 3.6 or higher. + # This makes sure all report files are in a report directory that + # MultiQC can investigate. 
+ # This creates files in report_dir / hashed_parent / file basename. + # By hashing the parent path we make sure there are no file colissions as + # files from the same directory end up in the same directory, while files + # from other directories get their own directory. Cromwell also uses this + # strategy. Using python's builtin hash is unique enough for these purposes. + command { - # Below code requires python 3.6 or higher. - # This makes sure all report files are in a report directory that MultiQC can investigate. python3 < Date: Tue, 19 May 2020 16:09:19 +0200 Subject: [PATCH 0396/1208] Parameter meta for multiqc updated. Make sure data dir output is always zipped by default. --- multiqc.wdl | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/multiqc.wdl b/multiqc.wdl index 62ca5421..6a967b3f 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -42,7 +42,7 @@ task MultiQC { Array[String]+? module Boolean dataDir = false String? dataFormat - Boolean zipDataDir = false + Boolean zipDataDir = true Boolean export = false Boolean flat = false Boolean interactive = true @@ -128,7 +128,7 @@ task MultiQC { output { File multiqcReport = outDir + "/" + reportFilename + "_report.html" - File multiqcDataDir = outDir + "/" +reportFilename + "_data" + File? 
multiqcDataDirZip = outDir + "/" +reportFilename + "_data.zip" } runtime { @@ -151,13 +151,11 @@ task MultiQC { tag: {description: "Equivalent to MultiQC's `--tag` option.", category: "advanced"} ignore: {description: "Equivalent to MultiQC's `--ignore` option.", category: "advanced"} ignoreSamples: {description: "Equivalent to MultiQC's `--ignore-samples` option.", category: "advanced"} - ignoreSymlinks: {description: "Equivalent to MultiQC's `--ignore-symlinks` flag.", category: "advanced"} sampleNames: {description: "Equivalent to MultiQC's `--sample-names` option.", category: "advanced"} fileList: {description: "Equivalent to MultiQC's `--file-list` option.", category: "advanced"} exclude: {description: "Equivalent to MultiQC's `--exclude` option.", category: "advanced"} module: {description: "Equivalent to MultiQC's `--module` option.", category: "advanced"} - dataDir: {description: "Equivalent to MultiQC's `--data-dir` flag.", category: "advanced"} - noDataDir: {description: "Equivalent to MultiQC's `--no-data-dir` flag.", category: "advanced"} + dataDir: {description: "Whether to output a data dir. 
Sets `--data-dir` or `--no-data-dir` flag.", category: "advanced"} dataFormat: {description: "Equivalent to MultiQC's `--data-format` option.", category: "advanced"} zipDataDir: {description: "Equivalent to MultiQC's `--zip-data-dir` flag.", category: "advanced"} export: {description: "Equivalent to MultiQC's `--export` flag.", category: "advanced"} @@ -168,8 +166,6 @@ task MultiQC { megaQCUpload: {description: "Opposite to MultiQC's `--no-megaqc-upload` flag.", category: "advanced"} config: {description: "Equivalent to MultiQC's `--config` option.", category: "advanced"} clConfig: {description: "Equivalent to MultiQC's `--cl-config` option.", category: "advanced"} - finished: {description: "An array of booleans that can be used to let multiqc wait on stuff.", category: "internal_use_only"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", From 8aa9d7ead5471e009a04bedd6d00a6b04dc5e3cd Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 19 May 2020 16:11:19 +0200 Subject: [PATCH 0397/1208] Add summaryFilePath parameter_meta --- hisat2.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/hisat2.wdl b/hisat2.wdl index 85bd3b35..5937f86d 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -86,6 +86,7 @@ task Hisat2 { readgroup: {description: "The readgroup id.", category: "required"} platform: {description: "The platform used for sequencing.", category: "advanced"} downstreamTranscriptomeAssembly: {description: "Equivalent to hisat2's `--dta` flag.", category: "advanced"} + summaryFilePath: {description: "Where the summary file should be written.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From a879f1c7be866a4124544fece45aa8163f782a2a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 20 May 2020 14:58:48 +0200 Subject: [PATCH 0398/1208] add gffcompare output --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ccd1cedc..390faf25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ version 4.0.0-develop --------------------------- + Added a log output for STAR. + Added report output to Hisat2. ++ Added output with all reports to gffcompare. + Change MultiQC inputs. It now accepts an array of reports files. It does not need access to a folder with the reports anymore. MultiQC can now be used as a normal WDL task without hacks. 
From 0211661c3d7dc4cc95213ed3c2a5640cfcc38ae2 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 20 May 2020 15:19:24 +0200 Subject: [PATCH 0399/1208] add missing parameter_meta --- chunked-scatter.wdl | 3 ++- star.wdl | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 1b81687a..111d8fa4 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -59,7 +59,8 @@ task ChunkedScatter { chunkSize: {description: "Equivalent to chunked-scatter's `-c` option.", category: "advanced"} overlap: {description: "Equivalent to chunked-scatter's `-o` option.", category: "advanced"} minimumBasesPerFile: {description: "Equivalent to chunked-scatter's `-m` option.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} diff --git a/star.wdl b/star.wdl index 4942f35e..8e6a511e 100644 --- a/star.wdl +++ b/star.wdl @@ -81,6 +81,7 @@ task GenomeGenerate { threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From 63202c731561890fc23fc6a451dd69cdae879108 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 28 May 2020 10:35:52 +0200 Subject: [PATCH 0400/1208] add bcftools stats --- bcftools.wdl | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 53165c6b..8281deb7 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -55,3 +55,91 @@ task Bcf2Vcf { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } + +task Stats { + input { + File inputVcf + File? compareVcf + String? afBins + String? afTag + Boolean firstAlleleOnly = false + String? collapse + String? depth + String? exclude + File? exons + String? applyFilters + File? fastaRef + File? fastaRefIndex + String? include + Boolean splitByID = false + String? regions + File? regionsFile + Array[String] samples = [] + File? samplesFile + String? targets + File? targetsFile + String? userTsTv + Boolean verbose = false + + Int threads = 0 + Int timeMinutes = 1 + 2* ceil(size(select_all([inputVcf, compareVcf]), "G")) # TODO: Estimate, 2 minutes per GB, refine later. + String memory = "2G" # TODO: Safe estimate, refine later. 
+ } + + command { + bcftools \ + ~{"--af-bins " + afBins} \ + ~{"--af-tag " + afTag} \ + ~{true="--1st-allele-only" false="" firstAlleleOnly} \ + ~{"--collapse " + collapse} \ + ~{"--depth " + depth} \ + ~{"--exclude " + exclude} \ + ~{"--exons " + exons} \ + ~{"--apply-filters " + applyFilters} \ + ~{"--fasta-ref " + fastaRef} \ + ~{"--include " + include} \ + ~{true="--split-by-ID" false="" splitByID} \ + ~{"--regions " + regions} \ + ~{"--regions-file " + regionsFile} \ + ~{true="--samples" false="" length(samples) > 0} ~{sep="," samples} \ + ~{"--samples-file " + samplesFile} \ + ~{"--targets " + targets} \ + ~{"--targets-file " + targetsFile} \ + ~{"--user-tstv " + userTsTv} \ + --threads ~{threads} \ + ~{true="--verbose" false="" verbose} \ + ~{inputVcf} ~{compareVcf} + } + + runtime { + cpu: threads + 1 + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + inputVcf: {description: "The vcf to be analysed.", category: "required"} + compareVcf: {description: "When inputVcf and compareVCF are given, the program generates separate stats for intersection and the complements. By default only sites are compared, samples must be given to include also sample columns.", category: "common"} + afBins: {description: "Allele frequency bins, a list (0.1,0.5,1) or a file (0.1\n0.5\n1).", category: "advanced"} + afTag: {description: "Allele frequency tag to use, by default estimated from AN,AC or GT.", category: "advanded"} + firstAlleleOnly: {description: "Include only 1st allele at multiallelic sites." category: "advanced"} + collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} + depth: {description: "Depth distribution: min,max,bin size [0,500,1].", category: "advanced"} + exclude: {description: "Exclude sites for which the expression is true (see man page for details)." 
category: "advanced"} + exons: {description: "Tab-delimited file with exons for indel frameshifts (chr,from,to; 1-based, inclusive, bgzip compressed).", category: "advanced"} + applyFilters: {description: "Require at least one of the listed FILTER strings (e.g. \"PASS,.\").", category: "advanced"} + fastaRef: {description: "Faidx indexed reference sequence file to determine INDEL context.", category: "advanced"} + fastaRefIndex: {description: "Index file (.fai) for fastaRef. Must be supplied if fastaRef is supplied.", category: "advanced"} + include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} + splitByID: {description: "Collect stats for sites with ID separately (known vs novel).", category: "advanced"} + regions: {description: "Restrict to comma-separated list of regions.", category: "advanced"} + regionsFile: {description: "Restrict to regions listed in a file.", category: "advanced"} + samples: {description: "List of samples for sample stats, \"-\" to include all samples.", category: "advanced"} + samplesFile: {description: "File of samples to include.", category: "advanced"} + targets: {description: "Similar to regions but streams rather than index-jumps.", category: "advanced"} + targetsFile: {description: "Similar to regionsFile but streams rather than index-jumps.", category: "advanced"} + userTsTv: {description: ". 
Collect Ts/Tv stats for any tag using the given binning [0:1:100].", category: "advanced"} + threads: {description: "Number of extra decompression threads [0].", category: "advanced"} + verbose: {description: "Produce verbose per-site and per-sample output.", category: "advanced"} + } +} \ No newline at end of file From 12420e3882d4dd23876c181b312e775ce320a583 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 28 May 2020 11:30:38 +0200 Subject: [PATCH 0401/1208] add dockerimage --- bcftools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 8281deb7..2341585d 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -84,6 +84,7 @@ task Stats { Int threads = 0 Int timeMinutes = 1 + 2* ceil(size(select_all([inputVcf, compareVcf]), "G")) # TODO: Estimate, 2 minutes per GB, refine later. String memory = "2G" # TODO: Safe estimate, refine later. + String dockerImage = "quay.io/biocontainers/bcftools:1.9--ha228f0b_3" } command { @@ -115,6 +116,7 @@ task Stats { cpu: threads + 1 time_minutes: timeMinutes memory: memory + docker: dockerImage } parameter_meta { From 25bb7b38eca7f1069786cce8a00ba512b2dbe014 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 28 May 2020 13:42:50 +0200 Subject: [PATCH 0402/1208] start on variant eval --- gatk.wdl | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index edafc4d4..c1cb9480 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1551,6 +1551,78 @@ task SplitNCigarReads { } } +task VariantEval { + input { + Array[File] inputVcfs + Array[File] inputVcfsIndex + Array[File] comparisonVcfs = [] + Array[File] comparisonVcfsIndex = [] + File? referenceFasta + File? referenceFastaDict + File? referenceFastaFai + File? dbsnpVCF + File? 
dbsnpVCFIndex + Array[File] intervals = [] + String outputPath = "eval.table" + Boolean doNotUseAllStandardModules = false + Boolean doNotUseAllStandardStratifications = false + Array[String] evalModules = [] + Array[String] stratificationModules = [] + Array[String] samples = [] + Boolean mergeEvals = false + + String memory = "5G" + String javaXmx = "4G" + # TODO: Refine estimate. For now 4 minutes per GB of input. + Int timeMinutes = ceil(size(flatten([inputVcfs, comparisonVcfs]), "G") * 4) + String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ + VariantFiltration \ + -O ~{outputPath} \ + ~{true="--eval" false="" length(inputVcfs) > 0} ~{sep=" --eval " inputVcfs} \ + ~{true="--comparison" false="" length(comparisonVcfs) > 0} ~{sep=" --comparison " comparisonVcfs} \ + ~{"-R " + referenceFasta} \ + ~{"--dbsnp " + dbsnpVCF } \ + ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} \ + ~{true="--sample" false="" length(samples) > 0} ~{sep=' --sample ' samples} \ + ~{true="--do-not-use-all-standard-modules" false="" doNotUseAllStandardModules} \ + ~{true="--do-not-use-all-standard-stratifications" false="" doNotUseAllStandardStratifications} \ + ~{true="-EV" false="" length(evalModules) > 0} ~{sep=" -EV " evalModules} \ + ~{true="-ST" false="" length(stratificationModules) > 0} ~{sep=" -ST " stratificationModules} \ + ~{true="--merge-evals" false="" mergeEvals} + + } + + output { + File table = outputPath + } + + runtime { + cpu: 1 + docker: dockerImage + memory: memory + time_minutes: timeMinutes + } + parameter_meta { + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "common"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "common"} + referenceFastaFai: {description: "The index for the reference 
fasta file.", category: "common"} + dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} + dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} + outputPath: {description: "The location the output table should be written.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} task VariantFiltration { input { File inputVcf From c58cf2183ef7d821f1454ee8093b7a60d2141b1f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 28 May 2020 15:17:16 +0200 Subject: [PATCH 0403/1208] add variant eval parameter_meta --- gatk.wdl | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index c1cb9480..8e67edc8 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1553,8 +1553,8 @@ task SplitNCigarReads { task VariantEval { input { - Array[File] inputVcfs - Array[File] inputVcfsIndex + Array[File] evalVcfs + Array[File] evalVcfsIndex Array[File] comparisonVcfs = [] Array[File] comparisonVcfsIndex = [] File? referenceFasta @@ -1574,7 +1574,7 @@ task VariantEval { String memory = "5G" String javaXmx = "4G" # TODO: Refine estimate. For now 4 minutes per GB of input. 
- Int timeMinutes = ceil(size(flatten([inputVcfs, comparisonVcfs]), "G") * 4) + Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs]), "G") * 4) String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -1584,7 +1584,7 @@ task VariantEval { gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ VariantFiltration \ -O ~{outputPath} \ - ~{true="--eval" false="" length(inputVcfs) > 0} ~{sep=" --eval " inputVcfs} \ + ~{true="--eval" false="" length(evalVcfs) > 0} ~{sep=" --eval " evalVcfs} \ ~{true="--comparison" false="" length(comparisonVcfs) > 0} ~{sep=" --comparison " comparisonVcfs} \ ~{"-R " + referenceFasta} \ ~{"--dbsnp " + dbsnpVCF } \ @@ -1609,12 +1609,22 @@ task VariantEval { time_minutes: timeMinutes } parameter_meta { + evalVcfs: {description: "Variant sets to evaluate." category: "required"} + evalVcfsIndex: {description: "Indexes for the variant sets.", category: "required"} + comparisonVcfs: {description: "Compare set vcfs.", category: "advanced"} + comparisonVcfsIndex: {description: "Indexes for the compare sets.", category: "advanced"} + evalModules: {description: "One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless doNotUseAllStandardModules=true)", category: "common"} + stratificationModules: {description: "One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless doNotUseAllStandardStratifications=true)", category: "common"} + samples: {description: "Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context." , category: "advanced"} # Advanced because this description is impossible to understand... 
+ mergeEvals: {description: "If provided, all evalVcf tracks will be merged into a single eval track", category: "common"} + doNotUseAllStandardModules: {description: "Do not use the standard modules by default (instead, only those that are specified with the evalModules option).", category: "common"} + doNotUseAllStandardStratifications: {description: "Do not use the standard stratification modules by default (instead, only those that are specified with the stratificationModules option).", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "common"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "common"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "common"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} - outputPath: {description: "The location the output table should be written.", category: "common"} + outputPath: {description: "The location the output table should be written.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} From b3a80c84a6d785aa433adf6b58cd3f694b8cd5a4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 2 Jun 2020 12:29:32 +0200 Subject: [PATCH 0404/1208] Take into account index size for star alignment time requirement --- CHANGELOG.md | 1 + star.wdl | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 390faf25..6fb9a3d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. 
version 4.0.0-develop --------------------------- ++ Take into account index size for STAR alignment time requirement. + Added a log output for STAR. + Added report output to Hisat2. + Added output with all reports to gffcompare. diff --git a/star.wdl b/star.wdl index 8e6a511e..c262dc54 100644 --- a/star.wdl +++ b/star.wdl @@ -103,7 +103,8 @@ task Star { Int runThreadN = 4 String memory = "~{5 + ceil(size(indexFiles, "G"))}G" - Int timeMinutes = 1 + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN) + # 1 minute initialization + time reading in index + time aligning data. + Int timeMinutes = 1 + ceil(size(indexFiles, "G") / 2) + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } From 3605b18e3b54ddccae4ffc8553af92047338983d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 4 Jun 2020 13:35:34 +0200 Subject: [PATCH 0405/1208] higher margin for rnaseqmetrics --- CHANGELOG.md | 1 + picard.wdl | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6fb9a3d7..bde1b766 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Take into account reference fasta size for Picard metrics. + Take into account index size for STAR alignment time requirement. + Added a log output for STAR. + Added report output to Hisat2. diff --git a/picard.wdl b/picard.wdl index 3103ad9b..a94abd52 100644 --- a/picard.wdl +++ b/picard.wdl @@ -87,7 +87,8 @@ task CollectMultipleMetrics { String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + # Additional * 2 because picard multiple metrics reads the reference fasta twice. 
+ Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -203,7 +204,7 @@ task CollectRnaSeqMetrics { String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } From 898e02eefbd7a612dba9a2e868535d1bcc36c62a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 4 Jun 2020 13:36:41 +0200 Subject: [PATCH 0406/1208] Add comment --- picard.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/picard.wdl b/picard.wdl index a94abd52..0ee5da36 100644 --- a/picard.wdl +++ b/picard.wdl @@ -204,6 +204,7 @@ task CollectRnaSeqMetrics { String memory = "9G" String javaXmx = "8G" + # With 6 minutes per G there were several timeouts. Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } From 3261563d3d5f873a062dc190f6e1c18846916f76 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 4 Jun 2020 13:41:26 +0200 Subject: [PATCH 0407/1208] Tune memory and time requirements for RNA seq --- star.wdl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/star.wdl b/star.wdl index c262dc54..4b204eb4 100644 --- a/star.wdl +++ b/star.wdl @@ -102,9 +102,10 @@ task Star { Int? limitBAMsortRAM Int runThreadN = 4 - String memory = "~{5 + ceil(size(indexFiles, "G"))}G" - # 1 minute initialization + time reading in index + time aligning data. - Int timeMinutes = 1 + ceil(size(indexFiles, "G") / 2) + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN) + # Use a margin of 30% index size. Real memory usage is ~30 GiB for a 27 GiB index. + String memory = "~{1 + ceil(size(indexFiles, "G") * 1.3)}G" + # 1 minute initialization + time reading in index (1 minute per G) + time aligning data. 
+ Int timeMinutes = 1 + ceil(size(indexFiles, "G")) + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } From 6d73709777868f082836de0afea1737c287f9871 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 4 Jun 2020 14:12:43 +0200 Subject: [PATCH 0408/1208] update star resource requirement --- star.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/star.wdl b/star.wdl index 4b204eb4..b1c662f2 100644 --- a/star.wdl +++ b/star.wdl @@ -103,7 +103,8 @@ task Star { Int runThreadN = 4 # Use a margin of 30% index size. Real memory usage is ~30 GiB for a 27 GiB index. - String memory = "~{1 + ceil(size(indexFiles, "G") * 1.3)}G" + Int memoryGb = 1 + ceil(size(indexFiles, "G") * 1.3) + String memory = "~{memoryGb}G" # 1 minute initialization + time reading in index (1 minute per G) + time aligning data. Int timeMinutes = 1 + ceil(size(indexFiles, "G")) + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" From bf6145791588577ee502749a7662f0683e67aab9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 5 Jun 2020 07:56:50 +0200 Subject: [PATCH 0409/1208] try to fix memory --- star.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/star.wdl b/star.wdl index b1c662f2..f1549134 100644 --- a/star.wdl +++ b/star.wdl @@ -104,7 +104,9 @@ task Star { Int runThreadN = 4 # Use a margin of 30% index size. Real memory usage is ~30 GiB for a 27 GiB index. Int memoryGb = 1 + ceil(size(indexFiles, "G") * 1.3) - String memory = "~{memoryGb}G" + # For some reason doing above calculation inside a string does not work. + # So we solve it with an optional memory string and using select_first. + String? memory # 1 minute initialization + time reading in index (1 minute per G) + time aligning data. 
Int timeMinutes = 1 + ceil(size(indexFiles, "G")) + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" @@ -137,7 +139,7 @@ task Star { runtime { cpu: runThreadN - memory: memory + memory: select_first([memory, "~{memoryGb}G"]) time_minutes: timeMinutes docker: dockerImage } From a8c2b90aba14dab6c2712b69b05217fe1642605a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 5 Jun 2020 08:56:28 +0200 Subject: [PATCH 0410/1208] Skip the Perl wrapper and talk to fastq jar directly --- fastqc.wdl | 49 ++++++++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/fastqc.wdl b/fastqc.wdl index e24b6ce4..512ca2e7 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -38,8 +38,13 @@ task Fastqc { String? dir Int threads = 1 - # Fastqc uses 250MB per thread in its wrapper. - String memory = "~{250 + 250 * threads}M" + # Set javaXmx a little high. Equal to fastqc default with 7 threads. + # This is because some fastq files need more memory. 2G per core + # is a nice cluster default, so we use all the rest of the memory for + # fastqc so we should have as little OOM crashes as possible even with + # weird edge case fastq's. + String javaXmx="1750M" + String memory = "2G" Int timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" Array[File]? NoneArray @@ -53,26 +58,32 @@ task Fastqc { # Just as fastqc does it. String reportDir = outdirPath + "/" + sub(name, "\.[^\.]*$", "_fastqc") - command { + # We reimplement the perl wrapper here. This is the advantage that it gives + # us more control over the amount of memory used. 
+ command <<< set -e mkdir -p ~{outdirPath} - fastqc \ - ~{"--outdir " + outdirPath} \ - ~{true="--casava" false="" casava} \ - ~{true="--nano" false="" nano} \ - ~{true="--nofilter" false="" noFilter} \ - ~{true="--extract" false="" extract} \ - ~{true="--nogroup" false="" nogroup} \ - ~{"--min_length " + minLength } \ - ~{"--format " + format} \ - ~{"--threads " + threads} \ - ~{"--contaminants " + contaminants} \ - ~{"--adapters " + adapters} \ - ~{"--limits " + limits} \ - ~{"--kmers " + kmers} \ - ~{"--dir " + dir} \ + FASTQC_DIR="/usr/local/opt/fastqc-0.11.9" + export CLASSPATH="$FASTQC_DIR:$FASTQC_DIR/sam-1.103.jar:$FASTQC_DIR/jbzip2-0.9.jar:$FASTQC_DIR/cisd-jhdf5.jar" + java -Djava.awt.headless=true -XX:ParallelGCThreads=1 \ + -Xms200M -Xmx~{javaXmx} \ + ~{"-Dfastqc.output_dir=" + outdirPath} \ + ~{true="-Dfastqc.casava=true" false="" casava} \ + ~{true="-Dfastqc.nano=true" false="" nano} \ + ~{true="-Dfastqc.nofilter=true" false="" noFilter} \ + ~{true="-Dfastqc.unzip=true" false="" extract} \ + ~{true="-Dfastqc.nogroup=true" false="" nogroup} \ + ~{"-Dfastqc.min_length=" + minLength} \ + ~{"-Dfastqc.sequence_format=" + format} \ + ~{"-Dfastqc.threads=" + threads} \ + ~{"-Dfastqc.contaminant_file=" + contaminants} \ + ~{"-Dfastqc.adapter_file=" + adapters} \ + ~{"-Dfastqc.limits_file=" + limits} \ + ~{"-Dfastqc.kmer_size=" + kmers} \ + ~{"-Djava.io.tmpdir=" + dir} \ + uk.ac.babraham.FastQC.FastQCApplication ~{seqFile} - } + >>> output { File? rawReport = if extract then reportDir + "/fastqc_data.txt" else NoneFile From 56437e740ce9132ac4b45b9409bb21fd68da8b64 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 5 Jun 2020 09:28:31 +0200 Subject: [PATCH 0411/1208] Add missing \ --- fastqc.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fastqc.wdl b/fastqc.wdl index 512ca2e7..81af7d59 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -58,7 +58,7 @@ task Fastqc { # Just as fastqc does it. 
String reportDir = outdirPath + "/" + sub(name, "\.[^\.]*$", "_fastqc") - # We reimplement the perl wrapper here. This is the advantage that it gives + # We reimplement the perl wrapper here. This has the advantage that it gives # us more control over the amount of memory used. command <<< set -e @@ -81,7 +81,7 @@ task Fastqc { ~{"-Dfastqc.limits_file=" + limits} \ ~{"-Dfastqc.kmer_size=" + kmers} \ ~{"-Djava.io.tmpdir=" + dir} \ - uk.ac.babraham.FastQC.FastQCApplication + uk.ac.babraham.FastQC.FastQCApplication \ ~{seqFile} >>> From 1db21ed6d827ace5889f37bd12d9e3fa4bea869f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 5 Jun 2020 09:31:01 +0200 Subject: [PATCH 0412/1208] Add fastqc change to changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bde1b766..c38544b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ The FastQC task now talks to the Java directly instead of using the included + Perl wrapper for FastQC. This has the advantage that memory and threads can + be set independently. A rather high maximum heap size of 1750MB (Xmx1750M) + was set, as OOM errors occurred frequently on some fastqs. + Take into account reference fasta size for Picard metrics. + Take into account index size for STAR alignment time requirement. + Added a log output for STAR. From 6e4334c2f933d70e9acf2310f0d30f4799302e64 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 5 Jun 2020 12:01:34 +0200 Subject: [PATCH 0413/1208] fix typo --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 8e67edc8..88904be5 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1609,7 +1609,7 @@ task VariantEval { time_minutes: timeMinutes } parameter_meta { - evalVcfs: {description: "Variant sets to evaluate." 
category: "required"} + evalVcfs: {description: "Variant sets to evaluate.", category: "required"} evalVcfsIndex: {description: "Indexes for the variant sets.", category: "required"} comparisonVcfs: {description: "Compare set vcfs.", category: "advanced"} comparisonVcfsIndex: {description: "Indexes for the compare sets.", category: "advanced"} From 3eccb5b72d896fedff0974579b3eb1b6ab291035 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 5 Jun 2020 13:38:16 +0200 Subject: [PATCH 0414/1208] fix typo and use newer version of GATK --- gatk.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 88904be5..c00af93a 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1575,15 +1575,15 @@ task VariantEval { String javaXmx = "4G" # TODO: Refine estimate. For now 4 minutes per GB of input. Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs]), "G") * 4) - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.7.0--py38_0" } command { set -e mkdir -p "$(dirname ~{outputPath})" gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ - VariantFiltration \ - -O ~{outputPath} \ + VariantEval \ + --output ~{outputPath} \ ~{true="--eval" false="" length(evalVcfs) > 0} ~{sep=" --eval " evalVcfs} \ ~{true="--comparison" false="" length(comparisonVcfs) > 0} ~{sep=" --comparison " comparisonVcfs} \ ~{"-R " + referenceFasta} \ From 2eca5b51d53103f496abe4738540eb116634b44a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 5 Jun 2020 16:15:03 +0200 Subject: [PATCH 0415/1208] fix stats task --- bcftools.wdl | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 2341585d..017ba21d 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -59,7 +59,10 @@ task Bcf2Vcf { task Stats { input { File inputVcf + File inputVcfIndex File? compareVcf + File? 
compareVcfIndex + String outputPath = basename(inputVcf) + ".stats" String? afBins String? afTag Boolean firstAlleleOnly = false @@ -88,7 +91,9 @@ task Stats { } command { - bcftools \ + set -e + mkdir -p $(dirname ~{outputPath}) + bcftools stats \ ~{"--af-bins " + afBins} \ ~{"--af-tag " + afTag} \ ~{true="--1st-allele-only" false="" firstAlleleOnly} \ @@ -109,7 +114,11 @@ task Stats { ~{"--user-tstv " + userTsTv} \ --threads ~{threads} \ ~{true="--verbose" false="" verbose} \ - ~{inputVcf} ~{compareVcf} + ~{inputVcf} ~{compareVcf} > ~{outputPath} + } + + output { + File stats = outputPath } runtime { @@ -124,10 +133,10 @@ task Stats { compareVcf: {description: "When inputVcf and compareVCF are given, the program generates separate stats for intersection and the complements. By default only sites are compared, samples must be given to include also sample columns.", category: "common"} afBins: {description: "Allele frequency bins, a list (0.1,0.5,1) or a file (0.1\n0.5\n1).", category: "advanced"} afTag: {description: "Allele frequency tag to use, by default estimated from AN,AC or GT.", category: "advanded"} - firstAlleleOnly: {description: "Include only 1st allele at multiallelic sites." category: "advanced"} + firstAlleleOnly: {description: "Include only 1st allele at multiallelic sites.", category: "advanced"} collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} depth: {description: "Depth distribution: min,max,bin size [0,500,1].", category: "advanced"} - exclude: {description: "Exclude sites for which the expression is true (see man page for details)." 
category: "advanced"} + exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} exons: {description: "Tab-delimited file with exons for indel frameshifts (chr,from,to; 1-based, inclusive, bgzip compressed).", category: "advanced"} applyFilters: {description: "Require at least one of the listed FILTER strings (e.g. \"PASS,.\").", category: "advanced"} fastaRef: {description: "Faidx indexed reference sequence file to determine INDEL context.", category: "advanced"} From 00e1180c565bf4e6da9aec1e9e62514d4be8dff6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 5 Jun 2020 16:27:54 +0200 Subject: [PATCH 0416/1208] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 390faf25..13beb086 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Added bcftools stats task. ++ Added GATK VariantEval task. + Added a log output for STAR. + Added report output to Hisat2. + Added output with all reports to gffcompare. From f2c1d5ba6110225d76a1edb101eaec503410c2e0 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 8 Jun 2020 10:14:01 +0200 Subject: [PATCH 0417/1208] complete parameter_meta for bcf tools --- bcftools.wdl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 017ba21d..bd79c2c6 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -129,8 +129,10 @@ task Stats { } parameter_meta { - inputVcf: {description: "The vcf to be analysed.", category: "required"} + inputVcf: {description: "The VCF to be analysed.", category: "required"} + inputVcfIndex: {description: "The index for the input VCF.", category: "required"} compareVcf: {description: "When inputVcf and compareVCF are given, the program generates separate stats for intersection and the complements. 
By default only sites are compared, samples must be given to include also sample columns.", category: "common"} + compareVcfIndex: {description: "Index for the compareVcf.", category: "common"} afBins: {description: "Allele frequency bins, a list (0.1,0.5,1) or a file (0.1\n0.5\n1).", category: "advanced"} afTag: {description: "Allele frequency tag to use, by default estimated from AN,AC or GT.", category: "advanded"} firstAlleleOnly: {description: "Include only 1st allele at multiallelic sites.", category: "advanced"} @@ -152,5 +154,9 @@ task Stats { userTsTv: {description: ". Collect Ts/Tv stats for any tag using the given binning [0:1:100].", category: "advanced"} threads: {description: "Number of extra decompression threads [0].", category: "advanced"} verbose: {description: "Produce verbose per-site and per-sample output.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} } } \ No newline at end of file From a967bf6bf912292c3e93afc7be8fa685bb97d96d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 8 Jun 2020 10:15:33 +0200 Subject: [PATCH 0418/1208] Remove empty line --- gatk.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index c00af93a..09de0488 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1595,7 +1595,6 @@ task VariantEval { ~{true="-EV" false="" length(evalModules) > 0} ~{sep=" -EV " evalModules} \ ~{true="-ST" false="" length(stratificationModules) > 0} ~{sep=" -ST " stratificationModules} \ ~{true="--merge-evals" false="" mergeEvals} - } output { From 
4f31b5fb1b6af42941545ee54b862ff74e6d7373 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 9 Jun 2020 15:40:40 +0200 Subject: [PATCH 0419/1208] allow for very large scattersizes --- biopet/biopet.wdl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index cc8e1bc6..89319409 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -214,6 +214,7 @@ task ScatterRegions { input { File referenceFasta File referenceFastaDict + Int scatterSizeMillions = 1000 Int? scatterSize File? regions Boolean notSplitContigs = false @@ -230,6 +231,7 @@ task ScatterRegions { # linking. This path must be in the containers filesystem, otherwise the # linking does not work. String outputDirPath = "scatters" + String finalSize = if defined(scatterSize) then "~{scatterSize}" else "~{scatterSizeMillions}000000" command <<< set -e -o pipefail @@ -237,7 +239,7 @@ task ScatterRegions { biopet-scatterregions -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -R ~{referenceFasta} \ -o ~{outputDirPath} \ - ~{"-s " + scatterSize} \ + ~{"-s " + finalSize} \ ~{"-L " + regions} \ ~{"--bamFile " + bamFile} \ ~{true="--notSplitContigs" false="" notSplitContigs} @@ -271,7 +273,8 @@ task ScatterRegions { referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} - scatterSize: {description: "Equivalent to biopet scatterregions' `-s` option.", category: "common"} + scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} + scatterSize: {description: "Overrides scatterSizeMillions with a smaller value if set.", category: "advanced"} regions: {description: "The regions to be scattered.", category: "advanced"} notSplitContigs: {description: "Equivalent to biopet scatterregions' `--notSplitContigs` flag.", category: "advanced"} From 
d15fedbf79cb0950552fa14d56d94ef537477e34 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 10 Jun 2020 09:31:19 +0200 Subject: [PATCH 0420/1208] Fix spelling error in parameter_meta of common.wdl --- common.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common.wdl b/common.wdl index f8325523..e96cc1c8 100644 --- a/common.wdl +++ b/common.wdl @@ -243,7 +243,7 @@ task YamlToJson { parameter_meta { yaml: {description: "The YAML file to convert.", category: "required"} outputJson: {description: "The location the output JSON file should be written to.", category: "advanced"} - memory: {description: "The maximum aount of memroy the job will need.", category: "advanced"} + memory: {description: "The maximum amount of memory the job will need.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 8aa272366f8a575fdca2dd9bd93c92db3257ebed Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 10 Jun 2020 15:45:32 +0200 Subject: [PATCH 0421/1208] Fix error in star --- star.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/star.wdl b/star.wdl index f1549134..7812e3df 100644 --- a/star.wdl +++ b/star.wdl @@ -35,7 +35,7 @@ task GenomeGenerate { command { set -e - mkdir -p "$(dirname ~{genomeDir})" + mkdir -p ~{genomeDir} STAR \ --runMode genomeGenerate \ --runThreadN ~{threads} \ @@ -50,7 +50,7 @@ task GenomeGenerate { File chrNameLength = "~{genomeDir}/chrNameLength.txt" File chrName = "~{genomeDir}/chrName.txt" File chrStart = "~{genomeDir}/chrStart.txt" - File genome = "~{genomeDir}/genome.txt" + File genome = "~{genomeDir}/Genome" File genomeParameters = "~{genomeDir}/genomeParameters.txt" File sa = "~{genomeDir}/SA" File saIndex = "~{genomeDir}/SAindex" From 
e323a23dc13433ce4bb8bd71d94f840e0627babb Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 11 Jun 2020 16:35:59 +0200 Subject: [PATCH 0422/1208] Update minimumIdentity to correct type and default. --- CHANGELOG.md | 2 ++ talon.wdl | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 13beb086..29351743 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ TALON: Update `minimumIdentity` to correct type (float, was integer) & set + new default according to developers (0.8, was 0). + Added bcftools stats task. + Added GATK VariantEval task. + Added a log output for STAR. diff --git a/talon.wdl b/talon.wdl index b2ae3a62..98e0c13a 100644 --- a/talon.wdl +++ b/talon.wdl @@ -380,7 +380,7 @@ task Talon { File databaseFile String genomeBuild Float minimumCoverage = 0.9 - Int minimumIdentity = 0 + Float minimumIdentity = 0.8 String outputPrefix Int cores = 4 From 9faafc1d81422e0a0a452921ab262241bcb607f4 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 12 Jun 2020 10:04:56 +0200 Subject: [PATCH 0423/1208] Trigger travis test. --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 29351743..2d4267c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- -+ TALON: Update `minimumIdentity` to correct type (float, was integer) & set - new default according to developers (0.8, was 0). ++ TALON: Update `minimumIdentity` to correct type (float, was integer) + & set new default according to developers (0.8, was 0). + Added bcftools stats task. + Added GATK VariantEval task. + Added a log output for STAR. 
From 92c8725fc597743e7aa4b65ddec79f3e0bd872b3 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 12 Jun 2020 10:38:14 +0200 Subject: [PATCH 0424/1208] Add new STAR options. --- CHANGELOG.md | 2 ++ star.wdl | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d4267c1..61f47609 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ STAR: Add options regarding alignment score and read length for tweaking when + processing rRNA depleted samples. + TALON: Update `minimumIdentity` to correct type (float, was integer) & set new default according to developers (0.8, was 0). + Added bcftools stats task. diff --git a/star.wdl b/star.wdl index 8e6a511e..6d80e9d3 100644 --- a/star.wdl +++ b/star.wdl @@ -95,6 +95,10 @@ task Star { String outFileNamePrefix String outSAMtype = "BAM SortedByCoordinate" String readFilesCommand = "zcat" + Int outFilterScoreMin = 0 + Float outFilterScoreMinOverLread = 0.66 + Int outFilterMatchNmin = 0 + Float outFilterMatchNminOverLread = 0.66 String? outStd String? twopassMode = "Basic" Array[String]? outSAMattrRGline @@ -119,6 +123,10 @@ task Star { --genomeDir ~{sub(indexFiles[0], basename(indexFiles[0]), "")} \ --outSAMtype ~{outSAMtype} \ --readFilesCommand ~{readFilesCommand} \ + --outFilterScoreMin ~{outFilterScoreMin} \ + --outFilterScoreMinOverLread ~{outFilterScoreMinOverLread} \ + --outFilterMatchNmin ~{outFilterMatchNmin} \ + --outFilterMatchNminOverLread ~{outFilterMatchNminOverLread} \ ~{"--outSAMunmapped " + outSAMunmapped} \ ~{"--runThreadN " + runThreadN} \ ~{"--outStd " + outStd} \ @@ -146,6 +154,10 @@ task Star { outFileNamePrefix: {description: "The prefix for the output files. May include directories.", category: "required"} outSAMtype: {description: "The type of alignment file to be produced. 
Currently only `BAM SortedByCoordinate` is supported.", category: "advanced"} readFilesCommand: {description: "Equivalent to star's `--readFilesCommand` option.", category: "advanced"} + outFilterScoreMin: {description: "Equivalent to star's `--outFilterScoreMin` option.", category: "advanced"} + outFilterScoreMinOverLread: {description: "Equivalent to star's `--outFilterScoreMinOverLread` option.", category: "advanced"} + outFilterMatchNmin: {description: "Equivalent to star's `--outFilterMatchNmin` option.", category: "advanced"} + outFilterMatchNminOverLread: {description: "Equivalent to star's `--outFilterMatchNminOverLread` option.", category: "advanced"} outStd: {description: "Equivalent to star's `--outStd` option.", category: "advanced"} twopassMode: {description: "Equivalent to star's `--twopassMode` option.", category: "advanced"} outSAMattrRGline: {description: "The readgroup lines for the fastq pairs given (in the same order as the fastq files).", category: "common"} @@ -154,8 +166,7 @@ task Star { runThreadN: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From 1d784bfd5891e47dfa4821c6b4a7542463b10c2b Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 12 Jun 2020 13:03:06 +0200 Subject: [PATCH 0425/1208] Update CHANGELOG. 
--- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61f47609..77189f3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- -+ STAR: Add options regarding alignment score and read length for tweaking when - processing rRNA depleted samples. ++ STAR: Add options regarding alignment score (regarding read length as well) + for tweaking when processing rRNA depleted samples. + TALON: Update `minimumIdentity` to correct type (float, was integer) & set new default according to developers (0.8, was 0). + Added bcftools stats task. From c8d874812f45e10eded3f48979d69f6d81d7b90f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 15 Jun 2020 10:49:45 +0200 Subject: [PATCH 0426/1208] Change new options to optional. --- star.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/star.wdl b/star.wdl index 6d80e9d3..da90516d 100644 --- a/star.wdl +++ b/star.wdl @@ -95,10 +95,10 @@ task Star { String outFileNamePrefix String outSAMtype = "BAM SortedByCoordinate" String readFilesCommand = "zcat" - Int outFilterScoreMin = 0 - Float outFilterScoreMinOverLread = 0.66 - Int outFilterMatchNmin = 0 - Float outFilterMatchNminOverLread = 0.66 + Int? outFilterScoreMin + Float? outFilterScoreMinOverLread + Int? outFilterMatchNmin + Float? outFilterMatchNminOverLread String? outStd String? twopassMode = "Basic" Array[String]? outSAMattrRGline From 0ad2ddbe042bf9a3520892b8980278244ccdc3db Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 15 Jun 2020 10:58:18 +0200 Subject: [PATCH 0427/1208] Fix command section now that new inputs are optional. 
--- star.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/star.wdl b/star.wdl index da90516d..6f95a637 100644 --- a/star.wdl +++ b/star.wdl @@ -123,10 +123,10 @@ task Star { --genomeDir ~{sub(indexFiles[0], basename(indexFiles[0]), "")} \ --outSAMtype ~{outSAMtype} \ --readFilesCommand ~{readFilesCommand} \ - --outFilterScoreMin ~{outFilterScoreMin} \ - --outFilterScoreMinOverLread ~{outFilterScoreMinOverLread} \ - --outFilterMatchNmin ~{outFilterMatchNmin} \ - --outFilterMatchNminOverLread ~{outFilterMatchNminOverLread} \ + ~{"--outFilterScoreMin " + outFilterScoreMin} \ + ~{"--outFilterScoreMinOverLread " + outFilterScoreMinOverLread} \ + ~{"--outFilterMatchNmin " + outFilterMatchNmin} \ + ~{"--outFilterMatchNminOverLread " + outFilterMatchNminOverLread} \ ~{"--outSAMunmapped " + outSAMunmapped} \ ~{"--runThreadN " + runThreadN} \ ~{"--outStd " + outStd} \ From 2cb77bebc7bafb7316450bf77a5d4805ae952d60 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 15 Jun 2020 13:17:59 +0200 Subject: [PATCH 0428/1208] Add scatter-regions task --- chunked-scatter.wdl | 49 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 111d8fa4..93a603fe 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -64,3 +64,52 @@ task ChunkedScatter { category: "advanced"} } } + + +task ScatterRegions { + input { + File inputFile + String? prefix + Boolean splitContigs = false + Int scatterSizeMillions = 1000 + Int? 
scatterSize + Int timeMinutes = 2 + String memory = "256M" + String dockerImage = "biowdl/chunked-scatter:latest" + } + + String finalSize = if defined(scatterSize) then "~{scatterSize}" else "~{scatterSizeMillions}000000" + + command { + scatter-regions \ + --print-paths \ + --scatter-size ~{finalSize} \ + ~{true="--split-contigs" false="" splitContigs} \ + ~{"--prefix " + prefix} \ + ~{inputFile} + } + + output { + Array[File] scatters = read_lines(stdout()) + } + + runtime { + cpu: 1 + memory: memory + docker: dockerImage + time_minutes: timeMinutes + } + + parameter_meta { + inputFile: {description: "The input file, either a bed file or a sequence dict. Which format is used is detected by the extension: '.bed', '.fai' or '.dict'." category: "required"} + prefix: {description: "The prefix of the ouput files. Output will be named like: .bed, in which N is an incrementing number. Default 'scatter-'.", category: "advanced"} + splitContigs: {description: "If set, contigs are allowed to be split up over multiple files.", category: "advanced"} + scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} + scatterSize: {description: "Overrides scatterSizeMillions with a smaller value if set.", category: "advanced"} + + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From 5f13c04f7fd3c91eba022fac9dc4ad0a05d4081d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 15 Jun 2020 13:26:00 +0200 Subject: [PATCH 0429/1208] typo --- chunked-scatter.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 93a603fe..f01cfc82 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -101,7 +101,7 @@ task ScatterRegions { } parameter_meta { - inputFile: {description: "The input file, either a bed file or a sequence dict. Which format is used is detected by the extension: '.bed', '.fai' or '.dict'." category: "required"} + inputFile: {description: "The input file, either a bed file or a sequence dict. Which format is used is detected by the extension: '.bed', '.fai' or '.dict'.", category: "required"} prefix: {description: "The prefix of the ouput files. Output will be named like: .bed, in which N is an incrementing number. Default 'scatter-'.", category: "advanced"} splitContigs: {description: "If set, contigs are allowed to be split up over multiple files.", category: "advanced"} scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} From 5068af102abbd5d8726f63b4017ed6948ffc3031 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 15 Jun 2020 13:39:16 +0200 Subject: [PATCH 0430/1208] use the same prefix as the biopet-scatterregions tool --- chunked-scatter.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index f01cfc82..96dbf1eb 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -69,7 +69,7 @@ task ChunkedScatter { task ScatterRegions { input { File inputFile - String? prefix + String prefix = "scatters/scatter-" Boolean splitContigs = false Int scatterSizeMillions = 1000 Int? 
scatterSize From a5cdc7059188174270b76b023d8a2c1c40710bef Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 16 Jun 2020 08:01:57 +0200 Subject: [PATCH 0431/1208] 300 minutes per G of input --- star.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/star.wdl b/star.wdl index 7812e3df..bc47bc02 100644 --- a/star.wdl +++ b/star.wdl @@ -108,7 +108,7 @@ task Star { # So we solve it with an optional memory string and using select_first. String? memory # 1 minute initialization + time reading in index (1 minute per G) + time aligning data. - Int timeMinutes = 1 + ceil(size(indexFiles, "G")) + ceil(size(flatten([inputR1, inputR2]), "G") * 180 / runThreadN) + Int timeMinutes = 1 + ceil(size(indexFiles, "G")) + ceil(size(flatten([inputR1, inputR2]), "G") * 300 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } From edb371e333e05764080c48937d81bd1c7f8921ba Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 16 Jun 2020 08:07:55 +0200 Subject: [PATCH 0432/1208] Also read reference and dbsnpvcf as part of time estimate --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 09de0488..700a245c 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1574,7 +1574,7 @@ task VariantEval { String memory = "5G" String javaXmx = "4G" # TODO: Refine estimate. For now 4 minutes per GB of input. 
- Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs]), "G") * 4) + Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs, select_all([referenceFasta, dbsnpVCF])]), "G") * 4) String dockerImage = "quay.io/biocontainers/gatk4:4.1.7.0--py38_0" } From 7db296e70f162c68ce3c8b05c86618c749e7f9a5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 16 Jun 2020 08:58:37 +0200 Subject: [PATCH 0433/1208] resource requirements for multiqc --- multiqc.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/multiqc.wdl b/multiqc.wdl index 6a967b3f..7dcf333e 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -51,11 +51,11 @@ task MultiQC { Boolean megaQCUpload = false # This must be actively enabled in my opinion. The tools default is to upload. File? config # A directory String? clConfig - - String memory = "4G" - Int timeMinutes = 120 + String? memory + Int timeMinutes = 2 + ceil(size(reports, "G") * 8) String dockerImage = "quay.io/biocontainers/multiqc:1.7--py_1" } + Int memoryGb = 2 + ceil(size(reports, "G")) # This is where the reports end up. It does not need to be changed by the # user. It is full of symbolic links, so it is not of any use to the user @@ -132,7 +132,7 @@ task MultiQC { } runtime { - memory: memory + memory: select_first([memory, "~{memoryGb}G"]) time_minutes: timeMinutes docker: dockerImage } From 5c1625e38ab490e805f8e5a7efc158a1638fad50 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 16 Jun 2020 15:27:18 +0200 Subject: [PATCH 0434/1208] Add scatter-regions to changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e33b51d..cb7b635e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Added a new task for [scatter-regions](https://github.com/biowdl/chunked-scatter) + that replaces biopet-scatterregions. 
+ The FastQC task now talks to the Java directly instead of using the included Perl wrapper for FastQC. This has the advantage that memory and threads can be set independently. A rather high maximum heap size of 1750MB (Xmx1750M) From f3006d5aea706e2f85b6edd2c9e7bf16b83e090a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 16 Jun 2020 15:27:28 +0200 Subject: [PATCH 0435/1208] update gatk varianteval times --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 700a245c..2089eabb 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1574,7 +1574,7 @@ task VariantEval { String memory = "5G" String javaXmx = "4G" # TODO: Refine estimate. For now 4 minutes per GB of input. - Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs, select_all([referenceFasta, dbsnpVCF])]), "G") * 4) + Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs, select_all([referenceFasta, dbsnpVCF])]), "G") * 20) String dockerImage = "quay.io/biocontainers/gatk4:4.1.7.0--py38_0" } From 88a1d405ada9a859c14bf77ee4c46e92a6c384f0 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 16 Jun 2020 15:29:03 +0200 Subject: [PATCH 0436/1208] Update changelog --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb7b635e..2b8de510 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,14 +11,14 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Tuned resource requirements for GATK VariantEval, MultiQC, Picard metrics and + STAR. + Added a new task for [scatter-regions](https://github.com/biowdl/chunked-scatter) that replaces biopet-scatterregions. + The FastQC task now talks to the Java directly instead of using the included Perl wrapper for FastQC. This has the advantage that memory and threads can be set independently. 
A rather high maximum heap size of 1750MB (Xmx1750M) was set, as OOM errors occurred frequently on some fastqs. -+ Take into account reference fasta size for Picard metrics. -+ Take into account index size for STAR alignment time requirement. + Added bcftools stats task. + Added GATK VariantEval task. + Added a log output for STAR. From e45b413fffdc73ed83cc21b6c38be2314d8744bc Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 16 Jun 2020 15:51:50 +0200 Subject: [PATCH 0437/1208] add javaxmx parameter_meta --- fastqc.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fastqc.wdl b/fastqc.wdl index 81af7d59..04b6813f 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -116,6 +116,8 @@ task Fastqc { kmers: {description: "Equivalent to fastqc's --kmers option.", category: "advanced"} dir: {description: "Equivalent to fastqc's --dir option.", category: "advanced"} threads: {description: "The number of cores to use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From 5c9b7c8322932d193b1d158bc681c7d5a86f8750 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 16 Jun 2020 15:55:50 +0200 Subject: [PATCH 0438/1208] Remove memoryGb from the input section --- star.wdl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/star.wdl b/star.wdl index 4c134171..4da67f72 100644 --- a/star.wdl +++ b/star.wdl @@ -106,16 +106,18 @@ task Star { Int? limitBAMsortRAM Int runThreadN = 4 - # Use a margin of 30% index size. Real memory usage is ~30 GiB for a 27 GiB index. 
- Int memoryGb = 1 + ceil(size(indexFiles, "G") * 1.3) - # For some reason doing above calculation inside a string does not work. - # So we solve it with an optional memory string and using select_first. String? memory # 1 minute initialization + time reading in index (1 minute per G) + time aligning data. Int timeMinutes = 1 + ceil(size(indexFiles, "G")) + ceil(size(flatten([inputR1, inputR2]), "G") * 300 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } + # Use a margin of 30% index size. Real memory usage is ~30 GiB for a 27 GiB index. + Int memoryGb = 1 + ceil(size(indexFiles, "G") * 1.3) + # For some reason doing above calculation inside a string does not work. + # So we solve it with an optional memory string and using select_first + # in the runtime section. + #TODO Could be extended for all possible output extensions Map[String, String] samOutputNames = {"BAM SortedByCoordinate": "sortedByCoord.out.bam"} From 4402a95dc00cce75fffc55bb8bfec634425b42d8 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 17 Jun 2020 08:59:17 +0200 Subject: [PATCH 0439/1208] Update image and tasks for chunk scatter --- chunked-scatter.wdl | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 96dbf1eb..6f2b465d 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -28,27 +28,28 @@ task ChunkedScatter { Int? overlap Int? 
minimumBasesPerFile + String memory = "256M" Int timeMinutes = 2 - String dockerImage = "quay.io/biocontainers/chunked-scatter:0.1.0--py_0" + String dockerImage = "quay.io/biocontainers/chunked-scatter:0.2.0--py_0" } command { - set -e - mkdir -p ~{prefix} chunked-scatter \ + --print-paths \ -p ~{prefix} \ - -i ~{inputFile} \ ~{"-c " + chunkSize} \ ~{"-o " + overlap} \ - ~{"-m " + minimumBasesPerFile} + ~{"-m " + minimumBasesPerFile} \ + ~{inputFile} } output { - Array[File] scatters = glob(prefix + "*.bed") + Array[File] scatters = read_lines(stdout()) } runtime { - memory: "4G" + cpu: 1 + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -75,7 +76,7 @@ task ScatterRegions { Int? scatterSize Int timeMinutes = 2 String memory = "256M" - String dockerImage = "biowdl/chunked-scatter:latest" + String dockerImage = "quay.io/biocontainers/chunked-scatter:0.2.0--py_0" } String finalSize = if defined(scatterSize) then "~{scatterSize}" else "~{scatterSizeMillions}000000" From aea930c687263740f1edf326de7b2c2ba21219f3 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 17 Jun 2020 09:00:20 +0200 Subject: [PATCH 0440/1208] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78891941..7813c209 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Update the image for chunked-scatter and make use of new features from 0.2.0. + Tuned resource requirements for GATK VariantEval, MultiQC, Picard metrics and STAR. 
+ Added a new task for [scatter-regions](https://github.com/biowdl/chunked-scatter) From a0fccc3cf59afd835836d433132a46f100eb3ce6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 17 Jun 2020 09:00:51 +0200 Subject: [PATCH 0441/1208] Update parameter_meta --- chunked-scatter.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 6f2b465d..3ef0c747 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -61,6 +61,7 @@ task ChunkedScatter { overlap: {description: "Equivalent to chunked-scatter's `-o` option.", category: "advanced"} minimumBasesPerFile: {description: "Equivalent to chunked-scatter's `-m` option.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From 185ead0f78f7450328d473ffbce157117907de0d Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 18 Jun 2020 10:43:20 +0200 Subject: [PATCH 0442/1208] Add pbmm2, the PacBio wrapper for minimap2 See https://github.com/PacificBiosciences/pbmm2 for details. 
--- pbmm2.wdl | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 pbmm2.wdl diff --git a/pbmm2.wdl b/pbmm2.wdl new file mode 100644 index 00000000..18d3cb9a --- /dev/null +++ b/pbmm2.wdl @@ -0,0 +1,74 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task Mapping { + input { + String presetOption + Boolean sort=true + String sample + File referenceMMI + File queryFile + + Int cores = 4 + String memory = "30G" + Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) + String dockerImage = "quay.io/biocontainers/pbmm2:1.3.0--h56fc30b_1" + } + + command { + set -e + pbmm2 align \ + --preset ~{presetOption} \ + ~{true="--sort" false="" sort} \ + -j ~{cores} \ + ~{referenceMMI} \ + ~{queryFile} \ + ~{sample}.align.bam + + } + + output { + File outputAlignmentFile = sample + ".align.bam" + } + + runtime { + cpu: cores + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + presetOption: {description: "This option applies multiple options at the same time.", category: "required"} + sort: {description: "Sort the output bam file.", category: "advanced"} + sample: {description: "Name of the sample"} + referenceMMI: {description: "MMI file for the reference.", category: "required"} + queryFile: {description: "BAM file with reads to align against the reference.", category: "required"} + cores: {description: "The number of cores to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + outputAlignmentFile: {description: "Mapped bam files."} + } +} From 59366d5ebdea9e781dfea4113f83871aaf30244d Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 18 Jun 2020 10:45:17 +0200 Subject: [PATCH 0443/1208] Update changelog for pbmm2 --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7813c209..d1e84d9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Add tasks for pbmm2, the PacBio wrapper for minimap2. + Update the image for chunked-scatter and make use of new features from 0.2.0. + Tuned resource requirements for GATK VariantEval, MultiQC, Picard metrics and STAR. From 84f1235c44abf723c9f15f5e471891fbed98b5b9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 19 Jun 2020 13:17:39 +0200 Subject: [PATCH 0444/1208] Do not use unnecessary threads --- bwa.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index 3dd7883b..2cf637d1 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -132,7 +132,9 @@ task Kit { } runtime { - cpu: threads + 1 # One thread for bwa-postalt + samtools. + # One extra thread for bwa-postalt + samtools is not needed. + # These only use 5-10% of compute power and not always simultaneously. 
+ cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage From b7d9dad9ad9f804fc41ab8bda3e3961b0441fabc Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 22 Jun 2020 11:48:00 +0200 Subject: [PATCH 0445/1208] Don't sort --- bwa.wdl | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 2cf637d1..c5980b9b 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -117,13 +117,7 @@ task Kit { k8 /opt/conda/bin/bwa-postalt.js \ -p ~{outputPrefix}.hla \ ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ - samtools sort \ - ~{"-@ " + sortThreads} \ - -m ~{sortMemoryPerThread} \ - -l ~{compressionLevel} \ - - \ - -o ~{outputPrefix}.aln.bam - samtools index ~{outputPrefix}.aln.bam ~{outputPrefix}.aln.bai + samtools view -b -1 - > ~{outputPrefix}.aln.bam } output { From 0c9ba9ae63938bf9c96a29a95c76bba5b0e64ad1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 22 Jun 2020 12:22:45 +0200 Subject: [PATCH 0446/1208] Update samtools sort to also index --- samtools.wdl | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 5648eb1c..6454bd3a 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -366,16 +366,19 @@ task Merge { task Sort { input { File inputBam - String outputPath + String outputPath = basename(inputBam, "\.bam") + ".sorted.bam" Boolean sortByName = false Int compressionLevel = 1 - - String memory = "2G" + Int threads = 0 + Int memoryPerThread = 4 + Int memoryGb = 1 + (threads + 1) * memoryPerThread String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) - Int? threads } + # Select first needed as outputPath is optional input. 
(bug in cromwell) + String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") + command { set -e mkdir -p "$(dirname ~{outputPath})" @@ -385,15 +388,17 @@ task Sort { ~{"--threads " + threads} \ -o ~{outputPath} \ ~{inputBam} + samtools index ~{outputPath} ~{bamIndexPath} } output { - File outputSortedBam = outputPath + File outputBam = outputPath + File outputBamIndex = bamIndexPath } runtime { - cpu: 1 + select_first([threads, 0]) - memory: memory + cpu: 1 + threads + memory: "~{memoryGb}G" docker: dockerImage time_minutes: timeMinutes } @@ -404,12 +409,12 @@ task Sort { outputPath: {description: "Output directory path + output file.", category: "required"} sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} + memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} # outputs - outputSortedBam: {description: "Sorted BAM file."} + outputBam: {description: "Sorted BAM file."} } } From feae6e792468ec23f6936dc97b349be270dbb6a9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 22 Jun 2020 13:07:55 +0200 Subject: [PATCH 0447/1208] remove other sorting bits --- bwa.wdl | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index c5980b9b..14b9a005 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -92,14 +92,7 @@ task Kit { Boolean sixtyFour = false Int threads = 4 - # Samtools uses *additional* threads. So by default this option should - # not be used. - Int? sortThreads - # Compression uses zlib. Higher than level 2 causes enormous slowdowns. - # GATK/Picard default is level 2. 
- String sortMemoryPerThread = "4G" - Int compressionLevel = 1 - String memory = "20G" + String memory = 1 + ceil(size(bwaIndex.indexFiles, "G")) Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) String dockerImage = "biocontainers/bwakit:v0.7.15_cv1" } @@ -122,7 +115,6 @@ task Kit { output { File outputBam = outputPrefix + ".aln.bam" - File outputBamIndex = outputPrefix + ".aln.bai" } runtime { @@ -143,9 +135,7 @@ task Kit { readgroup: {description: "A readgroup identifier.", category: "common"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} threads: {description: "The number of threads to use for alignment.", category: "advanced"} - sortThreads: {description: "The number of additional threads to use for sorting.", category: "advanced"} - sortMemoryPerThread: {description: "The amount of memory for each sorting thread.", category: "advanced"} - compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", @@ -153,7 +143,6 @@ task Kit { # outputs outputBam: "The produced BAM file." - outputBamIndex: "The index of the produced BAM file." 
} } From 424178d62f31a773700f5e7fb46476fd043ab51e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 22 Jun 2020 13:25:55 +0200 Subject: [PATCH 0448/1208] use correct memory estimate --- bwa.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 14b9a005..3f1276d1 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -92,7 +92,7 @@ task Kit { Boolean sixtyFour = false Int threads = 4 - String memory = 1 + ceil(size(bwaIndex.indexFiles, "G")) + String memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G")) Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) String dockerImage = "biocontainers/bwakit:v0.7.15_cv1" } @@ -121,7 +121,7 @@ task Kit { # One extra thread for bwa-postalt + samtools is not needed. # These only use 5-10% of compute power and not always simultaneously. cpu: threads - memory: memory + memory: "~{memoryGb}G" time_minutes: timeMinutes docker: dockerImage } @@ -136,7 +136,7 @@ task Kit { sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} threads: {description: "The number of threads to use for alignment.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 10c023ab71a6dc3fc529d439a59e5031c9682ea7 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 23 Jun 2020 13:50:21 +0200 Subject: [PATCH 0449/1208] use newer more experimental image --- bwa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index 3f1276d1..375d8d0b 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -94,7 +94,7 @@ task Kit { Int threads = 4 String memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G")) Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) - String dockerImage = "biocontainers/bwakit:v0.7.15_cv1" + String dockerImage = "biowdl/bwakit:0.7.17-dev-experimental" } command { From 0cb76481426e96f75b81a0e1fb516a43e8bffba5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Jun 2020 10:38:48 +0200 Subject: [PATCH 0450/1208] Revert "Don't sort" This reverts commit b7d9dad9ad9f804fc41ab8bda3e3961b0441fabc. 
--- bwa.wdl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index 375d8d0b..3e11eb2f 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -110,7 +110,13 @@ task Kit { k8 /opt/conda/bin/bwa-postalt.js \ -p ~{outputPrefix}.hla \ ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ - samtools view -b -1 - > ~{outputPrefix}.aln.bam + samtools sort \ + ~{"-@ " + sortThreads} \ + -m ~{sortMemoryPerThread} \ + -l ~{compressionLevel} \ + - \ + -o ~{outputPrefix}.aln.bam + samtools index ~{outputPrefix}.aln.bam ~{outputPrefix}.aln.bai } output { From 5b096cee1b35d8ff404567571ce429f3a46ec7c4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Jun 2020 10:43:27 +0200 Subject: [PATCH 0451/1208] Put sorting back into bwakit task --- bwa.wdl | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 3e11eb2f..0095f48c 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -92,7 +92,16 @@ task Kit { Boolean sixtyFour = false Int threads = 4 - String memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G")) + + # Samtools uses *additional* threads. So by default this option should + # not be used. + Int sortThreads = 0 + # Compression uses zlib. Higher than level 2 causes enormous slowdowns. + # GATK/Picard default is level 2. + Int sortMemoryPerThreadGb = 4 + Int compressionLevel = 1 + # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
+ Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) String dockerImage = "biowdl/bwakit:0.7.17-dev-experimental" } @@ -112,7 +121,7 @@ task Kit { ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ samtools sort \ ~{"-@ " + sortThreads} \ - -m ~{sortMemoryPerThread} \ + -m ~{sortMemoryPerThreadGb}G \ -l ~{compressionLevel} \ - \ -o ~{outputPrefix}.aln.bam @@ -121,6 +130,7 @@ task Kit { output { File outputBam = outputPrefix + ".aln.bam" + File outputBamIndex = outputPrefix + ".aln.bai" } runtime { @@ -141,14 +151,18 @@ task Kit { readgroup: {description: "A readgroup identifier.", category: "common"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} threads: {description: "The number of threads to use for alignment.", category: "advanced"} - memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} + sortThreads: {description: "The number of additional threads to use for sorting.", category: "advanced"} + sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs outputBam: "The produced BAM file." + outputBamIndex: "The index of the produced BAM file." 
} } From 99647123ad96a38aee08ee5a85dd03b7d764a095 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Jun 2020 10:49:13 +0200 Subject: [PATCH 0452/1208] Make sure samtools uses memory --- samtools.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 6454bd3a..825a0531 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -370,8 +370,8 @@ task Sort { Boolean sortByName = false Int compressionLevel = 1 Int threads = 0 - Int memoryPerThread = 4 - Int memoryGb = 1 + (threads + 1) * memoryPerThread + Int memoryPerThreadGb = 4 + Int memoryGb = 1 + (threads + 1) * memoryPerThreadGb String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) } @@ -386,6 +386,7 @@ task Sort { -l ~{compressionLevel} \ ~{true="-n" false="" sortByName} \ ~{"--threads " + threads} \ + -m ~{memoryPerThreadGb}G \ -o ~{outputPath} \ ~{inputBam} samtools index ~{outputPath} ~{bamIndexPath} @@ -410,6 +411,7 @@ task Sort { sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} + memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 4f6bd9ad6211697fbc60a5e926717aa6d0d31398 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Jun 2020 10:50:40 +0200 Subject: [PATCH 0453/1208] update parameter_meta --- bwa.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index 0095f48c..12f2ad54 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -155,7 +155,6 @@ task Kit { sortThreads: {description: "The number of additional threads to use for sorting.", category: "advanced"} sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 0665c22fa2d64813c022e3e9dc731dac2a5fa63c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Jun 2020 13:25:52 +0200 Subject: [PATCH 0454/1208] Add sortSam task --- picard.wdl | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/picard.wdl b/picard.wdl index 0ee5da36..ef648aea 100644 --- a/picard.wdl +++ b/picard.wdl @@ -650,6 +650,52 @@ task ScatterIntervalList { } } +task SortSam { + input { + File inputBam + String outputPath + + Int XmxGb = 4 + Int memoryGb = 1 + XmxGb + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + # A mulled container is needed to have both picard and bwa in one container. + # This container contains: picard (2.18.7), bwa (0.7.17-r1188) + String dockerImage = "quay.io/biocontainers/picard:2.23.1--h37ae868_0" + } + + command { + mkdir -p "$(dirname ~{outputPath})" + picard -Xmx~{XmxGb}G -XX:ParallelGCThreads=1 SortSam \ + INPUT=/dev/stdin \ + OUTPUT=~{outputPath} \ + SORT_ORDER=coordinate \ + CREATE_INDEX=true + } + + output { + File outputBam = outputPath + File outputBamIndex = sub(outputPath, "\.bam$", ".bai") + } + + runtime { + cpu: 1 + memory: "~{memoryGb}G" + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputBam: {description: "The unsorted input BAM file", category: "required"} + outputPath: {description: "The location the output BAM file should be written to.", category: "required"} + memoryGb: {description: "The amount of memory this job will use.", category: "advanced"} + XmxGb: {description: "The maximum memory available to picard SortSam. Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task SortVcf { input { Array[File]+ vcfFiles From f4a8fac6b1f3717b770c6e4bf001af70bae27373 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Jun 2020 14:25:31 +0200 Subject: [PATCH 0455/1208] Fix sambamba index command in sort --- sambamba.wdl | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 sambamba.wdl diff --git a/sambamba.wdl b/sambamba.wdl new file mode 100644 index 00000000..942a8ead --- /dev/null +++ b/sambamba.wdl @@ -0,0 +1,80 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task Sort { + input { + File inputBam + String outputPath = basename(inputBam, "\.bam") + ".sorted.bam" + Boolean sortByName = false + Int compressionLevel = 1 + Int threads = 1 + Int memoryPerThreadGb = 4 + Int memoryGb = 1 + (threads + 1) * memoryPerThreadGb + String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + } + + # Select first needed as outputPath is optional input. (bug in cromwell) + String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + sambamba sort \ + -l ~{compressionLevel} \ + ~{true="-n" false="" sortByName} \ + ~{"--nthreads " + threads} \ + -m ~{memoryPerThreadGb}G \ + -o ~{outputPath} \ + ~{inputBam} + sambamba index \ + ~{"--nthreads " + threads} \ + ~{outputPath} ~{bamIndexPath} + } + + output { + File outputBam = outputPath + File outputBamIndex = bamIndexPath + } + + runtime { + cpu: threads + memory: "~{memoryGb}G" + docker: dockerImage + time_minutes: timeMinutes + } + + parameter_meta { + # inputs + inputBam: {description: "The input SAM file.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "required"} + sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} + memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} + memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + # outputs + outputBam: {description: "Sorted BAM file."} + } +} \ No newline at end of file From 45ffa55397fed9d8385a3df28abaf65d8783b690 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Jun 2020 14:26:54 +0200 Subject: [PATCH 0456/1208] add threads on index as well --- samtools.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 825a0531..fbc491eb 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -389,7 +389,9 @@ task Sort { -m ~{memoryPerThreadGb}G \ -o ~{outputPath} \ ~{inputBam} - samtools index ~{outputPath} ~{bamIndexPath} + samtools index \ + -@ ~{threads} \ + ~{outputPath} ~{bamIndexPath} } output { From c6eb077f633198a14832ef76497e71a191daaf30 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Jun 2020 15:42:44 +0200 Subject: [PATCH 0457/1208] correct memory calculation --- bwa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index 12f2ad54..8d2cb75c 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -101,7 +101,7 @@ task Kit { Int sortMemoryPerThreadGb = 4 Int compressionLevel = 1 # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
- Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads + Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * (sortThreads + 1) Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) String dockerImage = "biowdl/bwakit:0.7.17-dev-experimental" } From fac254be0e5ef5ab470a1328c182d18c1355cf48 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Jun 2020 15:48:49 +0200 Subject: [PATCH 0458/1208] use correct memory for sambama --- sambamba.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sambamba.wdl b/sambamba.wdl index 942a8ead..440b8f03 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -28,7 +28,7 @@ task Sort { Int compressionLevel = 1 Int threads = 1 Int memoryPerThreadGb = 4 - Int memoryGb = 1 + (threads + 1) * memoryPerThreadGb + Int memoryGb = 1 + threads * memoryPerThreadGb String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) } From 950207105c42cba70095c1b192d8975be32e021f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 25 Jun 2020 08:06:47 +0200 Subject: [PATCH 0459/1208] Actually sort the inputBam, not stdout --- picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index ef648aea..5ce0c885 100644 --- a/picard.wdl +++ b/picard.wdl @@ -666,7 +666,7 @@ task SortSam { command { mkdir -p "$(dirname ~{outputPath})" picard -Xmx~{XmxGb}G -XX:ParallelGCThreads=1 SortSam \ - INPUT=/dev/stdin \ + INPUT=~{inputBam} \ OUTPUT=~{outputPath} \ SORT_ORDER=coordinate \ CREATE_INDEX=true From 7b7eb3da6af77b88228bd5707a12204990809553 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 25 Jun 2020 08:45:21 +0200 Subject: [PATCH 0460/1208] Update sortsam task with extra variables --- picard.wdl | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/picard.wdl b/picard.wdl index 
5ce0c885..85c07dba 100644 --- a/picard.wdl +++ b/picard.wdl @@ -654,9 +654,15 @@ task SortSam { input { File inputBam String outputPath - - Int XmxGb = 4 - Int memoryGb = 1 + XmxGb + Boolean sortByName = false + Boolean createIndex = true + Boolean createMd5File = false + Int maxRecordsInRam = 500000 + Int compressionLevel = 1 + + # Default ram of 4 GB. Using 125001.0 to prevent an answer of + # 4.000000001 which gets rounded to 5. + Int XmxGb = ceil(maxRecordsInRam / 125001.0) Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) # A mulled container is needed to have both picard and bwa in one container. # This container contains: picard (2.18.7), bwa (0.7.17-r1188) @@ -664,12 +670,17 @@ task SortSam { } command { + set -e mkdir -p "$(dirname ~{outputPath})" picard -Xmx~{XmxGb}G -XX:ParallelGCThreads=1 SortSam \ INPUT=~{inputBam} \ OUTPUT=~{outputPath} \ - SORT_ORDER=coordinate \ - CREATE_INDEX=true + MAX_RECORDS_IN_RAM=~{maxRecordsInRam} \ + SORT_ORDER=~{true="queryname" false="coordinate" sortByName} \ + CREATE_INDEX=~{true="true" false="false" createIndex} \ + COMPRESSION_LEVEL=~{compressionLevel} \ + CREATE_MD5_FILE=~{true="true" false="false" createMd5File} + } output { @@ -679,7 +690,7 @@ task SortSam { runtime { cpu: 1 - memory: "~{memoryGb}G" + memory: "~{1 + XmxGb}G" time_minutes: timeMinutes docker: dockerImage } @@ -687,7 +698,6 @@ task SortSam { parameter_meta { inputBam: {description: "The unsorted input BAM file", category: "required"} outputPath: {description: "The location the output BAM file should be written to.", category: "required"} - memoryGb: {description: "The amount of memory this job will use.", category: "advanced"} XmxGb: {description: "The maximum memory available to picard SortSam. 
Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 160ed6f2525aba0e673022beda11e24272ae4d31 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 25 Jun 2020 08:49:17 +0200 Subject: [PATCH 0461/1208] update picard --- picard.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/picard.wdl b/picard.wdl index 85c07dba..10287583 100644 --- a/picard.wdl +++ b/picard.wdl @@ -662,6 +662,7 @@ task SortSam { # Default ram of 4 GB. Using 125001.0 to prevent an answer of # 4.000000001 which gets rounded to 5. + # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) # A mulled container is needed to have both picard and bwa in one container. 
From a7f50e409d90316fcac56c3ebd8ec5b705f39cb3 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 25 Jun 2020 09:44:48 +0200 Subject: [PATCH 0462/1208] Picard markduplicates has a default compression of 1 --- picard.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index 10287583..21ff5ea1 100644 --- a/picard.wdl +++ b/picard.wdl @@ -462,6 +462,8 @@ task MarkDuplicates { Array[File] inputBamIndexes String outputBamPath String metricsPath + Int compressionLevel = 1 + Boolean createMd5File = false String memory = "9G" String javaXmx = "8G" @@ -488,13 +490,14 @@ task MarkDuplicates { INPUT=~{sep=' INPUT=' inputBams} \ OUTPUT=~{outputBamPath} \ METRICS_FILE=~{metricsPath} \ + COMPRESSION_LEVEL=~{compressionLevel} \ VALIDATION_STRINGENCY=SILENT \ ~{"READ_NAME_REGEX=" + read_name_regex} \ OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \ CLEAR_DT="false" \ CREATE_INDEX=true \ ADD_PG_TAG_TO_READS=false \ - CREATE_MD5_FILE=true + CREATE_MD5_FILE=~{true="true" false="false" createMd5File} } output { From 363e90e5d7ddf5a69715b3e95cbbb2a91d9248ed Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 25 Jun 2020 09:59:38 +0200 Subject: [PATCH 0463/1208] Do not create md5 file by default anymore --- picard.wdl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/picard.wdl b/picard.wdl index 21ff5ea1..f0f687f0 100644 --- a/picard.wdl +++ b/picard.wdl @@ -367,6 +367,8 @@ task GatherBamFiles { String memory = "4G" String javaXmx = "3G" + Int compressionLevel = 1 + Boolean createMd5File = false Int timeMinutes = 1 + ceil(size(inputBams, "G") * 0.5) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -378,8 +380,9 @@ task GatherBamFiles { GatherBamFiles \ INPUT=~{sep=' INPUT=' inputBams} \ OUTPUT=~{outputBamPath} \ + COMPRESSION_LEVEL=~{compressionLevel} \ CREATE_INDEX=true \ - CREATE_MD5_FILE=true + CREATE_MD5_FILE=~{true="true" false="false" createMd5File} } output { @@ -658,7 +661,6 @@ task 
SortSam { File inputBam String outputPath Boolean sortByName = false - Boolean createIndex = true Boolean createMd5File = false Int maxRecordsInRam = 500000 Int compressionLevel = 1 @@ -681,7 +683,7 @@ task SortSam { OUTPUT=~{outputPath} \ MAX_RECORDS_IN_RAM=~{maxRecordsInRam} \ SORT_ORDER=~{true="queryname" false="coordinate" sortByName} \ - CREATE_INDEX=~{true="true" false="false" createIndex} \ + CREATE_INDEX=true \ COMPRESSION_LEVEL=~{compressionLevel} \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} From e15bcc6015bacc6e3b6c68257537faef28d0d1e8 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 25 Jun 2020 15:33:07 +0200 Subject: [PATCH 0464/1208] Add new tasks to talon. --- CHANGELOG.md | 3 ++ scripts | 2 +- talon.wdl | 82 +++++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 75 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7813c209..1ac182c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ TALON: Update `FilterTalonTranscripts` to new version. ++ TALON: Add `GetSpliceJunctions` & `LabelReads` tasks. ++ TALON: Update to version 5.0. + Update the image for chunked-scatter and make use of new features from 0.2.0. + Tuned resource requirements for GATK VariantEval, MultiQC, Picard metrics and STAR. 
diff --git a/scripts b/scripts index b83da72b..325a129c 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit b83da72b9b43b956a3062b78fb08044eb9fae464 +Subproject commit 325a129c14de56b2055ee0e9e0da7dc74df5fec4 diff --git a/talon.wdl b/talon.wdl index 98e0c13a..2e944382 100644 --- a/talon.wdl +++ b/talon.wdl @@ -32,7 +32,7 @@ task CreateAbundanceFileFromDatabase { String memory = "4G" Int timeMinutes = 30 - String dockerImage = "biocontainers/talon:v4.4.2_cv1" + String dockerImage = "biocontainers/talon:v5.0_cv1" } command { @@ -88,7 +88,7 @@ task CreateGtfFromDatabase { String memory = "4G" Int timeMinutes = 30 - String dockerImage = "biocontainers/talon:v4.4.2_cv1" + String dockerImage = "biocontainers/talon:v5.0_cv1" } command { @@ -137,12 +137,16 @@ task FilterTalonTranscripts { File databaseFile String annotationVersion String outputPrefix + Float maxFracA = 0.5 + Int minCount = 5 + Boolean allowGenomic = false - File? pairingsFile + File? datasetsFile + Int? minDatasets String memory = "4G" Int timeMinutes = 30 - String dockerImage = "biocontainers/talon:v4.4.2_cv1" + String dockerImage = "biocontainers/talon:v5.0_cv1" } command { @@ -152,7 +156,11 @@ task FilterTalonTranscripts { --db=~{databaseFile} \ -a ~{annotationVersion} \ ~{"--o=" + outputPrefix + "_whitelist.csv"} \ - ~{"-p " + pairingsFile} + --maxFracA=~{maxFracA} \ + --minCount=~{minCount} \ + ~{true="--allowGenomic" false="" allowGenomic} \ + --datasets=~{datasetsFile} \ + --minDatasets=~{minDatasets} } output { @@ -170,7 +178,11 @@ task FilterTalonTranscripts { databaseFile: {description: "TALON database.", category: "required"} annotationVersion: {description: "Which annotation version to use.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - pairingsFile: {description: "A file indicating which datasets should be considered together.", category: "advanced"} + maxFracA: {description: "Maximum fraction of As to 
allow in the window located immediately after any read assigned to a novel transcript.", category: "advanced"} + minCount: {description: "Number of minimum occurrences required for a novel transcript PER dataset.", category: "advanced"} + allowGenomic: {description: "If this option is set, transcripts from the Genomic novelty category will be permitted in the output.", category: "advanced"} + datasetsFile: {description: "Datasets to include.", category: "advanced"} + minDatasets: {description: "Minimum number of datasets novel transcripts must be found in.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -190,7 +202,7 @@ task GetReadAnnotations { String memory = "4G" Int timeMinutes = 30 - String dockerImage = "biocontainers/talon:v4.4.2_cv1" + String dockerImage = "biocontainers/talon:v5.0_cv1" } command { @@ -228,6 +240,54 @@ task GetReadAnnotations { } } +task GetSpliceJunctions { + input { + File GTFfile + File databaseFile + File referenceGTF + String runMode = "intron" + String outputPrefix + + String memory = "4G" + Int timeMinutes = 30 + String dockerImage = "biocontainers/talon:v5.0_cv1" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + talon_get_sjs \ + --gtf ~{GTFfile} \ + --db ~{databaseFile} \ + --ref ~{referenceGTF} \ + --mode ~{runMode} \ + --outprefix ~{outputPrefix} + } + + output { + + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + GTFfile: {description: "TALON GTF file from which to extract exons/introns.", category: "required"} + databaseFile: { description: "TALON database.", category: "required"} + 
referenceGTF: {description: "GTF reference file (ie GENCODE).", category: "required"} + runMode: {description: "Determines whether to include introns or exons in the output.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs +} + task InitializeTalonDatabase { input { File GTFfile @@ -241,7 +301,7 @@ task InitializeTalonDatabase { String memory = "10G" Int timeMinutes = 60 - String dockerImage = "biocontainers/talon:v4.4.2_cv1" + String dockerImage = "biocontainers/talon:v5.0_cv1" } command { @@ -293,7 +353,7 @@ task ReformatGtf { String memory = "4G" Int timeMinutes = 30 - String dockerImage = "biocontainers/talon:v4.4.2_cv1" + String dockerImage = "biocontainers/talon:v5.0_cv1" } command { @@ -334,7 +394,7 @@ task SummarizeDatasets { String memory = "4G" Int timeMinutes = 50 - String dockerImage = "biocontainers/talon:v4.4.2_cv1" + String dockerImage = "biocontainers/talon:v5.0_cv1" } command { @@ -386,7 +446,7 @@ task Talon { Int cores = 4 String memory = "25G" Int timeMinutes = 2880 - String dockerImage = "biocontainers/talon:v4.4.2_cv1" + String dockerImage = "biocontainers/talon:v5.0_cv1" } command <<< From ddae57644147098dda2d867029cb0af627e145fe Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 26 Jun 2020 09:15:13 +0200 Subject: [PATCH 0465/1208] Increase time estimates for sort tasks --- picard.wdl | 2 +- sambamba.wdl | 2 +- samtools.wdl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/picard.wdl b/picard.wdl index f0f687f0..f03535c0 100644 --- a/picard.wdl +++ b/picard.wdl 
@@ -669,7 +669,7 @@ task SortSam { # 4.000000001 which gets rounded to 5. # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) # A mulled container is needed to have both picard and bwa in one container. # This container contains: picard (2.18.7), bwa (0.7.17-r1188) String dockerImage = "quay.io/biocontainers/picard:2.23.1--h37ae868_0" diff --git a/sambamba.wdl b/sambamba.wdl index 440b8f03..cccfddd3 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -30,7 +30,7 @@ task Sort { Int memoryPerThreadGb = 4 Int memoryGb = 1 + threads * memoryPerThreadGb String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) } # Select first needed as outputPath is optional input. (bug in cromwell) diff --git a/samtools.wdl b/samtools.wdl index fbc491eb..edcea9be 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -373,7 +373,7 @@ task Sort { Int memoryPerThreadGb = 4 Int memoryGb = 1 + (threads + 1) * memoryPerThreadGb String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) } # Select first needed as outputPath is optional input. 
(bug in cromwell) From 5db2a1f9097132d11d3654ebe45e88364eeef517 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 26 Jun 2020 10:42:12 +0200 Subject: [PATCH 0466/1208] add markdup task --- sambamba.wdl | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/sambamba.wdl b/sambamba.wdl index cccfddd3..b4114297 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -20,6 +20,49 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. + +task Markdup { + input { + Array[File] inputBams + String outputPath + Int threads = 1 + Int compressionLevel = 1 + Int? hashTableSize + Int? overFlowListSize + Int? sortBufferSize + Int? ioBufferSize + Boolean removeDuplicates = false + + # According to the manual sambamba markdup uses about 2G per 100 million reads. + Int memoryGb = 1 + ceil(size(inputBams, 'G') / 8) + String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) + } + String bamIndexPath = sub(outputPath, "\.bam$", ".bai") + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + sambamba markdup \ + --nthreads ~{threads} \ + -l ~{compressionLevel} \ + ~{true="-r" false="" removeDuplicates} \ + ~{"--hash-table-size " + hashTableSize} \ + ~{"--overflow-list-size " + overFlowListSize} \ + ~{"--sort-buffer-size " + sortBufferSize} \ + ~{"--io-buffer-size " + ioBufferSize} \ + ~{sep=' ' inputBams} ~{outputPath} + sambamba index ~{outputPath} ~{bamIndexPath} + } + + runtime { + memory: "~{memoryGb}G" + cpu: threads + time_minutes: timeMinutes + docker: dockerImage + } +} + task Sort { input { File inputBam From caaba5971f6295d13ea33f728744fdb5f692d007 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 26 Jun 2020 10:50:10 +0200 Subject: [PATCH 0467/1208] add outputs --- sambamba.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sambamba.wdl b/sambamba.wdl index b4114297..b402f580 100644 --- 
a/sambamba.wdl +++ b/sambamba.wdl @@ -55,6 +55,11 @@ task Markdup { sambamba index ~{outputPath} ~{bamIndexPath} } + output { + File outputBam = outputPath + File outputBamIndex = bamIndexPath + } + runtime { memory: "~{memoryGb}G" cpu: threads From 2c4dd54c7dd91809558d2f81a139a94c2399bb39 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 26 Jun 2020 11:05:08 +0200 Subject: [PATCH 0468/1208] fix picard optional md5 --- picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index f03535c0..39575392 100644 --- a/picard.wdl +++ b/picard.wdl @@ -506,7 +506,7 @@ task MarkDuplicates { output { File outputBam = outputBamPath File outputBamIndex = sub(outputBamPath, "\.bam$", ".bai") - File outputBamMd5 = outputBamPath + ".md5" + File? outputBamMd5 = outputBamPath + ".md5" File metricsFile = metricsPath } From 8df42e20f13986734efb6fa085088bf85da8117d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 26 Jun 2020 11:07:26 +0200 Subject: [PATCH 0469/1208] do not require indexes on the markdup task --- picard.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/picard.wdl b/picard.wdl index 39575392..69a14538 100644 --- a/picard.wdl +++ b/picard.wdl @@ -462,7 +462,6 @@ task GatherVcfs { task MarkDuplicates { input { Array[File]+ inputBams - Array[File] inputBamIndexes String outputBamPath String metricsPath Int compressionLevel = 1 @@ -519,7 +518,6 @@ task MarkDuplicates { parameter_meta { # inputs inputBams: {description: "The BAM files for which the duplicate reads should be marked.", category: "required"} - inputBamIndexes: {description: "Th eindexes for the input BAM files.", category: "required"} outputBamPath: {description: "The location where the ouptut BAM file should be written.", category: "required"} metricsPath: {description: "The location where the output metrics file should be written.", category: "required"} read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of 
MarkDuplicates.", category: "advanced"} From 7252022d6ab47c58962ae330ffea273056f3560e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 26 Jun 2020 14:21:07 +0200 Subject: [PATCH 0470/1208] sambamba creates index automaticall --- sambamba.wdl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sambamba.wdl b/sambamba.wdl index b402f580..4de4dfdc 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -94,9 +94,8 @@ task Sort { -m ~{memoryPerThreadGb}G \ -o ~{outputPath} \ ~{inputBam} - sambamba index \ - ~{"--nthreads " + threads} \ - ~{outputPath} ~{bamIndexPath} + # sambamba creates an index for us + mv ~{outputPath}.bai ~{bamIndexPath} } output { From fbc7b95956624b01e0e60574e18f6849cc149976 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 26 Jun 2020 14:31:02 +0200 Subject: [PATCH 0471/1208] proper threads for sambamba --- sambamba.wdl | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/sambamba.wdl b/sambamba.wdl index 4de4dfdc..1b10b37a 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -25,7 +25,8 @@ task Markdup { input { Array[File] inputBams String outputPath - Int threads = 1 + # Sambamba additional threads like samtools + Int threads = 0 Int compressionLevel = 1 Int? hashTableSize Int? 
overFlowListSize @@ -52,7 +53,8 @@ task Markdup { ~{"--sort-buffer-size " + sortBufferSize} \ ~{"--io-buffer-size " + ioBufferSize} \ ~{sep=' ' inputBams} ~{outputPath} - sambamba index ~{outputPath} ~{bamIndexPath} + # sambamba creates an index for us + mv ~{outputPath}.bai ~{bamIndexPath} } output { @@ -62,7 +64,7 @@ task Markdup { runtime { memory: "~{memoryGb}G" - cpu: threads + cpu: threads + 1 time_minutes: timeMinutes docker: dockerImage } @@ -74,9 +76,10 @@ task Sort { String outputPath = basename(inputBam, "\.bam") + ".sorted.bam" Boolean sortByName = false Int compressionLevel = 1 - Int threads = 1 + # Sambamba additional threads like samtools + Int threads = 0 Int memoryPerThreadGb = 4 - Int memoryGb = 1 + threads * memoryPerThreadGb + Int memoryGb = 1 + (threads + 1) * memoryPerThreadGb String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) } @@ -104,7 +107,7 @@ task Sort { } runtime { - cpu: threads + cpu: threads + 1 memory: "~{memoryGb}G" docker: dockerImage time_minutes: timeMinutes From 3cbd0cb87d04085d214573a3316f760dad6f08cc Mon Sep 17 00:00:00 2001 From: Redmar Date: Mon, 29 Jun 2020 10:24:13 +0200 Subject: [PATCH 0472/1208] Add bam index to output --- pbmm2.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index 18d3cb9a..84fbd2d0 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -35,7 +35,6 @@ task Mapping { } command { - set -e pbmm2 align \ --preset ~{presetOption} \ ~{true="--sort" false="" sort} \ @@ -43,11 +42,11 @@ task Mapping { ~{referenceMMI} \ ~{queryFile} \ ~{sample}.align.bam - } output { File outputAlignmentFile = sample + ".align.bam" + File outputIndexFile = sample + ".align.bam.bai" } runtime { @@ -69,6 +68,7 @@ task Mapping { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # output - outputAlignmentFile: {description: "Mapped bam files."} + outputAlignmentFile: {description: "Mapped bam file."} + outputIndexFile: {description: "Bam index file."} } } From 958aa3555a2e745e72b729c7433d2f24d2e6423b Mon Sep 17 00:00:00 2001 From: Redmar Date: Mon, 29 Jun 2020 10:34:52 +0200 Subject: [PATCH 0473/1208] Update scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index b83da72b..325a129c 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit b83da72b9b43b956a3062b78fb08044eb9fae464 +Subproject commit 325a129c14de56b2055ee0e9e0da7dc74df5fec4 From ab72ec3fd7e5ab78f083bda44b538248980c4358 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 29 Jun 2020 13:59:24 +0200 Subject: [PATCH 0474/1208] use validation stringency silent for sort sam --- picard.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/picard.wdl b/picard.wdl index 69a14538..ac7f944a 100644 --- a/picard.wdl +++ b/picard.wdl @@ -683,6 +683,7 @@ task SortSam { SORT_ORDER=~{true="queryname" false="coordinate" sortByName} \ CREATE_INDEX=true \ COMPRESSION_LEVEL=~{compressionLevel} \ + VALIDATION_STRINGENCY=SILENT \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} } From e3a3add789a4a42c5acd40c38e2b654162893c32 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 29 Jun 2020 15:01:20 +0200 Subject: [PATCH 0475/1208] fix sambamba threads --- sambamba.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sambamba.wdl b/sambamba.wdl index 1b10b37a..33377736 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -26,7 +26,7 @@ task Markdup { Array[File] inputBams String outputPath # Sambamba additional threads like samtools - Int threads = 0 + Int threads = 1 Int compressionLevel = 1 Int? hashTableSize Int? 
overFlowListSize @@ -64,7 +64,7 @@ task Markdup { runtime { memory: "~{memoryGb}G" - cpu: threads + 1 + cpu: threads time_minutes: timeMinutes docker: dockerImage } @@ -77,9 +77,9 @@ task Sort { Boolean sortByName = false Int compressionLevel = 1 # Sambamba additional threads like samtools - Int threads = 0 + Int threads = 1 Int memoryPerThreadGb = 4 - Int memoryGb = 1 + (threads + 1) * memoryPerThreadGb + Int memoryGb = 1 + threads * memoryPerThreadGb String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) } @@ -107,7 +107,7 @@ task Sort { } runtime { - cpu: threads + 1 + cpu: threads memory: "~{memoryGb}G" docker: dockerImage time_minutes: timeMinutes From b81efca481f40522b9ab404cb63e834e60f122a7 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 29 Jun 2020 15:13:03 +0200 Subject: [PATCH 0476/1208] correct additional threads --- samtools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index edcea9be..ddc77c79 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -369,9 +369,9 @@ task Sort { String outputPath = basename(inputBam, "\.bam") + ".sorted.bam" Boolean sortByName = false Int compressionLevel = 1 - Int threads = 0 + Int threads = 1 Int memoryPerThreadGb = 4 - Int memoryGb = 1 + (threads + 1) * memoryPerThreadGb + Int memoryGb = 1 + threads * memoryPerThreadGb String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) } @@ -400,7 +400,7 @@ task Sort { } runtime { - cpu: 1 + threads + cpu: 1 memory: "~{memoryGb}G" docker: dockerImage time_minutes: timeMinutes From 82e9c135d4d5dcf7c86c8108a3a4c6d230cd3691 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 29 Jun 2020 15:15:25 +0200 Subject: [PATCH 0477/1208] Remove additional thread nonsense --- bwa.wdl | 9 +++------ sambamba.wdl | 4 +--- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/bwa.wdl 
b/bwa.wdl index 8d2cb75c..3b092e8c 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -92,16 +92,13 @@ task Kit { Boolean sixtyFour = false Int threads = 4 - - # Samtools uses *additional* threads. So by default this option should - # not be used. - Int sortThreads = 0 + Int sortThreads = 1 # Compression uses zlib. Higher than level 2 causes enormous slowdowns. # GATK/Picard default is level 2. Int sortMemoryPerThreadGb = 4 Int compressionLevel = 1 # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * (sortThreads + 1) + Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) String dockerImage = "biowdl/bwakit:0.7.17-dev-experimental" } @@ -152,7 +149,7 @@ task Kit { sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} threads: {description: "The number of threads to use for alignment.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} - sortThreads: {description: "The number of additional threads to use for sorting.", category: "advanced"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} diff --git a/sambamba.wdl b/sambamba.wdl index 33377736..4ef62ddc 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -25,7 +25,6 @@ task Markdup { input { Array[File] inputBams String outputPath - # Sambamba additional threads like samtools Int threads = 1 Int 
compressionLevel = 1 Int? hashTableSize @@ -76,7 +75,6 @@ task Sort { String outputPath = basename(inputBam, "\.bam") + ".sorted.bam" Boolean sortByName = false Int compressionLevel = 1 - # Sambamba additional threads like samtools Int threads = 1 Int memoryPerThreadGb = 4 Int memoryGb = 1 + threads * memoryPerThreadGb @@ -122,7 +120,7 @@ task Sort { memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} + threads: {description: "The number of threads that will be used for this task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} # outputs outputBam: {description: "Sorted BAM file."} From 2aa28e29bc867f90a8b6463d2d95a67301364f6d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 30 Jun 2020 08:14:37 +0200 Subject: [PATCH 0478/1208] Switch sorting to samtools. Do not index as it is not required for marking duplicates --- bwa.wdl | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 3b092e8c..4cfd6fbe 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -29,12 +29,14 @@ task Mem { String? readgroup Int threads = 4 - String memory = "~{5 + ceil(size(bwaIndex.indexFiles, "G"))}G" - String picardXmx = "4G" + Int sortThreads = 1 + Int sortMemoryPerThreadGb = 4 + Int compressionLevel = 1 + # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
+ Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 200 / threads) - # A mulled container is needed to have both picard and bwa in one container. - # This container contains: picard (2.18.7), bwa (0.7.17-r1188) - String dockerImage = "quay.io/biocontainers/mulled-v2-002f51ea92721407ef440b921fb5940f424be842:43ec6124f9f4f875515f9548733b8b4e5fed9aa6-0" + # This container contains: samtools (1.10), bwa (0.7.17-r1188) + String dockerImage = "quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:eabfac3657eda5818bae4090db989e3d41b01542-0" } command { @@ -46,21 +48,21 @@ task Mem { ~{bwaIndex.fastaFile} \ ~{read1} \ ~{read2} \ - | picard -Xmx~{picardXmx} -XX:ParallelGCThreads=1 SortSam \ - INPUT=/dev/stdin \ - OUTPUT=~{outputPath} \ - SORT_ORDER=coordinate \ - CREATE_INDEX=true + | samtools sort \ + ~{"-@ " + sortThreads} \ + -m ~{sortMemoryPerThreadGb}G \ + -l ~{compressionLevel} \ + - \ + -o ~{outputPath} } output { File outputBam = outputPath - File outputBamIndex = sub(outputPath, "\.bam$", ".bai") } runtime { cpu: threads - memory: memory + memory: "~{memoryGb}G" time_minutes: timeMinutes docker: dockerImage } @@ -73,9 +75,9 @@ task Mem { readgroup: {description: "The readgroup to be assigned to the reads. See BWA mem's `-R` option.", category: "common"} threads: {description: "The number of threads to use.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - picardXmx: {description: "The maximum memory available to picard SortSam. 
Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", - category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -93,14 +95,13 @@ task Kit { Int threads = 4 Int sortThreads = 1 - # Compression uses zlib. Higher than level 2 causes enormous slowdowns. - # GATK/Picard default is level 2. Int sortMemoryPerThreadGb = 4 Int compressionLevel = 1 # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) - String dockerImage = "biowdl/bwakit:0.7.17-dev-experimental" + # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools + String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } command { @@ -122,12 +123,10 @@ task Kit { -l ~{compressionLevel} \ - \ -o ~{outputPrefix}.aln.bam - samtools index ~{outputPrefix}.aln.bam ~{outputPrefix}.aln.bai } output { File outputBam = outputPrefix + ".aln.bam" - File outputBamIndex = outputPrefix + ".aln.bai" } runtime { @@ -158,7 +157,6 @@ task Kit { # outputs outputBam: "The produced BAM file." - outputBamIndex: "The index of the produced BAM file." 
} } From d604025c5d1c603a8a29ce66587f648ebbc4d2d9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 30 Jun 2020 08:24:24 +0200 Subject: [PATCH 0479/1208] Fix bwakit command --- bwa.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 4cfd6fbe..0c35bf3a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -100,7 +100,7 @@ task Kit { # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) - # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools + # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10 String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } @@ -114,7 +114,7 @@ task Kit { ~{read1} \ ~{read2} \ 2> ~{outputPrefix}.log.bwamem | \ - k8 /opt/conda/bin/bwa-postalt.js \ + bwa-postalt.js \ -p ~{outputPrefix}.hla \ ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ samtools sort \ From 187e1277c00f4ce25b496fc8a9f0d986e9870512 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 30 Jun 2020 08:47:34 +0200 Subject: [PATCH 0480/1208] update sambamba memory requirements and parameter_meta --- sambamba.wdl | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/sambamba.wdl b/sambamba.wdl index 4ef62ddc..bf58dbc8 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -25,18 +25,23 @@ task Markdup { input { Array[File] inputBams String outputPath - Int threads = 1 + # Sambamba scales like this: 1 thread is fully utilized (1). 2 threads 1.8 utilized. 3 -> 2.4, 4-> 2.7. + # 2 threads reduces wall clock time by more than 40%. + Int threads = 2 Int compressionLevel = 1 Int? hashTableSize Int? overFlowListSize - Int? sortBufferSize - Int? 
ioBufferSize + # sortBufferSize and ioBufferSize taken from markdup defaults as of sambamba 0.7.1 + Int sortBufferSize = 2048 + Int ioBufferSize = 128 Boolean removeDuplicates = false - # According to the manual sambamba markdup uses about 2G per 100 million reads. - Int memoryGb = 1 + ceil(size(inputBams, 'G') / 8) + # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size. + # Added 1024 mb as a margin of safety + Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" - Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) + # Time minute calculation does not work well for higher number of threads. + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) / threads } String bamIndexPath = sub(outputPath, "\.bam$", ".bai") @@ -62,11 +67,29 @@ task Markdup { } runtime { - memory: "~{memoryGb}G" + memory: "~{memoryMb}M" cpu: threads time_minutes: timeMinutes docker: dockerImage } + + parameter_meta { + # inputs + inputBams: {description: "The input BAM files.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "required"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} + memoryMb: {description: "The amount of memory available to the job in megabytes.", category: "advanced"} + removeDuplicates: {description: "Whether to remove the duplicates (instead of only marking them).", category: "advanced"} + hashTableSize: {description: "Sets sambamba's hash table size", category: "advanced"} + overFlowListSize: {description: "Sets sambamba's overflow list size", category: "advanced"} + sortBufferSize: {description: "The amount of mb allocated to the sort buffer", category: "advanced"} + ioBufferSize: {description: "The amount of mb allocated to each IO buffer. 
Sambamba uses two IO buffers.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of threads that will be used for this task.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + # outputs + outputBam: {description: "Sorted BAM file."} + } } task Sort { From f71d42fda049459d85b36ae2c871f62b01ca6481 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 30 Jun 2020 11:22:26 +0200 Subject: [PATCH 0481/1208] Use memory estimates from WGS sample --- gatk.wdl | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 2089eabb..64297c8f 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -93,8 +93,8 @@ task ApplyBQSR { File referenceFastaDict File referenceFastaFai - String memory = "5G" - String javaXmx = "4G" + Int memoryMb = javaXmxMb + 512 + Int javaXmxMb = 2048 Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -102,7 +102,7 @@ task ApplyBQSR { command { set -e mkdir -p "$(dirname ~{outputBamPath})" - gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ + gatk --java-options '-Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1' \ ApplyBQSR \ --create-output-bam-md5 \ --add-output-sam-program-record \ @@ -126,7 +126,7 @@ task ApplyBQSR { runtime { docker: dockerImage time_minutes: timeMinutes - memory: memory + memory: "~{memoryMb}M" } parameter_meta { @@ -141,8 +141,8 @@ task ApplyBQSR { category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", @@ -165,8 +165,8 @@ task BaseRecalibrator { File referenceFastaDict File referenceFastaFai - String memory = "5G" - String javaXmx = "4G" + Int memoryMb = javaXmxMb + 512 + Int javaXmxMb = 1024 Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -174,7 +174,7 @@ task BaseRecalibrator { command { set -e mkdir -p "$(dirname ~{recalibrationReportPath})" - gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ + gatk --java-options '-Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1' \ BaseRecalibrator \ -R ~{referenceFasta} \ -I ~{inputBam} \ @@ -192,7 +192,7 @@ task BaseRecalibrator { runtime { docker: dockerImage time_minutes: timeMinutes - memory: memory + memory: "~{memoryMb}M" } parameter_meta { @@ -210,8 +210,8 @@ task BaseRecalibrator { category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", @@ -940,7 +940,7 @@ task GetPileupSummaries { } } -# Call variants on a single sample with HaplotypeCaller to produce a GVCF + task HaplotypeCaller { input { Array[File]+ inputBams @@ -962,8 +962,8 @@ task HaplotypeCaller { Boolean dontUseSoftClippedBases = false Float? standardMinConfidenceThresholdForCalling - String memory = "5G" - String javaXmx = "4G" + Int memoryMb = javaXmxMb + 512 + Int javaXmxMb = 3072 Int timeMinutes = 400 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -971,7 +971,7 @@ task HaplotypeCaller { command { set -e mkdir -p "$(dirname ~{outputPath})" - gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ + gatk --java-options '-Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1' \ HaplotypeCaller \ -R ~{referenceFasta} \ -O ~{outputPath} \ @@ -996,7 +996,7 @@ task HaplotypeCaller { runtime { docker: dockerImage time_minutes: timeMinutes - memory: memory + memory: "~{memoryMb}M" } parameter_meta { @@ -1022,8 +1022,8 @@ task HaplotypeCaller { dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} pedigree: {description: "Pedigree file for determining the population \"founders\"", category: "common"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", From ef447f286fb9b8f5663dc88a35b16ee26b264e97 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 30 Jun 2020 11:50:35 +0200 Subject: [PATCH 0482/1208] Use memory requirements based on WGS --- picard.wdl | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/picard.wdl b/picard.wdl index ac7f944a..d778e172 100644 --- a/picard.wdl +++ b/picard.wdl @@ -85,8 +85,8 @@ task CollectMultipleMetrics { Boolean collectSequencingArtifactMetrics = true Boolean collectQualityYieldMetrics = true - String memory = "9G" - String javaXmx = "8G" + Int memoryMb = javaXmxMb + 512 + Int javaXmxMb = 3072 # Additional * 2 because picard multiple metrics reads the reference fasta twice. Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" @@ -96,7 +96,7 @@ task CollectMultipleMetrics { command { set -e mkdir -p "$(dirname ~{basename})" - picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + picard -Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1 \ CollectMultipleMetrics \ I=~{inputBam} \ R=~{referenceFasta} \ @@ -158,7 +158,7 @@ task CollectMultipleMetrics { runtime { docker: dockerImage time_minutes: timeMinutes - memory: memory + memory: "~{memoryMb}M" } parameter_meta { @@ -184,9 +184,8 @@ task CollectMultipleMetrics { category: "advanced"} collectQualityYieldMetrics: {description: "Equivalent to the `PROGRAM=CollectQualityYieldMetrics` argument.", category: "advanced"} - - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", + memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", @@ -365,8 +364,8 @@ task GatherBamFiles { Array[File]+ inputBamsIndex String outputBamPath - String memory = "4G" - String javaXmx = "3G" + Int memoryMb = javaXmxMb + 512 + Int javaXmxMb = 1024 Int compressionLevel = 1 Boolean createMd5File = false Int timeMinutes = 1 + ceil(size(inputBams, "G") * 0.5) @@ -376,7 +375,7 @@ task GatherBamFiles { command { set -e mkdir -p "$(dirname ~{outputBamPath})" - picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + picard -Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1 \ GatherBamFiles \ INPUT=~{sep=' INPUT=' inputBams} \ OUTPUT=~{outputBamPath} \ @@ -394,7 +393,7 @@ task GatherBamFiles { runtime { docker: dockerImage time_minutes: timeMinutes - memory: memory + memory: "~{memoryMb}M" } parameter_meta { @@ -403,8 +402,8 @@ task GatherBamFiles { inputBamsIndex: {description: "The indexes of the input BAM files.", category: "required"} outputBamPath: {description: "The path where the merged BAM file will be written.", caregory: "required"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From e3ba9aa0760d936ce801df955fbea9fa619613b1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 30 Jun 2020 13:50:47 +0200 Subject: [PATCH 0483/1208] Add threads to samtools --- samtools.wdl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index ddc77c79..0b7ade7c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -329,6 +329,7 @@ task Merge { Array[File]+ bamFiles String outputBamPath = "merged.bam" Boolean force = true + Int threads = 1 Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" @@ -338,7 +339,10 @@ task Merge { command { set -e mkdir -p "$(dirname ~{outputBamPath})" - samtools merge ~{true="-f" false="" force} ~{outputBamPath} ~{sep=' ' bamFiles} + samtools merge \ + --threads ~{threads} \ + ~{true="-f" false="" force} \ + ~{outputBamPath} ~{sep=' ' bamFiles} samtools index ~{outputBamPath} ~{indexPath} } @@ -348,6 +352,7 @@ task Merge { } runtime { + cpu: threads docker: dockerImage time_minutes: timeMinutes } From 2ec5b8e3a4d2aca5d6c5c74c1a439a01ad9997e2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 30 Jun 2020 15:34:01 +0200 Subject: [PATCH 0484/1208] fix samtools merge --- samtools.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 0b7ade7c..49495693 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -336,11 +336,12 @@ task Merge { } String indexPath = sub(outputBamPath, "\.bam$",".bai") + # Samtools uses additional threads for merge. 
command { set -e mkdir -p "$(dirname ~{outputBamPath})" samtools merge \ - --threads ~{threads} \ + --threads ~{threads - 1} \ ~{true="-f" false="" force} \ ~{outputBamPath} ~{sep=' ' bamFiles} samtools index ~{outputBamPath} ~{indexPath} From 822e68d987b80781e14685283b8081b7a2e49d37 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 30 Jun 2020 15:35:15 +0200 Subject: [PATCH 0485/1208] increase time for gatherbam --- picard.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index d778e172..8962c4b6 100644 --- a/picard.wdl +++ b/picard.wdl @@ -368,7 +368,8 @@ task GatherBamFiles { Int javaXmxMb = 1024 Int compressionLevel = 1 Boolean createMd5File = false - Int timeMinutes = 1 + ceil(size(inputBams, "G") * 0.5) + # One minute per input gigabyte. + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } From bbeef08f838dacf283cee14a5b1494e46f8fbbe8 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 30 Jun 2020 15:39:32 +0200 Subject: [PATCH 0486/1208] make compression level optional for gather bam files --- picard.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/picard.wdl b/picard.wdl index 8962c4b6..1d8376ca 100644 --- a/picard.wdl +++ b/picard.wdl @@ -366,7 +366,7 @@ task GatherBamFiles { Int memoryMb = javaXmxMb + 512 Int javaXmxMb = 1024 - Int compressionLevel = 1 + Int? compressionLevel Boolean createMd5File = false # One minute per input gigabyte. 
Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) @@ -380,7 +380,7 @@ task GatherBamFiles { GatherBamFiles \ INPUT=~{sep=' INPUT=' inputBams} \ OUTPUT=~{outputBamPath} \ - COMPRESSION_LEVEL=~{compressionLevel} \ + ~{"COMPRESSION_LEVEL=" + compressionLevel} \ CREATE_INDEX=true \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} } From d29b7e221d6d0804e59007ce7b6260a1ae5f4159 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 1 Jul 2020 09:34:19 +0200 Subject: [PATCH 0487/1208] make md5 file optional --- picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index 1d8376ca..0e877f23 100644 --- a/picard.wdl +++ b/picard.wdl @@ -388,7 +388,7 @@ task GatherBamFiles { output { File outputBam = outputBamPath File outputBamIndex = sub(outputBamPath, "\.bam$", ".bai") - File outputBamMd5 = outputBamPath + ".md5" + File? outputBamMd5 = outputBamPath + ".md5" } runtime { From 71541cbda170782b87c9cad094b403dff1fe9e1f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 2 Jul 2020 11:00:57 +0200 Subject: [PATCH 0488/1208] Update tasks. 
--- talon.wdl | 84 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 11 deletions(-) diff --git a/talon.wdl b/talon.wdl index 2e944382..b71d0a89 100644 --- a/talon.wdl +++ b/talon.wdl @@ -242,8 +242,8 @@ task GetReadAnnotations { task GetSpliceJunctions { input { - File GTFfile - File databaseFile + File SJinformationFile + String inputFileType = "db" File referenceGTF String runMode = "intron" String outputPrefix @@ -253,19 +253,20 @@ task GetSpliceJunctions { String dockerImage = "biocontainers/talon:v5.0_cv1" } + Map[String, String] SJfileType = {"db": "--db", "gtf": "--gtf"} + command { set -e mkdir -p "$(dirname ~{outputPrefix})" talon_get_sjs \ - --gtf ~{GTFfile} \ - --db ~{databaseFile} \ + ~{SJfileType[inputFileType] + SJinformationFile} \ --ref ~{referenceGTF} \ --mode ~{runMode} \ --outprefix ~{outputPrefix} } output { - + File outputSJfile = outputPrefix + "_" + runMode + "s.tsv" } runtime { @@ -276,8 +277,8 @@ task GetSpliceJunctions { parameter_meta { # inputs - GTFfile: {description: "TALON GTF file from which to extract exons/introns.", category: "required"} - databaseFile: { description: "TALON database.", category: "required"} + SJinformationFile: {description: "TALON GTF file or database from which to extract exons/introns.", category: "required"} + inputFileType: {description: "The file type of SJinformationFile.", category: "required"} referenceGTF: {description: "GTF reference file (ie GENCODE).", category: "required"} runMode: {description: "Determines whether to include introns or exons in the output.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} @@ -286,6 +287,8 @@ task GetSpliceJunctions { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs + outputSJfile: {description: "File containing locations, novelty and transcript assignments of exons/introns."} + } } task InitializeTalonDatabase { @@ -347,6 +350,65 @@ task InitializeTalonDatabase { } } +task LabelReads { + input { + File SAMfile + File referenceGenome + Int fracaRangeSize = 20 + String tmpDir = "./tmp_label_reads" + Boolean deleteTmp = true + String outputPrefix + + Int threads = 2 + String memory = "4G" + Int timeMinutes = 2880 + String dockerImage = "biocontainers/talon:v5.0_cv1" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + talon_label_reads \ + --f=~{SAMfile} \ + --g=~{referenceGenome} \ + --t=~{threads} \ + --ar=~{fracaRangeSize} \ + --tmpDir=~{tmpDir} \ + ~{true="--deleteTmp" false="" deleteTmp} \ + --o=~{outputPrefix} + } + + output { + File outputLabeledSAM = outputPrefix + "_labeled.sam" + File outputReadLabels = outputPrefix + "_read_labels.tsv" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + SAMfile: {description: "SAM file of transcripts.", category: "required"} + referenceGenome: {description: "Reference genome fasta file.", category: "required"} + fracaRangeSize: {description: "Size of post-transcript interval to compute fraction.", category: "common"} + tmpDir: {description: "Path to directory for tmp files.", category: "advanced"} + deleteTmp: {description: "If set, tmp dir will be removed.", category: "advanced"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + threads: {description: "The number of threads to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + 
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputLabeledSAM: {description: "SAM file with labeled transcripts."} + outputReadLabels: {description: "Tabular file with fraction description per read."} + } +} + task ReformatGtf { input { File GTFfile @@ -443,7 +505,7 @@ task Talon { Float minimumIdentity = 0.8 String outputPrefix - Int cores = 4 + Int threads = 4 String memory = "25G" Int timeMinutes = 2880 String dockerImage = "biocontainers/talon:v5.0_cv1" @@ -465,7 +527,7 @@ task Talon { ~{"--f " + outputPrefix + "/talonConfigFile.csv"} \ --db ~{databaseFile} \ --build ~{genomeBuild} \ - --threads ~{cores} \ + --threads ~{threads} \ --cov ~{minimumCoverage} \ --identity ~{minimumIdentity} \ ~{"--o " + outputPrefix + "/run"} @@ -479,7 +541,7 @@ task Talon { } runtime { - cpu: cores + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -495,7 +557,7 @@ task Talon { minimumCoverage: {description: "Minimum alignment coverage in order to use a SAM entry.", category: "common"} minimumIdentity: {description: "Minimum alignment identity in order to use a SAM entry.", category: "common" } outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - cores: {description: "The number of cores to be used.", category: "advanced"} + threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From e921e42fb39f384c4f5dc590b70925b9d1c02a14 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 2 Jul 2020 11:03:54 +0200 Subject: [PATCH 0489/1208] Update threads and memories. --- talon.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/talon.wdl b/talon.wdl index b71d0a89..c70850dc 100644 --- a/talon.wdl +++ b/talon.wdl @@ -359,8 +359,8 @@ task LabelReads { Boolean deleteTmp = true String outputPrefix - Int threads = 2 - String memory = "4G" + Int threads = 4 + String memory = "25G" Int timeMinutes = 2880 String dockerImage = "biocontainers/talon:v5.0_cv1" } From cd18ce6a03823e8b09229bddf0f1dcd9ac7a1fdb Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 3 Jul 2020 10:26:38 +0200 Subject: [PATCH 0490/1208] Address comments. --- CHANGELOG.md | 3 ++- talon.wdl | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 69a57b1f..9a546718 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- -+ TALON: Update `FilterTalonTranscripts` to new version. ++ TALON: Update `FilterTalonTranscripts` to new version, which removes the + pairingsFile and replaces this with datasetsFile. + TALON: Add `GetSpliceJunctions` & `LabelReads` tasks. + TALON: Update to version 5.0. + Add tasks for pbmm2, the PacBio wrapper for minimap2. 
diff --git a/talon.wdl b/talon.wdl index c70850dc..87fc407d 100644 --- a/talon.wdl +++ b/talon.wdl @@ -280,7 +280,7 @@ task GetSpliceJunctions { SJinformationFile: {description: "TALON GTF file or database from which to extract exons/introns.", category: "required"} inputFileType: {description: "The file type of SJinformationFile.", category: "required"} referenceGTF: {description: "GTF reference file (ie GENCODE).", category: "required"} - runMode: {description: "Determines whether to include introns or exons in the output.", category: "required"} + runMode: {description: "Determines whether to include introns or exons in the output.", category: "common"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 68eeeb5381649f80c6de77f8c839f02ba5eb684a Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 3 Jul 2020 10:30:01 +0200 Subject: [PATCH 0491/1208] Address last comment. 
--- talon.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/talon.wdl b/talon.wdl index 87fc407d..a469ddba 100644 --- a/talon.wdl +++ b/talon.wdl @@ -278,7 +278,7 @@ task GetSpliceJunctions { parameter_meta { # inputs SJinformationFile: {description: "TALON GTF file or database from which to extract exons/introns.", category: "required"} - inputFileType: {description: "The file type of SJinformationFile.", category: "required"} + inputFileType: {description: "The file type of SJinformationFile.", category: "common"} referenceGTF: {description: "GTF reference file (ie GENCODE).", category: "required"} runMode: {description: "Determines whether to include introns or exons in the output.", category: "common"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} From 82bd73cec2e608dbda328f98dc95561d9ec8ba96 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Jul 2020 10:19:22 +0200 Subject: [PATCH 0492/1208] update haplotypecaller memory --- gatk.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 64297c8f..f79312fc 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -963,7 +963,8 @@ task HaplotypeCaller { Float? standardMinConfidenceThresholdForCalling Int memoryMb = javaXmxMb + 512 - Int javaXmxMb = 3072 + # Memory increases with time used. 4G should cover most use cases. + Int javaXmxMb = 4096 Int timeMinutes = 400 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } From a18806aa27a484f715d92b253d17fbf7b2a94782 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Jul 2020 10:22:39 +0200 Subject: [PATCH 0493/1208] Reduce flagstat memory --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 49495693..5daf57ab 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -209,7 +209,7 @@ task Flagstat { File inputBam String outputPath - String memory = "1G" + String memory = "256M" # Only 40.5 MiB used for 150G bam file. Int timeMinutes = 1 + ceil(size(inputBam, "G")) String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" } From 9749968ebb445bd9630940e5e2de478dc12ac220 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Jul 2020 10:26:34 +0200 Subject: [PATCH 0494/1208] Only little memory is needed for gatherbqsrreports --- gatk.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index f79312fc..0d36e440 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -725,8 +725,8 @@ task GatherBqsrReports { Array[File] inputBQSRreports String outputReportPath - String memory = "1G" - String javaXmx = "500M" + Int memoryMb = 256 + javaXmxMb + Int javaXmxMb = 256 Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" } @@ -734,7 +734,7 @@ task GatherBqsrReports { command { set -e mkdir -p "$(dirname ~{outputReportPath})" - gatk --java-options '-Xmx~{javaXmx} -XX:ParallelGCThreads=1' \ + gatk --java-options '-Xmx~{javaXmxMb} -XX:ParallelGCThreads=1' \ GatherBQSRReports \ -I ~{sep=' -I ' inputBQSRreports} \ -O ~{outputReportPath} @@ -747,15 +747,15 @@ task GatherBqsrReports { runtime { docker: dockerImage time_minutes: timeMinutes - memory: memory + memory: "~{memoryMb}M" } parameter_meta { inputBQSRreports: {description: "The BQSR reports to be merged.", category: "required"} outputReportPath: {description: "The location of the combined 
BQSR report.", category: "required"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From 09772e613aaf6d9029de629b9585e060bcfd1d2d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Jul 2020 10:28:05 +0200 Subject: [PATCH 0495/1208] Comment on real life use for sambamba markdup --- sambamba.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sambamba.wdl b/sambamba.wdl index bf58dbc8..cd8da21e 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -37,7 +37,7 @@ task Markdup { Boolean removeDuplicates = false # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size. - # Added 1024 mb as a margin of safety + # Added 1024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" # Time minute calculation does not work well for higher number of threads. From 46ea0ef31b4a49abc7859b7371d05770c040b10f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Jul 2020 10:30:30 +0200 Subject: [PATCH 0496/1208] use less memory on bcftools stats. 
--- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index bd79c2c6..e1ec3059 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -86,7 +86,7 @@ task Stats { Int threads = 0 Int timeMinutes = 1 + 2* ceil(size(select_all([inputVcf, compareVcf]), "G")) # TODO: Estimate, 2 minutes per GB, refine later. - String memory = "2G" # TODO: Safe estimate, refine later. + String memory = "256M" String dockerImage = "quay.io/biocontainers/bcftools:1.9--ha228f0b_3" } From d9b05b5897e9cd33b74715da0754f4806f90b1fa Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Jul 2020 10:32:31 +0200 Subject: [PATCH 0497/1208] 2GB per thread is sufficient --- bwa.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 0c35bf3a..4a0e86a8 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -30,7 +30,7 @@ task Mem { Int threads = 4 Int sortThreads = 1 - Int sortMemoryPerThreadGb = 4 + Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads @@ -95,7 +95,7 @@ task Kit { Int threads = 4 Int sortThreads = 1 - Int sortMemoryPerThreadGb = 4 + Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads From ec2bc612c1331e044608c629797076eeedec3187 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Jul 2020 10:45:52 +0200 Subject: [PATCH 0498/1208] update changelog --- CHANGELOG.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a546718..4acadc57 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,23 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Picard SortSam added as a task. ++ Md5 files are no longer created by default on Picard tasks that generate + BAM files. ++ Changed PicardMarkduplicates to use COMPRESSION_LEVEL=1 by default + speeding up execution by 2x at the cost of a 20% larger BAM file. ++ Added sambamba markdup and sambamba sort. NOTE: samtools sort is more + efficient and is recommended. ++ Correctly represent samtools inconsistent use of the threads flag. + Sometimes it means 'threads' sometimes it means 'additional threads'. + BioWDL tasks now use only threads. The `threads - 1` conversion is + applied where necessary for samtools tools that use additional threads. ++ Updated BWA MEM and BWA KIT tasks to use samtools sort version 1.10 for + sorting the BAM file. ++ Updated memory requirements on bcftools Stats, bwa mem, bwakit, GATK + ApplyBQSR, GATK BaseRecalibrator, GATK GatherBqsrReports, Gatk + HaplotypeCaller, Picard CollectMultipleMetrics, Picard GatherBamFiles, + samtools Flagstat, samtools sort and bcftools stats. + TALON: Update `FilterTalonTranscripts` to new version, which removes the pairingsFile and replaces this with datasetsFile. + TALON: Add `GetSpliceJunctions` & `LabelReads` tasks. 
From 4f41ec07bbb1bbbafd70985ecb812d64c0444f53 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Jul 2020 10:55:35 +0200 Subject: [PATCH 0499/1208] add forgotten M --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index 0d36e440..939513db 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -734,7 +734,7 @@ task GatherBqsrReports { command { set -e mkdir -p "$(dirname ~{outputReportPath})" - gatk --java-options '-Xmx~{javaXmxMb} -XX:ParallelGCThreads=1' \ + gatk --java-options '-Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1' \ GatherBQSRReports \ -I ~{sep=' -I ' inputBQSRreports} \ -O ~{outputReportPath} From ca368e7e5a4ff8c1d4776d0095c0d62aeb5b1083 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Jul 2020 13:41:56 +0200 Subject: [PATCH 0500/1208] add parameter_meta --- bwa.wdl | 1 + picard.wdl | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index 4a0e86a8..78881ad2 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -78,6 +78,7 @@ task Mem { memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/picard.wdl b/picard.wdl index 0e877f23..c090455e 100644 --- a/picard.wdl +++ b/picard.wdl @@ -402,7 +402,8 @@ task GatherBamFiles { inputBams: {description: "The BAM files to be merged together.", category: "required"} inputBamsIndex: {description: "The indexes of the input BAM files.", category: "required"} outputBamPath: {description: "The path where the merged BAM file will be written.", caregory: "required"} - + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + createMd5File: {decription: "Whether to create an md5 file of the output BAM.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} From 50c5d957408dbf8a6f1d6aa79c0a3b05ffdde664 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 7 Jul 2020 12:41:49 +0200 Subject: [PATCH 0501/1208] Add samtools controls to hisat2 --- CHANGELOG.md | 3 +++ hisat2.wdl | 21 +++++++++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4acadc57..edfffb5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Hisat2 task has added controls for samtools. ++ Alignment tasks no longer produce BAM indexes as these are not needed + by the markduplicates step. + Picard SortSam added as a task. + Md5 files are no longer created by default on Picard tasks that generate BAM files. 
diff --git a/hisat2.wdl b/hisat2.wdl index 5937f86d..77c370fd 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -34,7 +34,10 @@ task Hisat2 { String summaryFilePath = basename(outputBam, ".bam") + ".summary.txt" Int threads = 4 - String memory = "~{threads + 5 + ceil(size(indexFiles, "G"))}G" + Int sortThreads = 1 + Int sortMemoryPerThreadGb = 2 + Int compressionLevel = 1 + Int memoryGb = 1 + threads + ceil(size(indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / threads) # quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1 # is a combination of hisat2 and samtools @@ -59,18 +62,21 @@ task Hisat2 { ~{true="--dta" false="" downstreamTranscriptomeAssembly} \ --new-summary \ --summary-file ~{summaryFilePath} \ - | samtools sort > ~{outputBam} - samtools index ~{outputBam} ~{bamIndexPath} + | samtools sort \ + ~{"-@ " + sortThreads} \ + -m ~{sortMemoryPerThreadGb}G \ + -l ~{compressionLevel} \ + - \ + -o ~{outputBam} } output { File bamFile = outputBam - File bamIndex = bamIndexPath File summaryFile = summaryFilePath } runtime { - memory: memory + memory: "~{memoryGb}G" cpu: threads + 1 time_minutes: timeMinutes docker: dockerImage @@ -88,9 +94,12 @@ task Hisat2 { downstreamTranscriptomeAssembly: {description: "Equivalent to hisat2's `--dta` flag.", category: "advanced"} summaryFilePath: {description: "Where the summary file should be written.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} } } \ No newline at end of file From b5d6e71a72124dc53eb9344ad0d6a1857bdaea69 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 7 Jul 2020 12:47:13 +0200 Subject: [PATCH 0502/1208] Add outputBAMcompression to STAR --- CHANGELOG.md | 2 ++ star.wdl | 3 +++ 2 files changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index edfffb5e..e70b06a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Alignment tasks (STAR, Hisat2, BWA) now produce BAM files at level 1 + compression. + Hisat2 task has added controls for samtools. + Alignment tasks no longer produce BAM indexes as these are not needed by the markduplicates step. diff --git a/star.wdl b/star.wdl index 4da67f72..3d0e2eb0 100644 --- a/star.wdl +++ b/star.wdl @@ -103,6 +103,7 @@ task Star { String? twopassMode = "Basic" Array[String]? outSAMattrRGline String? outSAMunmapped = "Within KeepPairs" + Int outBAMcompression = 1 Int? 
limitBAMsortRAM Int runThreadN = 4 @@ -129,6 +130,7 @@ task Star { --outFileNamePrefix ~{outFileNamePrefix} \ --genomeDir ~{sub(indexFiles[0], basename(indexFiles[0]), "")} \ --outSAMtype ~{outSAMtype} \ + --outBAMcompression ~{outBAMcompression} \ --readFilesCommand ~{readFilesCommand} \ ~{"--outFilterScoreMin " + outFilterScoreMin} \ ~{"--outFilterScoreMinOverLread " + outFilterScoreMinOverLread} \ @@ -172,6 +174,7 @@ task Star { limitBAMsortRAM: {description: "Equivalent to star's `--limitBAMsortRAM` option.", category: "advanced"} runThreadN: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + outBAMcompression: {description: "The compression level of the output BAM.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From bef15749a53fe13c91aa9a9f28344c0d0b08001d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 7 Jul 2020 13:44:07 +0200 Subject: [PATCH 0503/1208] use samtools 1.10 and hisat 2.2.0 --- hisat2.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hisat2.wdl b/hisat2.wdl index 77c370fd..c24610ed 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -41,8 +41,8 @@ task Hisat2 { Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / threads) # quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1 # is a combination of hisat2 and samtools - # hisat2=2.1.0, samtools=1.8 - String dockerImage = "quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:2388ff67fc407dad75774291ca5038f40cac4be0-0" + # hisat2=2.2.0, samtools=1.10 + String dockerImage = 
"quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:2880dd9d8ad0a7b221d4eacda9a818e92983128d-0" } String bamIndexPath = sub(outputBam, "\.bam$", ".bai") From d0207b7f8b6234e17a03ffa4073190ea543b5c48 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 7 Jul 2020 13:44:53 +0200 Subject: [PATCH 0504/1208] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e70b06a6..a51d097f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Default docker images for bwa, bwakit and hisat2 updated to include samtools + 1.10. + Alignment tasks (STAR, Hisat2, BWA) now produce BAM files at level 1 compression. + Hisat2 task has added controls for samtools. From 8e3788d8dd926e5924226a965b8f8cf688141ac3 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 9 Jul 2020 15:53:39 +0200 Subject: [PATCH 0505/1208] Use htsjdk inflaters and deflaters for markduplicates --- picard.wdl | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/picard.wdl b/picard.wdl index c090455e..4dd4d970 100644 --- a/picard.wdl +++ b/picard.wdl @@ -467,9 +467,15 @@ task MarkDuplicates { String metricsPath Int compressionLevel = 1 Boolean createMd5File = false + Boolean useJdkInflater = true # Slightly faster than the intel one. + # Better results for compression level 1 (much smaller). Higher compression levels similar to intel deflater. + Boolean useJdkDeflater = true + + # In GATK Best practices pipeline MarkDuplicates is given a 7G VM. 
+ # https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L1040 + Int javaXmxMb = 6656 # 6.5G + String memoryMb = javaXmxMb + 512 - String memory = "9G" - String javaXmx = "8G" Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" @@ -488,7 +494,7 @@ task MarkDuplicates { command { set -e mkdir -p "$(dirname ~{outputBamPath})" - picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + picard -Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1 \ MarkDuplicates \ INPUT=~{sep=' INPUT=' inputBams} \ OUTPUT=~{outputBamPath} \ @@ -500,7 +506,9 @@ task MarkDuplicates { CLEAR_DT="false" \ CREATE_INDEX=true \ ADD_PG_TAG_TO_READS=false \ - CREATE_MD5_FILE=~{true="true" false="false" createMd5File} + CREATE_MD5_FILE=~{true="true" false="false" createMd5File} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} } output { @@ -513,7 +521,7 @@ task MarkDuplicates { runtime { docker: dockerImage time_minutes: timeMinutes - memory: memory + memory: "~{memoryMb}M" } parameter_meta { @@ -523,8 +531,8 @@ task MarkDuplicates { metricsPath: {description: "The location where the output metrics file should be written.", category: "required"} read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From 7f293e334fee666f9719ba6924a45c5a1678b441 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 9 Jul 2020 16:04:19 +0200 Subject: [PATCH 0506/1208] Update changelog --- CHANGELOG.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4acadc57..4678476f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,11 +11,15 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Picard Markduplicates now uses 7G of RAM just like in GATK's best practice + example pipeline. + Picard SortSam added as a task. + Md5 files are no longer created by default on Picard tasks that generate BAM files. -+ Changed PicardMarkduplicates to use COMPRESSION_LEVEL=1 by default - speeding up execution by 2x at the cost of a 20% larger BAM file. ++ Changed PicardMarkduplicates to use COMPRESSION_LEVEL=1 by default with + the htsjdk deflater. + This makes the task finish in 30% less time at the cost of a 6% larger BAM + file. + Added sambamba markdup and sambamba sort. NOTE: samtools sort is more efficient and is recommended. + Correctly represent samtools inconsistent use of the threads flag. 
From 3e7b07970fa1439b3718f8f7d912858c94f657d4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 13 Jul 2020 10:42:14 +0200 Subject: [PATCH 0507/1208] Use more sort threads for alignment if more alignment threads are used --- bwa.wdl | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 78881ad2..58e1dc80 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -29,16 +29,23 @@ task Mem { String? readgroup Int threads = 4 - Int sortThreads = 1 + Int? sortThreads Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 - # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads + Int? memoryGb Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 200 / threads) # This container contains: samtools (1.10), bwa (0.7.17-r1188) String dockerImage = "quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:eabfac3657eda5818bae4090db989e3d41b01542-0" } + # Samtools sort may block the pipe while it is writing data to disk. + # This can lead to cpu underutilization. + # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. + Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) + Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) + # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
+ Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads + command { set -e -o pipefail mkdir -p "$(dirname ~{outputPath})" @@ -49,7 +56,7 @@ task Mem { ~{read1} \ ~{read2} \ | samtools sort \ - ~{"-@ " + sortThreads} \ + ~{"-@ " + totalSortThreads} \ -m ~{sortMemoryPerThreadGb}G \ -l ~{compressionLevel} \ - \ @@ -62,7 +69,7 @@ task Mem { runtime { cpu: threads - memory: "~{memoryGb}G" + memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" time_minutes: timeMinutes docker: dockerImage } @@ -95,16 +102,23 @@ task Kit { Boolean sixtyFour = false Int threads = 4 - Int sortThreads = 1 + Int? sortThreads Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 - # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int memoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads + Int? memoryGb Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10 String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } + # Samtools sort may block the pipe while it is writing data to disk. + # This can lead to cpu underutilization. + # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. + Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) + Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) + # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
+ Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads + command { set -e mkdir -p "$(dirname ~{outputPrefix})" @@ -119,7 +133,7 @@ task Kit { -p ~{outputPrefix}.hla \ ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ samtools sort \ - ~{"-@ " + sortThreads} \ + ~{"-@ " + totalSortThreads} \ -m ~{sortMemoryPerThreadGb}G \ -l ~{compressionLevel} \ - \ @@ -134,7 +148,7 @@ task Kit { # One extra thread for bwa-postalt + samtools is not needed. # These only use 5-10% of compute power and not always simultaneously. cpu: threads - memory: "~{memoryGb}G" + memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" time_minutes: timeMinutes docker: dockerImage } From 5e122542455264c41e6ff2aa9d7052b31ca13345 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 13 Jul 2020 11:59:28 +0200 Subject: [PATCH 0508/1208] Request less memory for htseq-count --- htseq.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/htseq.wdl b/htseq.wdl index 35faeef3..cba32c6f 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -32,7 +32,7 @@ task HTSeqCount { String? idattr Array[String] additionalAttributes = [] - String memory = "40G" + String memory = "8G" Int timeMinutes = 10 + ceil(size(inputBams, "G") * 60) String dockerImage = "quay.io/biocontainers/htseq:0.11.2--py37h637b7d7_1" } From 4bfae06690bf1d817cc73d4671620ed47838d19b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 16 Jul 2020 07:33:00 +0200 Subject: [PATCH 0509/1208] Add parameter_meta --- picard.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index 4dd4d970..f9dd5210 100644 --- a/picard.wdl +++ b/picard.wdl @@ -469,6 +469,7 @@ task MarkDuplicates { Boolean createMd5File = false Boolean useJdkInflater = true # Slightly faster than the intel one. # Better results for compression level 1 (much smaller). Higher compression levels similar to intel deflater. 
+ # NOTE: this might change in the future when the intel deflater is updated! Boolean useJdkDeflater = true # In GATK Best practices pipeline MarkDuplicates is given a 7G VM. @@ -530,7 +531,10 @@ task MarkDuplicates { outputBamPath: {description: "The location where the ouptut BAM file should be written.", category: "required"} metricsPath: {description: "The location where the output metrics file should be written.", category: "required"} read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"} - + createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} From 79faa7cad3bb8049c9d617804d7d2c74db29e069 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 16 Jul 2020 15:55:27 +0200 Subject: [PATCH 0510/1208] update cutadapt container --- cutadapt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index ad32ff21..d125af43 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -81,7 +81,7 @@ task Cutadapt { Int cores = 4 String memory = "~{300 + 100 * cores}M" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) - String dockerImage = "quay.io/biocontainers/cutadapt:2.8--py37h516909a_0" + String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37h516909a_0" } String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) From 3e254feafd92aed3a88c4f9e37750f7a9dbeeba6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 16 Jul 2020 16:03:51 +0200 Subject: [PATCH 0511/1208] update samtools image --- samtools.wdl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 5daf57ab..0b8394bf 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -114,7 +114,7 @@ task Fastq { Int threads = 1 String memory = "1G" Int timeMinutes = 1 + ceil(size(inputBam) * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } command { @@ -170,7 +170,7 @@ task FilterShortReadsBam { String outputPathBam String memory = "1G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") @@ -211,7 +211,7 @@ task Flagstat { String memory = "256M" # Only 40.5 MiB used for 150G bam file. 
Int timeMinutes = 1 + ceil(size(inputBam, "G")) - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } command { @@ -247,7 +247,7 @@ task Index { String? outputBamPath String memory = "2G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } # Select_first is needed, otherwise womtool validate fails. @@ -296,7 +296,7 @@ task Markdup { String outputBamPath Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } command { @@ -332,7 +332,7 @@ task Merge { Int threads = 1 Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } String indexPath = sub(outputBamPath, "\.bam$",".bai") @@ -483,7 +483,7 @@ task View { Int threads = 1 String memory = "1G" Int timeMinutes = 1 + ceil(size(inFile, "G") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.8--h46bd0b3_5" + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } String outputIndexPath = basename(outputFileName) + ".bai" From 8845e4f521375a41711625cab0bc010be1b41616 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 16 Jul 2020 16:06:53 +0200 Subject: [PATCH 0512/1208] Update picard container --- picard.wdl | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/picard.wdl b/picard.wdl index f9dd5210..adb55b4b 100644 --- a/picard.wdl +++ b/picard.wdl @@ -89,7 +89,7 @@ task CollectMultipleMetrics { Int javaXmxMb = 3072 # Additional * 2 because picard multiple metrics reads the reference fasta twice. 
Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -205,7 +205,7 @@ task CollectRnaSeqMetrics { String javaXmx = "8G" # With 6 minutes per G there were several timeouts. Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } command { @@ -263,7 +263,7 @@ task CollectTargetedPcrMetrics { String memory = "4G" String javaXmx = "3G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } command { @@ -322,7 +322,7 @@ task CreateSequenceDictionary { String memory = "3G" String javaXmx = "2G" - String dockerImage = "quay.io/biocontainers/picard:2.22.3--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } command { @@ -370,7 +370,7 @@ task GatherBamFiles { Boolean createMd5File = false # One minute per input gigabyte. Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } command { @@ -422,7 +422,7 @@ task GatherVcfs { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } command { @@ -478,7 +478,7 @@ task MarkDuplicates { String memoryMb = javaXmxMb + 512 Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" # The program default for READ_NAME_REGEX is appropriate in nearly every case. 
# Sometimes we wish to supply "null" in order to turn off optical duplicate detection @@ -554,7 +554,7 @@ task MergeVCFs { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } # Using MergeVcfs instead of GatherVcfs so we can create indices @@ -603,7 +603,7 @@ task SamToFastq { String memory = "17G" String javaXmx = "16G" # High memory default to avoid crashes. - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" File? NONE } @@ -640,7 +640,7 @@ task ScatterIntervalList { String memory = "4G" String javaXmx = "3G" - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } command { @@ -681,9 +681,7 @@ task SortSam { # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) - # A mulled container is needed to have both picard and bwa in one container. 
- # This container contains: picard (2.18.7), bwa (0.7.17-r1188) - String dockerImage = "quay.io/biocontainers/picard:2.23.1--h37ae868_0" + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } command { From 1308c1812e3c3c62546bd44cd5015923f8cd0024 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 17 Jul 2020 08:16:57 +0200 Subject: [PATCH 0513/1208] update default images --- bcftools.wdl | 2 +- chunked-scatter.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index e1ec3059..60224b0b 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -87,7 +87,7 @@ task Stats { Int threads = 0 Int timeMinutes = 1 + 2* ceil(size(select_all([inputVcf, compareVcf]), "G")) # TODO: Estimate, 2 minutes per GB, refine later. String memory = "256M" - String dockerImage = "quay.io/biocontainers/bcftools:1.9--ha228f0b_3" + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } command { diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 3ef0c747..b54a7d2e 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -30,7 +30,7 @@ task ChunkedScatter { String memory = "256M" Int timeMinutes = 2 - String dockerImage = "quay.io/biocontainers/chunked-scatter:0.2.0--py_0" + String dockerImage = "quay.io/biocontainers/chunked-scatter:1.0.0--py_0" } command { From 77b3d960288160fc178cace4c1c3a266d0fc205c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 17 Jul 2020 08:18:25 +0200 Subject: [PATCH 0514/1208] update docker image --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 60224b0b..7f100f9b 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -28,7 +28,7 @@ task Bcf2Vcf { String outputPath = "./bcftools/SV.vcf" String memory = "2G" Int timeMinutes = 1 + ceil(size(bcf, "G")) - String dockerImage = "quay.io/biocontainers/bcftools:1.9--ha228f0b_3" + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } command { 
From 86cd20381fdee68938b98a92cd09fbf1f9f0642b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 17 Jul 2020 08:34:03 +0200 Subject: [PATCH 0515/1208] Overhaul view task --- bcftools.wdl | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 7f100f9b..e60142db 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -22,23 +22,32 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -task Bcf2Vcf { +task View { input { - File bcf - String outputPath = "./bcftools/SV.vcf" - String memory = "2G" - Int timeMinutes = 1 + ceil(size(bcf, "G")) + File inputFile + String outputPath = "output.vcf.gz" + String memory = "256M" + Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + String outputType = "z" + Int compressionLevel = 1 } command { set -e mkdir -p "$(dirname ~{outputPath})" - bcftools view ~{bcf} -O v -o ~{outputPath} + bcftools view \ + ~{inputFile} -o ~{outputPath} \ + -O ~{outputType} \ + -l ~{compressionLevel} + ~{inputFile} + bcftools index --tbi ~{outputPath} + } output { File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" } runtime { @@ -48,9 +57,11 @@ task Bcf2Vcf { } parameter_meta { - bcf: {description: "The generated BCF from an SV caller", category: "required"} + inputFile: {description: "A vcf or bcf file", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} + outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: 
"The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From 631b5dfa75c053afb0cd97be154bd34534e98167 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 17 Jul 2020 09:27:27 +0200 Subject: [PATCH 0516/1208] fix view task --- bcftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index e60142db..b99a8cf5 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -37,9 +37,9 @@ task View { set -e mkdir -p "$(dirname ~{outputPath})" bcftools view \ - ~{inputFile} -o ~{outputPath} \ + -o ~{outputPath} \ -O ~{outputType} \ - -l ~{compressionLevel} + -l ~{compressionLevel} \ ~{inputFile} bcftools index --tbi ~{outputPath} From 959bf985ed283d374f747a4791bed1cde753c201 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 17 Jul 2020 11:09:48 +0200 Subject: [PATCH 0517/1208] Update default gatk image --- gatk.wdl | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 939513db..0f5218bd 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -35,7 +35,7 @@ task AnnotateIntervals { String memory = "3G" String javaXmx = "2G" Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -96,7 +96,7 @@ task ApplyBQSR { Int memoryMb = javaXmxMb + 512 Int javaXmxMb = 2048 Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -168,7 +168,7 @@ task BaseRecalibrator { Int memoryMb = javaXmxMb + 512 Int javaXmxMb = 1024 Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
- String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -227,7 +227,7 @@ task CalculateContamination { String memory = "13G" String javaXmx = "12G" Int timeMinutes = 180 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -271,7 +271,7 @@ task CallCopyRatioSegments { String memory = "3G" String javaXmx = "2G" Int timeMinutes = 2 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -320,7 +320,7 @@ task CollectAllelicCounts { String memory = "11G" String javaXmx = "10G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -376,7 +376,7 @@ task CollectReadCounts { String memory = "8G" String javaXmx = "7G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 5) - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -433,7 +433,7 @@ task CombineGVCFs { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 1 + ceil(size(gvcfFiles, "G") * 8) - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -607,7 +607,7 @@ task DenoiseReadCounts { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -665,7 +665,7 @@ task FilterMutectCalls { String memory = "13G" String javaXmx = "12G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + String dockerImage = 
"quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -728,7 +728,7 @@ task GatherBqsrReports { Int memoryMb = 256 + javaXmxMb Int javaXmxMb = 256 Int timeMinutes = 1 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -774,7 +774,7 @@ task GenomicsDBImport { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 180 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -833,7 +833,7 @@ task GenotypeGVCFs { String memory = "7G" String javaXmx = "6G" Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -899,7 +899,7 @@ task GetPileupSummaries { String memory = "13G" String javaXmx = "12G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -966,7 +966,7 @@ task HaplotypeCaller { # Memory increases with time used. 4G should cover most use cases. Int javaXmxMb = 4096 Int timeMinutes = 400 # This will likely be used with intervals, as such size based estimation can't be used. 
- String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -1040,7 +1040,7 @@ task LearnReadOrientationModel { String memory = "13G" String javaXmx = "12G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.2.0--1" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -1079,7 +1079,7 @@ task MergeStats { String memory = "15G" String javaXmx = "14G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -1126,7 +1126,7 @@ task ModelSegments { String memory = "11G" String javaXmx = "10G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -1202,7 +1202,7 @@ task MuTect2 { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 240 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -1389,7 +1389,7 @@ task PreprocessIntervals { String memory = "4G" String javaXmx = "3G" Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 6) - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -1447,7 +1447,7 @@ task SelectVariants { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -1507,7 +1507,7 @@ task SplitNCigarReads { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
- String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -1576,7 +1576,7 @@ task VariantEval { String javaXmx = "4G" # TODO: Refine estimate. For now 4 minutes per GB of input. Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs, select_all([referenceFasta, dbsnpVCF])]), "G") * 20) - String dockerImage = "quay.io/biocontainers/gatk4:4.1.7.0--py38_0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { @@ -1647,7 +1647,7 @@ task VariantFiltration { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/gatk4:4.1.0.0--0" + String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } command { From 4b065c35ce49b561dc2aa1a4118f01e31a8bdfe4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 17 Jul 2020 13:23:47 +0200 Subject: [PATCH 0518/1208] Update task with newest features --- htseq.wdl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/htseq.wdl b/htseq.wdl index cba32c6f..829dd32f 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -25,23 +25,23 @@ task HTSeqCount { Array[File]+ inputBams File gtfFile String outputTable = "output.tsv" - String format = "bam" String order = "pos" String stranded = "no" String? featureType String? 
idattr Array[String] additionalAttributes = [] + Int threads = 1 String memory = "8G" Int timeMinutes = 10 + ceil(size(inputBams, "G") * 60) - String dockerImage = "quay.io/biocontainers/htseq:0.11.2--py37h637b7d7_1" + String dockerImage = "quay.io/biocontainers/htseq:0.12.4--py37hb3f55d8_0" } command { set -e mkdir -p "$(dirname ~{outputTable})" htseq-count \ - -f ~{format} \ + --nprocesses ~{threads} \ -r ~{order} \ -s ~{stranded} \ ~{"--type " + featureType} \ @@ -49,7 +49,7 @@ task HTSeqCount { ~{true="--additional-attr " false="" length(additionalAttributes) > 0 }~{sep=" --additional-attr " additionalAttributes} \ ~{sep=" " inputBams} \ ~{gtfFile} \ - > ~{outputTable} + -c ~{outputTable} } output { @@ -57,6 +57,7 @@ task HTSeqCount { } runtime { + cpu: threads time_minutes: timeMinutes memory: memory docker: dockerImage From 9c8b72b87bd71aa0e609e5e9b5ef7a76d37b1933 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 17 Jul 2020 13:35:07 +0200 Subject: [PATCH 0519/1208] Update parameter_meta --- htseq.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/htseq.wdl b/htseq.wdl index 829dd32f..cbd8e2ac 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -31,7 +31,7 @@ task HTSeqCount { String? 
idattr Array[String] additionalAttributes = [] - Int threads = 1 + Int nprocesses = 1 String memory = "8G" Int timeMinutes = 10 + ceil(size(inputBams, "G") * 60) String dockerImage = "quay.io/biocontainers/htseq:0.12.4--py37hb3f55d8_0" @@ -41,7 +41,7 @@ task HTSeqCount { set -e mkdir -p "$(dirname ~{outputTable})" htseq-count \ - --nprocesses ~{threads} \ + --nprocesses ~{nprocesses} \ -r ~{order} \ -s ~{stranded} \ ~{"--type " + featureType} \ @@ -57,7 +57,7 @@ task HTSeqCount { } runtime { - cpu: threads + cpu: nprocesses time_minutes: timeMinutes memory: memory docker: dockerImage @@ -67,7 +67,7 @@ task HTSeqCount { inputBams: {description: "The input BAM files.", category: "required"} gtfFile: {description: "A GTF/GFF file containing the features of interest.", category: "required"} outputTable: {description: "The path to which the output table should be written.", category: "common"} - format: {description: "Equivalent to the -f option of htseq-count.", category: "advanced"} + nprocesses: {description: "Number of processes to run htseq with.", category: "advanced"} order: {description: "Equivalent to the -r option of htseq-count.", category: "advanced"} stranded: {description: "Equivalent to the -s option of htseq-count.", category: "common"} featureType: {description: "Equivalent to the --type option of htseq-count.", category: "advanced"} From fdf0e47fa2f22e51803f5610991dd831bc47aaf8 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 17 Jul 2020 15:15:55 +0200 Subject: [PATCH 0520/1208] Update images --- gatk.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 939513db..f20bf70a 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -560,7 +560,7 @@ task CreateReadCountPanelOfNormals { String memory = "8G" String javaXmx = "7G" Int timeMinutes = 5 - String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer causes a spark related error for some reason... 
+ String dockerImage = "broadinstitute/gatk:4.1.8.0" # The biocontainer causes a spark related error for some reason... } command { @@ -1271,7 +1271,7 @@ task PlotDenoisedCopyRatios { String memory = "4G" String javaXmx = "3G" Int timeMinutes = 2 - String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer doesn't seem to contain R. + String dockerImage = "broadinstitute/gatk:4.1.8.0" } command { @@ -1289,7 +1289,7 @@ task PlotDenoisedCopyRatios { output { File denoisedCopyRatiosPlot = outputDir + "/" + outputPrefix + ".denoised.png" - File denoisedCopyRatiosLimitedPlot = outputDir + "/" + outputPrefix + ".denoisedLimit4.png" + File? denoisedCopyRatiosLimitedPlot = outputDir + "/" + outputPrefix + ".denoisedLimit4.png" File standardizedMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".standardizedMAD.txt" File denoisedMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".denoisedMAD.txt" File deltaMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".deltaMAD.txt" @@ -1331,7 +1331,7 @@ task PlotModeledSegments { String memory = "4G" String javaXmx = "3G" Int timeMinutes = 2 - String dockerImage = "broadinstitute/gatk:4.1.4.0" # The biocontainer doesn't seem to contain R. + String dockerImage = "broadinstitute/gatk:4.1.8.0" } command { From 156e9cc405d8124b5cb2ec577ab21e33b8c0c8ad Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 17 Jul 2020 15:38:59 +0200 Subject: [PATCH 0521/1208] update changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f50bf650..10dc2f32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Reworked bcf2vcf task into bcftools view task. ++ Update docker images for samtools, bcftools, picard, GATK, cutadapt and + chunked-scatter. + Default docker images for bwa, bwakit and hisat2 updated to include samtools 1.10. 
+ Alignment tasks (STAR, Hisat2, BWA) now produce BAM files at level 1 From 9826e25a04aa9db88ebdc23cc22da5f69916eb50 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 20 Jul 2020 07:40:53 +0200 Subject: [PATCH 0522/1208] Add missing interpunction. Co-authored-by: Jasper --- bcftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index b99a8cf5..24fbb44c 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -57,7 +57,7 @@ task View { } parameter_meta { - inputFile: {description: "A vcf or bcf file", category: "required"} + inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf"} memory: {description: "The amount of memory this job will use.", category: "advanced"} @@ -170,4 +170,4 @@ task Stats { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} } -} \ No newline at end of file +} From 633c58b2f8f467c691ff24537fafc6359bda14d3 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 20 Jul 2020 07:41:42 +0200 Subject: [PATCH 0523/1208] Remove redundant newline. 
Co-authored-by: Jasper --- bcftools.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 24fbb44c..2677899b 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -42,7 +42,6 @@ task View { -l ~{compressionLevel} \ ~{inputFile} bcftools index --tbi ~{outputPath} - } output { From 34163a93692614ae364c0f85c66e410954bf39f0 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 20 Jul 2020 07:50:31 +0200 Subject: [PATCH 0524/1208] Update changelog --- CHANGELOG.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 10dc2f32..22e6c56f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,8 +12,10 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- + Reworked bcf2vcf task into bcftools view task. -+ Update docker images for samtools, bcftools, picard, GATK, cutadapt and - chunked-scatter. ++ Removed the redundant format flag from the htseq interface. This is + autodetected in newer versions of htseq. ++ Update docker images for samtools, bcftools, picard, GATK, cutadapt, htseq + and chunked-scatter. + Default docker images for bwa, bwakit and hisat2 updated to include samtools 1.10. + Alignment tasks (STAR, Hisat2, BWA) now produce BAM files at level 1 From c248c5bba1811f96b199df9058b18bd44aaed0c0 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 21 Jul 2020 16:54:03 +0200 Subject: [PATCH 0525/1208] Update output names for the first set of tasks. --- CHANGELOG.md | 2 ++ ccs.wdl | 18 +++++----- centrifuge.wdl | 42 +++++++++++------------ isoseq3.wdl | 28 ++++++++-------- lima.wdl | 38 ++++++++++----------- talon.wdl | 90 +++++++++++++++++++++++++------------------------- 6 files changed, 110 insertions(+), 108 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22e6c56f..9796c484 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. 
version 4.0.0-develop --------------------------- ++ Renamed outputs of tasks used in the TALON-WDL, PacBio-subreads-processing & + sequence-classification pipelines. + Reworked bcf2vcf task into bcftools view task. + Removed the redundant format flag from the htseq interface. This is autodetected in newer versions of htseq. diff --git a/ccs.wdl b/ccs.wdl index 1762ac75..60e43711 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -55,10 +55,10 @@ task CCS { } output { - File outputCCSfile = outputPrefix + ".ccs.bam" - File outputCCSindexFile = outputPrefix + ".ccs.bam.pbi" - File outputReportFile = outputPrefix + ".ccs.report.txt" - File outputSTDERRfile = outputPrefix + ".ccs.stderr.log" + File ccsBam = outputPrefix + ".ccs.bam" + File ccsBamIndex = outputPrefix + ".ccs.bam.pbi" + File ccsReport = outputPrefix + ".ccs.report.txt" + File ccsStderr = outputPrefix + ".ccs.stderr.log" } runtime { @@ -70,7 +70,7 @@ task CCS { parameter_meta { # inputs - minPasses: {description: "Minimum number of full-length subreads required to generate CCS for a ZMW.", category: "advanced"} + minPasses: {description: "Minimum number of full-length subreads required to generate ccs for a ZMW.", category: "advanced"} minLength: {description: "Minimum draft length before polishing.", category: "advanced"} maxLength: {description: "Maximum draft length before polishing.", category: "advanced"} byStrand: {description: "Generate a consensus for each strand.", category: "advanced"} @@ -84,9 +84,9 @@ task CCS { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputCCSfile: {description: "Consensus reads output file."} - outputCCSindexFile: {description: "Index of consensus reads output file."} - outputReportFile: {description: "CCS results report file."} - outputSTDERRfile: {description: "CCS STDERR log file."} + ccsBam: {description: "Consensus reads output file."} + ccsBamIndex: {description: "Index of consensus reads output file."} + ccsReport: {description: "Ccs results report file."} + ccsStderr: {description: "Ccs STDERR log file."} } } diff --git a/centrifuge.wdl b/centrifuge.wdl index f2b26043..e1cddcad 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -59,7 +59,7 @@ task Build { } output { - Array[File] outputIndex = glob(outputPrefix + "/" + indexBasename + "*.cf") + Array[File] index = glob(outputPrefix + "/" + indexBasename + "*.cf") } runtime { @@ -75,7 +75,7 @@ task Build { conversionTable: {description: "List of UIDs (unique ID) and corresponding taxonomic IDs.", category: "required"} taxonomyTree: {description: "Taxonomic tree (e.g. nodes.dmp).", category: "required"} nameTable: {description: "Name table (e.g. names.dmp).", category: "required"} - referenceFile: {description: "A comma-separated list of FASTA files containing the reference sequences to be aligned to.", category: "required"} + referenceFile: {description: "A comma-separated list of fasta files containing the reference sequences to be aligned to.", category: "required"} indexBasename: {description: "The basename of the index files to write.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} offrate: {description: "The number of rows marked by the indexer.", category: "common"} @@ -88,7 +88,7 @@ task Build { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputIndex: {description: "Generated Centrifuge index."} + index: {description: "Generated centrifuge index."} } } @@ -142,9 +142,9 @@ task Classify { >>> output { - File outputMetrics = outputPrefix + "_alignment_metrics.tsv" - File outputClassification = outputPrefix + "_classification.tsv" - File outputReport = outputPrefix + "_output_report.tsv" + File metrics = outputPrefix + "_alignment_metrics.tsv" + File classification = outputPrefix + "_classification.tsv" + File report = outputPrefix + "_output_report.tsv" } runtime { @@ -156,7 +156,7 @@ task Classify { parameter_meta { # inputs inputFormat: {description: "The format of the read file(s).", category: "required"} - phred64: {description: "If set to true, Phred+64 encoding is used.", category: "required"} + phred64: {description: "If set to true, phred+64 encoding is used.", category: "required"} minHitLength: {description: "Minimum length of partial hits.", category: "required"} indexFiles: {description: "The files of the index for the reference genomes.", category: "required"} read1: {description: "List of files containing mate 1s, or unpaired reads.", category: "required"} @@ -172,9 +172,9 @@ task Classify { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputMetrics: {description: "File with Centrifuge metrics."} - outputClassification: {description: "File with the classification results."} - outputReport: {description: "File with a classification summary."} + metrics: {description: "File with centrifuge metrics."} + classification: {description: "File with the classification results."} + report: {description: "File with a classification summary."} } } @@ -209,7 +209,7 @@ task Inspect { >>> output { - File outputInspect = outputPrefix + "/" + printOption + File inspectResult = outputPrefix + "/" + printOption } runtime { @@ -223,13 +223,13 @@ task Inspect { printOption: {description: "The output option for inspect (fasta, summary, conversionTable, taxonomyTree, nameTable, sizeTable)", category: "required"} indexFiles: {description: "The files of the index for the reference genomes.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - across: {description: "When printing FASTA output, output a newline character every bases.", category: "common"} + across: {description: "When printing fasta output, output a newline character every bases.", category: "common"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputInspect: {description: "Output file according to output option."} + inspectResult: {description: "Output file according to output option."} } } @@ -300,7 +300,7 @@ task DownloadTaxonomy { } } -task Kreport { +task KReport { input { File centrifugeClassification String outputPrefix @@ -337,7 +337,7 @@ task Kreport { >>> output { - File outputKreport = outputPrefix + "_kreport.tsv" + File KReport = outputPrefix + "_kreport.tsv" } runtime { @@ -348,10 +348,10 @@ task Kreport { parameter_meta { # inputs - centrifugeClassification: {description: "File with Centrifuge classification results.", category: "required"} + centrifugeClassification: {description: "File with centrifuge classification results.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} indexFiles: {description: "The files of the index for the reference genomes.", category: "required"} - noLCA: {description: "Do not report the LCA of multiple assignments, but report count fractions at the taxa.", category: "advanced"} + noLCA: {description: "Do not report the lca of multiple assignments, but report count fractions at the taxa.", category: "advanced"} showZeros: {description: "Show clades that have zero reads.", category: "advanced"} isCountTable: {description: "The format of the file is taxIDCOUNT.", category: "advanced"} minimumScore: {description: "Require a minimum score for reads to be counted.", category: "advanced"} @@ -361,7 +361,7 @@ task Kreport { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputKreport: {description: "File with kraken style report."} + KReport: {description: "File with kraken style report."} } } @@ -384,7 +384,7 @@ task KTimportTaxonomy { } output { - File outputKronaPlot = outputPrefix + "_krona.html" + File kronaPlot = outputPrefix + "_krona.html" } runtime { @@ -395,13 +395,13 @@ task KTimportTaxonomy { parameter_meta { # inputs - inputFile: {description: "File with Centrifuge classification results.", category: "required"} + inputFile: {description: "File with centrifuge classification results.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputKronaPlot: {description: "Krona taxonomy plot html file."} + kronaPlot: {description: "Krona taxonomy plot html file."} } } diff --git a/isoseq3.wdl b/isoseq3.wdl index 9e0dfdb2..f369553f 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -51,12 +51,12 @@ task Refine { } output { - File outputFLNCfile = outputDir + "/" + outputNamePrefix + ".bam" - File outputFLNCindexFile = outputDir + "/" + outputNamePrefix + ".bam.pbi" - File outputConsensusReadsetFile = outputDir + "/" + outputNamePrefix + ".consensusreadset.xml" - File outputFilterSummaryFile = outputDir + "/" + outputNamePrefix + ".filter_summary.json" - File outputReportFile = outputDir + "/" + outputNamePrefix + ".report.csv" - File outputSTDERRfile = outputDir + "/" + outputNamePrefix + ".stderr.log" + File refineBam = outputDir + "/" + outputNamePrefix + ".bam" + File refineBamIndex = outputDir + "/" + outputNamePrefix + ".bam.pbi" + File refineConsensusReadset = outputDir + "/" + outputNamePrefix + ".consensusreadset.xml" + File refineFilterSummary = outputDir + "/" + outputNamePrefix + ".filter_summary.json" + File refineReport = outputDir + "/" + outputNamePrefix + ".report.csv" + File refineStderr = outputDir + "/" + outputNamePrefix + ".stderr.log" } runtime { @@ -69,9 +69,9 @@ task Refine { parameter_meta { # inputs minPolyAlength: {description: "Minimum poly(A) tail length.", category: "advanced"} - requirePolyA: {description: "Require FL reads to have a poly(A) tail and remove it.", category: "common"} + requirePolyA: {description: "Require fl reads to have a poly(A) tail and remove it.", category: "common"} logLevel: {description: "Set log level. 
Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} - inputBamFile: {description: "BAM input file.", category: "required"} + inputBamFile: {description: "Bam input file.", category: "required"} primerFile: {description: "Barcode/primer fasta file.", category: "required"} outputDir: {description: "Output directory path.", category: "required"} outputNamePrefix: {description: "Basename of the output files.", category: "required"} @@ -81,11 +81,11 @@ task Refine { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputFLNCfile: {description: "Filtered reads output file."} - outputFLNCindexFile: {description: "Index of filtered reads output file."} - outputSTDERRfile: {description: "Refine STDERR log file."} - outputConsensusReadsetFile: {description: "Refine consensus readset XML file."} - outputFilterSummaryFile: {description: "Refine summary file."} - outputReportFile: {description: "Refine report file."} + refineBam: {description: "Filtered reads output file."} + refineBamIndex: {description: "Index of filtered reads output file."} + refineConsensusReadset: {description: "Refine consensus readset xml file."} + refineFilterSummary: {description: "Refine summary file."} + refineReport: {description: "Refine report file."} + refineStderr: {description: "Refine stderr log file."} } } diff --git a/lima.wdl b/lima.wdl index ddd37da4..2e8a7085 100644 --- a/lima.wdl +++ b/lima.wdl @@ -98,14 +98,14 @@ task Lima { } output { - Array[File] outputFLfile = glob("*.bam") - Array[File] outputFLindexFile = glob("*.bam.pbi") - Array[File] outputFLxmlFile = glob("*.subreadset.xml") - File outputSTDERRfile = outputPrefix + ".fl.stderr.log" - File outputJSONfile = outputPrefix + ".fl.json" - File outputCountsFile = outputPrefix + ".fl.lima.counts" - File outputReportFile = outputPrefix + ".fl.lima.report" - File outputSummaryFile 
= outputPrefix + ".fl.lima.summary" + Array[File] limaBam = glob("*.bam") + Array[File] limaBamIndex = glob("*.bam.pbi") + Array[File] limaXml = glob("*.subreadset.xml") + File limaStderr = outputPrefix + ".fl.stderr.log" + File limaJson = outputPrefix + ".fl.json" + File limaCounts = outputPrefix + ".fl.lima.counts" + File limaReport = outputPrefix + ".fl.lima.report" + File limaSummary = outputPrefix + ".fl.lima.summary" } runtime { @@ -131,15 +131,15 @@ task Lima { minEndScore: {description: "Minimum end barcode score threshold is applied to the individual leading and trailing ends.", category: "advanced"} minSignalIncrease: {description: "The minimal score difference, between first and combined, required to call a barcode pair different.", category: "advanced"} minScoreLead: {description: "The minimal score lead required to call a barcode pair significant.", category: "common"} - ccsMode: {description: "CCS mode, use optimal alignment options.", category: "common"} - splitBamNamed: {description: "Split BAM output by resolved barcode pair name.", category: "common"} + ccsMode: {description: "Ccs mode, use optimal alignment options.", category: "common"} + splitBamNamed: {description: "Split bam output by resolved barcode pair name.", category: "common"} scoredAdapterRatio: {description: "Minimum ratio of scored vs sequenced adapters.", category: "advanced"} peek: {description: "Demux the first N ZMWs and return the mean score, 0 means peeking deactivated.", category: "advanced"} guess: {description: "Try to guess the used barcodes, using the provided mean score threshold, 0 means guessing deactivated.", category: "advanced"} guessMinCount: {description: "Minimum number of ZMWs observed to whitelist barcodes.", category: "advanced"} peekGuess: {description: "Try to infer the used barcodes subset, by peeking at the first 50,000 ZMWs.", category: "advanced"} logLevel: {description: "Set log level. 
Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} - inputBamFile: {description: "BAM input file.", category: "required"} + inputBamFile: {description: "Bam input file.", category: "required"} barcodeFile: {description: "Barcode/primer fasta file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} @@ -148,13 +148,13 @@ task Lima { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputFLfile: {description: "Demultiplexed reads output file(s)."} - outputFLindexFile: {description: "Index of demultiplexed reads output file(s)."} - outputFLxmlFile: {description: "XML file of the subreadset(s)."} - outputSTDERRfile: {description: "Lima STDERR log file."} - outputJSONfile: {description: "Lima JSON file."} - outputCountsFile: {description: "Lima counts file."} - outputReportFile: {description: "Lima report file."} - outputSummaryFile: {description: "Lima summary file."} + limaBam: {description: "Demultiplexed reads output file(s)."} + limaBamIndex: {description: "Index of demultiplexed reads output file(s)."} + limaXml: {description: "Xml file of the subreadset(s)."} + limaStderr: {description: "Lima stderr log file."} + limaJson: {description: "Lima json file."} + limaCounts: {description: "Lima counts file."} + limaReport: {description: "Lima report file."} + limaSummary: {description: "Lima summary file."} } } diff --git a/talon.wdl b/talon.wdl index a469ddba..e39a3cd2 100644 --- a/talon.wdl +++ b/talon.wdl @@ -48,7 +48,7 @@ task CreateAbundanceFileFromDatabase { } output { - File outputAbundanceFile = outputPrefix + "_talon_abundance.tsv" + File abundanceFile = outputPrefix + "_talon_abundance.tsv" } runtime { @@ -59,7 +59,7 @@ task CreateAbundanceFileFromDatabase { 
parameter_meta { # inputs - databaseFile: {description: "TALON database.", category: "required"} + databaseFile: {description: "Talon database.", category: "required"} annotationVersion: {description: "Which annotation version to use.", category: "required"} genomeBuild: {description: "Genome build to use.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} @@ -70,7 +70,7 @@ task CreateAbundanceFileFromDatabase { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputAbundanceFile: {description: "Abundance for each transcript in the TALON database across datasets."} + abundanceFile: {description: "Abundance for each transcript in the talon database across datasets."} } } @@ -105,7 +105,7 @@ task CreateGtfFromDatabase { } output { - File outputGTFfile = outputPrefix + "_talon.gtf" + File gtfFile = outputPrefix + "_talon.gtf" } runtime { @@ -116,7 +116,7 @@ task CreateGtfFromDatabase { parameter_meta { # inputs - databaseFile: {description: "TALON database.", category: "required"} + databaseFile: {description: "Talon database.", category: "required"} genomeBuild: {description: "Genome build to use.", category: "required"} annotationVersion: {description: "Which annotation version to use.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} @@ -128,7 +128,7 @@ task CreateGtfFromDatabase { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputGTFfile: {description: "The genes, transcripts, and exons stored a TALON database in GTF format."} + gtfFile: {description: "The genes, transcripts, and exons stored a talon database in gtf format."} } } @@ -164,7 +164,7 @@ task FilterTalonTranscripts { } output { - File outputTranscriptWhitelist = outputPrefix + "_whitelist.csv" + File transcriptWhitelist = outputPrefix + "_whitelist.csv" } runtime { @@ -175,11 +175,11 @@ task FilterTalonTranscripts { parameter_meta { # inputs - databaseFile: {description: "TALON database.", category: "required"} + databaseFile: {description: "Talon database.", category: "required"} annotationVersion: {description: "Which annotation version to use.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} maxFracA: {description: "Maximum fraction of As to allow in the window located immediately after any read assigned to a novel transcript.", category: "advanced"} - minCount: {description: "Number of minimum occurrences required for a novel transcript PER dataset.", category: "advanced"} + minCount: {description: "Number of minimum occurrences required for a novel transcript per dataset.", category: "advanced"} allowGenomic: {description: "If this option is set, transcripts from the Genomic novelty category will be permitted in the output.", category: "advanced"} datasetsFile: {description: "Datasets to include.", category: "advanced"} minDatasets: {description: "Minimum number of datasets novel transcripts must be found in.", category: "advanced"} @@ -188,7 +188,7 @@ task FilterTalonTranscripts { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputTranscriptWhitelist: {description: "A transcript whitelist produced from the TALON database."} + transcriptWhitelist: {description: "Transcript whitelist produced from the talon database."} } } @@ -216,7 +216,7 @@ task GetReadAnnotations { } output { - File outputAnnotation = outputPrefix + "_talon_read_annot.tsv" + File readAnnotations = outputPrefix + "_talon_read_annot.tsv" } runtime { @@ -227,7 +227,7 @@ task GetReadAnnotations { parameter_meta { # inputs - databaseFile: { description: "TALON database.", category: "required"} + databaseFile: { description: "Talon database.", category: "required"} genomeBuild: {description: "Genome build to use.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} datasetFile: {description: "A file indicating which datasets should be included.", category: "advanced"} @@ -236,7 +236,7 @@ task GetReadAnnotations { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputAnnotation: {description: "Read-specific annotation information from a TALON database."} + readAnnotations: {description: "Read-specific annotation information from a talon database."} } } @@ -266,7 +266,7 @@ task GetSpliceJunctions { } output { - File outputSJfile = outputPrefix + "_" + runMode + "s.tsv" + File spliceJunctions = outputPrefix + "_" + runMode + "s.tsv" } runtime { @@ -277,9 +277,9 @@ task GetSpliceJunctions { parameter_meta { # inputs - SJinformationFile: {description: "TALON GTF file or database from which to extract exons/introns.", category: "required"} + SJinformationFile: {description: "Talon gtf file or database from which to extract exons/introns.", category: "required"} inputFileType: {description: "The file type of SJinformationFile.", category: "common"} - referenceGTF: {description: "GTF reference file (ie GENCODE).", category: "required"} + referenceGTF: {description: "Gtf reference file (ie gencode).", category: "required"} runMode: {description: "Determines whether to include introns or exons in the output.", category: "common"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} @@ -287,7 +287,7 @@ task GetSpliceJunctions { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputSJfile: {description: "File containing locations, novelty and transcript assignments of exons/introns."} + spliceJunctions: {description: "File containing locations, novelty and transcript assignments of exons/introns."} } } @@ -322,7 +322,7 @@ task InitializeTalonDatabase { } output { - File outputDatabase = outputPrefix + ".db" + File database = outputPrefix + ".db" } runtime { @@ -333,11 +333,11 @@ task InitializeTalonDatabase { parameter_meta { # inputs - GTFfile: {description: "GTF annotation containing genes, transcripts, and edges.", category: "required"} - genomeBuild: {description: "Name of genome build that the GTF file is based on (ie hg38).", category: "required"} + GTFfile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"} + genomeBuild: {description: "Name of genome build that the gtf file is based on (ie hg38).", category: "required"} annotationVersion: {description: "Name of supplied annotation (will be used to label data).", category: "required"} minimumLength: { description: "Minimum required transcript length.", category: "common"} - novelIDprefix: {description: "Prefix for naming novel discoveries in eventual TALON runs.", category: "common"} + novelIDprefix: {description: "Prefix for naming novel discoveries in eventual talon runs.", category: "common"} cutoff5p: { description: "Maximum allowable distance (bp) at the 5' end during annotation.", category: "advanced"} cutoff3p: {description: "Maximum allowable distance (bp) at the 3' end during annotation.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} @@ -346,7 +346,7 @@ task InitializeTalonDatabase { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputDatabase: {description: "TALON database."} + database: {description: "Talon database."} } } @@ -379,8 +379,8 @@ task LabelReads { } output { - File outputLabeledSAM = outputPrefix + "_labeled.sam" - File outputReadLabels = outputPrefix + "_read_labels.tsv" + File labeledSam = outputPrefix + "_labeled.sam" + File readLabels = outputPrefix + "_read_labels.tsv" } runtime { @@ -392,7 +392,7 @@ task LabelReads { parameter_meta { # inputs - SAMfile: {description: "SAM file of transcripts.", category: "required"} + SAMfile: {description: "Sam file of transcripts.", category: "required"} referenceGenome: {description: "Reference genome fasta file.", category: "required"} fracaRangeSize: {description: "Size of post-transcript interval to compute fraction.", category: "common"} tmpDir: {description: "Path to directory for tmp files.", category: "advanced"} @@ -404,8 +404,8 @@ task LabelReads { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputLabeledSAM: {description: "SAM file with labeled transcripts."} - outputReadLabels: {description: "Tabular file with fraction description per read."} + labeledSam: {description: "Sam file with labeled transcripts."} + readLabels: {description: "Tabular file with fraction description per read."} } } @@ -425,7 +425,7 @@ task ReformatGtf { } output { - File outputReformattedGTF = GTFfile + File reformattedGtf = GTFfile } runtime { @@ -436,13 +436,13 @@ task ReformatGtf { parameter_meta { # inputs - GTFfile: {description: "GTF annotation containing genes, transcripts, and edges.", category: "required"} + GTFfile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputReformattedGTF: {description: "Reformatted GTF file."} + reformattedGtf: {description: "Reformatted gtf file."} } } @@ -470,7 +470,7 @@ task SummarizeDatasets { } output { - File outputSummaryFile = outputPrefix + "_talon_summary.tsv" + File summaryFile = outputPrefix + "_talon_summary.tsv" } runtime { @@ -481,7 +481,7 @@ task SummarizeDatasets { parameter_meta { # inputs - databaseFile: {description: "TALON database.", category: "required"} + databaseFile: {description: "Talon database.", category: "required"} setVerbose: {description: "Print out the counts in terminal.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} datasetGroupsCSV: {description: "File of comma-delimited dataset groups to process together.", category: "advanced"} @@ -490,7 +490,7 @@ task SummarizeDatasets { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputSummaryFile: {description: "Tab-delimited file of gene and transcript counts for each dataset."} + summaryFile: {description: "Tab-delimited file of gene and transcript counts for each dataset."} } } @@ -534,10 +534,10 @@ task Talon { >>> output { - File outputUpdatedDatabase = databaseFile - File outputLog = outputPrefix + "/run_QC.log" - File outputAnnot = outputPrefix + "/run_talon_read_annot.tsv" - File outputConfigFile = outputPrefix + "/talonConfigFile.csv" + File updatedDatabase = databaseFile + File talonLog = outputPrefix + "/run_QC.log" + File talonAnnotation = outputPrefix + "/run_talon_read_annot.tsv" + File talonConfigFile = outputPrefix + "/talonConfigFile.csv" } runtime { @@ -549,13 +549,13 @@ task Talon { parameter_meta { # inputs - SAMfiles: {description: "Input SAM files.", category: "required"} + SAMfiles: {description: "Input sam files.", category: "required"} organism: {description: "The name of the organism from which the samples originated.", category: "required"} sequencingPlatform: {description: "The sequencing platform used to generate long reads.", category: "required"} - databaseFile: {description: "TALON database. Created using initialize_talon_database.py.", category: "required"} + databaseFile: {description: "Talon database. Created using initialize_talon_database.py.", category: "required"} genomeBuild: {description: "Genome build (i.e. 
hg38) to use.", category: "required"} - minimumCoverage: {description: "Minimum alignment coverage in order to use a SAM entry.", category: "common"} - minimumIdentity: {description: "Minimum alignment identity in order to use a SAM entry.", category: "common" } + minimumCoverage: {description: "Minimum alignment coverage in order to use a sam entry.", category: "common"} + minimumIdentity: {description: "Minimum alignment identity in order to use a sam entry.", category: "common" } outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} @@ -563,9 +563,9 @@ task Talon { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputUpdatedDatabase: {description: "Updated TALON database."} - outputLog: {description: "Log file from TALON run."} - outputAnnot: {description: "Read annotation file from TALON run."} - outputConfigFile: {description: "The TALON configuration file."} + updatedDatabase: {description: "Updated talon database."} + talonLog: {description: "Log file from talon run."} + talonAnnotation: {description: "Read annotation file from talon run."} + talonConfigFile: {description: "The talon configuration file."} } } From 4fbfe713091934c380df0f1b565428fe520af638 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 22 Jul 2020 11:25:37 +0200 Subject: [PATCH 0526/1208] Fix last set of tasks. 
--- centrifuge.wdl | 14 +++++----- isoseq3.wdl | 6 ++--- minimap2.wdl | 24 ++++++++--------- samtools.wdl | 4 +-- talon.wdl | 64 ++++++++++++++++++++++----------------------- transcriptclean.wdl | 64 ++++++++++++++++++++++----------------------- 6 files changed, 87 insertions(+), 89 deletions(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index e1cddcad..ee305325 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -281,7 +281,7 @@ task Download { task DownloadTaxonomy { input { - String centrifugeTaxonomyDir + String taxonomyDir String executable = "centrifuge-download" String? preCommand } @@ -290,19 +290,19 @@ task DownloadTaxonomy { set -e -o pipefail ~{preCommand} ~{executable} \ - -o ~{centrifugeTaxonomyDir} \ + -o ~{taxonomyDir} \ taxonomy } output { - File taxonomyTree = centrifugeTaxonomyDir + "/nodes.dmp" - File nameTable = centrifugeTaxonomyDir + "/names.dmp" + File taxonomyTree = taxonomyDir + "/nodes.dmp" + File nameTable = taxonomyDir + "/names.dmp" } } task KReport { input { - File centrifugeClassification + File classification String outputPrefix Array[File]+ indexFiles Boolean noLCA = false @@ -332,7 +332,7 @@ task KReport { ~{true="--is-count-table" false="" isCountTable} \ ~{"--min-score " + minimumScore} \ ~{"--min-length " + minimumLength} \ - ~{centrifugeClassification} \ + ~{classification} \ > ~{outputPrefix + "_kreport.tsv"} >>> @@ -348,7 +348,7 @@ task KReport { parameter_meta { # inputs - centrifugeClassification: {description: "File with centrifuge classification results.", category: "required"} + classification: {description: "File with centrifuge classification results.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} indexFiles: {description: "The files of the index for the reference genomes.", category: "required"} noLCA: {description: "Do not report the lca of multiple assignments, but report count fractions at the taxa.", category: "advanced"} diff --git 
a/isoseq3.wdl b/isoseq3.wdl index f369553f..604a71d5 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -22,7 +22,7 @@ version 1.0 task Refine { input { - Int minPolyAlength = 20 + Int minPolyALength = 20 Boolean requirePolyA = false String logLevel = "WARN" File inputBamFile @@ -40,7 +40,7 @@ task Refine { set -e mkdir -p "~{outputDir}" isoseq3 refine \ - --min-polya-length ~{minPolyAlength} \ + --min-polya-length ~{minPolyALength} \ ~{true="--require-polya" false="" requirePolyA} \ --log-level ~{logLevel} \ --num-threads ~{cores} \ @@ -68,7 +68,7 @@ task Refine { parameter_meta { # inputs - minPolyAlength: {description: "Minimum poly(A) tail length.", category: "advanced"} + minPolyALength: {description: "Minimum poly(A) tail length.", category: "advanced"} requirePolyA: {description: "Require fl reads to have a poly(A) tail and remove it.", category: "common"} logLevel: {description: "Set log level. Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} inputBamFile: {description: "Bam input file.", category: "required"} diff --git a/minimap2.wdl b/minimap2.wdl index 04b02bf2..fb31fb7f 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -50,7 +50,7 @@ task Indexing { } output { - File outputIndexFile = outputPrefix + ".mmi" + File indexFile = outputPrefix + ".mmi" } runtime { @@ -62,7 +62,7 @@ task Indexing { parameter_meta { # input - useHomopolymerCompressedKmer: {description: "Use homopolymer-compressed k-mer (preferrable for PacBio).", category: "advanced"} + useHomopolymerCompressedKmer: {description: "Use homopolymer-compressed k-mer (preferrable for pacbio).", category: "advanced"} kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"} minimizerWindowSize: {description: "Minimizer window size.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} @@ -74,7 +74,7 @@ task Indexing { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # output - outputIndexFile: {description: "Indexed reference file."} + indexFile: {description: "Indexed reference file."} } } @@ -83,9 +83,9 @@ task Mapping { String presetOption Int kmerSize = 15 Boolean skipSelfAndDualMappings = false - Boolean outputSAM = false + Boolean outputSam = false String outputPrefix - Boolean addMDtagToSAM = false + Boolean addMDTagToSam = false Boolean secondaryAlignment = false File referenceFile File queryFile @@ -110,9 +110,9 @@ task Mapping { -x ~{presetOption} \ -k ~{kmerSize} \ ~{true="-X" false="" skipSelfAndDualMappings} \ - ~{true="-a" false="" outputSAM} \ + ~{true="-a" false="" outputSam} \ -o ~{outputPrefix} \ - ~{true="--MD" false="" addMDtagToSAM} \ + ~{true="--MD" false="" addMDTagToSam} \ --secondary=~{true="yes" false="no" secondaryAlignment} \ -t ~{cores} \ ~{"-G " + maxIntronLength} \ @@ -126,7 +126,7 @@ task Mapping { } output { - File outputAlignmentFile = outputPrefix + File alignmentFile = outputPrefix } runtime { @@ -139,16 +139,16 @@ task Mapping { parameter_meta { presetOption: {description: "This option applies multiple options at the same time.", category: "common"} kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"} - outputSAM: {description: "Output in the SAM format.", category: "common"} + outputSam: {description: "Output in the sam format.", category: "common"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} maxIntronLength: {description: "Max intron length (effective with -xsplice; changing -r).", category: "advanced"} maxFragmentLength: {description: "Max fragment length (effective with -xsr or in the fragment mode).", category: "advanced"} skipSelfAndDualMappings: {description: "Skip self and dual mappings (for the all-vs-all mode).", category: "advanced"} - retainMaxSecondaryAlignments: {description: "Retain at most 
INT secondary alignments.", category: "advanced"} + retainMaxSecondaryAlignments: {description: "Retain at most N secondary alignments.", category: "advanced"} matchingScore: {description: "Matching score.", category: "advanced"} mismatchPenalty: {description: "Mismatch penalty.", category: "advanced"} howToFindGTAG: {description: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} - addMDtagToSAM: {description: "Adds a MD tag to the SAM output file.", category: "common"} + addMDTagToSam: {description: "Adds a MD tag to the sam output file.", category: "common"} secondaryAlignment: {description: "Whether to output secondary alignments.", category: "advanced"} referenceFile: {description: "Reference fasta file.", category: "required"} queryFile: {description: "Input fasta file.", category: "required"} @@ -158,6 +158,6 @@ task Mapping { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # output - outputAlignmentFile: {description: "Mapping and alignment between collections of DNA sequences file."} + alignmentFile: {description: "Mapping and alignment between collections of dna sequences file."} } } diff --git a/samtools.wdl b/samtools.wdl index 0b8394bf..c155f026 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -423,6 +423,7 @@ task Sort { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + # outputs outputBam: {description: "Sorted BAM file."} } @@ -526,11 +527,10 @@ task View { excludeFilter: {description: "Equivalent to samtools view's `-F` option.", category: "advanced"} excludeSpecificFilter: {description: "Equivalent to samtools view's `-G` option.", category: "advanced"} MAPQthreshold: {description: "Equivalent to samtools view's `-q` option.", category: "advanced"} - threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} diff --git a/talon.wdl b/talon.wdl index e39a3cd2..c796c1ce 100644 --- a/talon.wdl +++ b/talon.wdl @@ -242,9 +242,9 @@ task GetReadAnnotations { task GetSpliceJunctions { input { - File SJinformationFile + File sjInformationFile String inputFileType = "db" - File referenceGTF + File referenceGtf String runMode = "intron" String outputPrefix @@ -259,8 +259,8 @@ task GetSpliceJunctions { set -e mkdir -p "$(dirname ~{outputPrefix})" talon_get_sjs \ - ~{SJfileType[inputFileType] + SJinformationFile} \ - --ref ~{referenceGTF} \ + ~{SJfileType[inputFileType] + sjInformationFile} \ + --ref ~{referenceGtf} \ --mode ~{runMode} \ --outprefix ~{outputPrefix} } @@ -277,9 +277,9 @@ task GetSpliceJunctions { parameter_meta { # inputs - SJinformationFile: {description: "Talon gtf file or database from which to extract exons/introns.", category: "required"} - inputFileType: {description: "The file type of SJinformationFile.", category: "common"} - referenceGTF: {description: "Gtf reference file (ie gencode).", category: "required"} + sjInformationFile: {description: "Talon gtf file or database from which to extract exons/introns.", category: "required"} + inputFileType: {description: "The file type of sjInformationFile.", category: "common"} + referenceGtf: {description: "Gtf reference file (ie gencode).", category: "required"} runMode: {description: "Determines whether to include introns or exons in the output.", category: "common"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} @@ -293,13 +293,13 @@ task GetSpliceJunctions { task InitializeTalonDatabase { input { - File GTFfile + File gtfFile String genomeBuild String annotationVersion Int minimumLength = 300 - String novelIDprefix = "TALON" - Int 
cutoff5p = 500 - Int cutoff3p = 300 + String novelPrefix = "TALON" + Int cutOff5p = 500 + Int cutOff3p = 300 String outputPrefix String memory = "10G" @@ -311,13 +311,13 @@ task InitializeTalonDatabase { set -e mkdir -p "$(dirname ~{outputPrefix})" talon_initialize_database \ - --f=~{GTFfile} \ + --f=~{gtfFile} \ --g=~{genomeBuild} \ --a=~{annotationVersion} \ --l=~{minimumLength} \ - --idprefix=~{novelIDprefix} \ - --5p=~{cutoff5p} \ - --3p=~{cutoff3p} \ + --idprefix=~{novelPrefix} \ + --5p=~{cutOff5p} \ + --3p=~{cutOff3p} \ --o=~{outputPrefix} } @@ -333,13 +333,13 @@ task InitializeTalonDatabase { parameter_meta { # inputs - GTFfile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"} + gtfFile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"} genomeBuild: {description: "Name of genome build that the gtf file is based on (ie hg38).", category: "required"} annotationVersion: {description: "Name of supplied annotation (will be used to label data).", category: "required"} minimumLength: { description: "Minimum required transcript length.", category: "common"} - novelIDprefix: {description: "Prefix for naming novel discoveries in eventual talon runs.", category: "common"} - cutoff5p: { description: "Maximum allowable distance (bp) at the 5' end during annotation.", category: "advanced"} - cutoff3p: {description: "Maximum allowable distance (bp) at the 3' end during annotation.", category: "advanced"} + novelPrefix: {description: "Prefix for naming novel discoveries in eventual talon runs.", category: "common"} + cutOff5p: { description: "Maximum allowable distance (bp) at the 5' end during annotation.", category: "advanced"} + cutOff3p: {description: "Maximum allowable distance (bp) at the 3' end during annotation.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory 
available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -352,7 +352,7 @@ task InitializeTalonDatabase { task LabelReads { input { - File SAMfile + File samFile File referenceGenome Int fracaRangeSize = 20 String tmpDir = "./tmp_label_reads" @@ -369,7 +369,7 @@ task LabelReads { set -e mkdir -p "$(dirname ~{outputPrefix})" talon_label_reads \ - --f=~{SAMfile} \ + --f=~{samFile} \ --g=~{referenceGenome} \ --t=~{threads} \ --ar=~{fracaRangeSize} \ @@ -392,7 +392,7 @@ task LabelReads { parameter_meta { # inputs - SAMfile: {description: "Sam file of transcripts.", category: "required"} + samFile: {description: "Sam file of transcripts.", category: "required"} referenceGenome: {description: "Reference genome fasta file.", category: "required"} fracaRangeSize: {description: "Size of post-transcript interval to compute fraction.", category: "common"} tmpDir: {description: "Path to directory for tmp files.", category: "advanced"} @@ -411,7 +411,7 @@ task LabelReads { task ReformatGtf { input { - File GTFfile + File gtfFile String memory = "4G" Int timeMinutes = 30 @@ -421,11 +421,11 @@ task ReformatGtf { command { set -e talon_reformat_gtf \ - -gtf ~{GTFfile} + -gtf ~{gtfFile} } output { - File reformattedGtf = GTFfile + File reformattedGtf = gtfFile } runtime { @@ -436,7 +436,7 @@ task ReformatGtf { parameter_meta { # inputs - GTFfile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"} + gtfFile: {description: "Gtf annotation containing genes, transcripts, and edges.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -452,7 +452,7 @@ task SummarizeDatasets { Boolean setVerbose = false String outputPrefix - File? datasetGroupsCSV + File? datasetGroupsCsv String memory = "4G" Int timeMinutes = 50 @@ -466,7 +466,7 @@ task SummarizeDatasets { --db ~{databaseFile} \ ~{true="--verbose" false="" setVerbose} \ --o ~{outputPrefix} \ - ~{"--groups " + datasetGroupsCSV} + ~{"--groups " + datasetGroupsCsv} } output { @@ -484,7 +484,7 @@ task SummarizeDatasets { databaseFile: {description: "Talon database.", category: "required"} setVerbose: {description: "Print out the counts in terminal.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - datasetGroupsCSV: {description: "File of comma-delimited dataset groups to process together.", category: "advanced"} + datasetGroupsCsv: {description: "File of comma-delimited dataset groups to process together.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -496,7 +496,7 @@ task SummarizeDatasets { task Talon { input { - Array[File] SAMfiles + Array[File] samFiles String organism String sequencingPlatform = "PacBio-RS-II" File databaseFile @@ -518,7 +518,7 @@ task Talon { ln -s $PWD/tmp /tmp/sqltmp #Multiprocessing will crash if the absolute path is too long. export TMPDIR=/tmp/sqltmp printf "" > ~{outputPrefix}/talonConfigFile.csv #File needs to be emptied when task is rerun. 
- for file in ~{sep=" " SAMfiles} + for file in ~{sep=" " samFiles} do configFileLine="$(basename ${file%.*}),~{organism},~{sequencingPlatform},${file}" echo ${configFileLine} >> ~{outputPrefix}/talonConfigFile.csv @@ -549,7 +549,7 @@ task Talon { parameter_meta { # inputs - SAMfiles: {description: "Input sam files.", category: "required"} + samFiles: {description: "Input sam files.", category: "required"} organism: {description: "The name of the organism from which the samples originated.", category: "required"} sequencingPlatform: {description: "The sequencing platform used to generate long reads.", category: "required"} databaseFile: {description: "Talon database. Created using initialize_talon_database.py.", category: "required"} diff --git a/transcriptclean.wdl b/transcriptclean.wdl index 15da1f58..daf79703 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -22,7 +22,7 @@ version 1.0 task GetSJsFromGtf { input { - File GTFfile + File gtfFile File genomeFile String outputPrefix Int minIntronSize = 21 @@ -36,14 +36,14 @@ task GetSJsFromGtf { set -e mkdir -p "$(dirname ~{outputPrefix})" get_SJs_from_gtf \ - --f=~{GTFfile} \ + --f=~{gtfFile} \ --g=~{genomeFile} \ --minIntronSize=~{minIntronSize} \ ~{"--o=" + outputPrefix + ".tsv"} } output { - File outputSJsFile = outputPrefix + ".tsv" + File spliceJunctionFile = outputPrefix + ".tsv" } runtime { @@ -54,22 +54,21 @@ task GetSJsFromGtf { parameter_meta { # inputs - GTFfile: {description: "Input GTF file", category: "required"} + gtfFile: {description: "Input gtf file", category: "required"} genomeFile: {description: "Reference genome", category: "required"} minIntronSize: {description: "Minimum size of intron to consider a junction.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job 
will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputSJsFile: {description: "Extracted splice junctions."} + spliceJunctionFile: {description: "Extracted splice junctions."} } } task GetTranscriptCleanStats { input { - File transcriptCleanSAMfile + File transcriptCleanSamFile String outputPrefix String memory = "4G" @@ -81,12 +80,12 @@ task GetTranscriptCleanStats { set -e mkdir -p "$(dirname ~{outputPrefix})" get_TranscriptClean_stats \ - ~{transcriptCleanSAMfile} \ + ~{transcriptCleanSamFile} \ ~{outputPrefix} } output { - File outputStatsFile = stdout() + File statsFile = stdout() } runtime { @@ -97,24 +96,23 @@ task GetTranscriptCleanStats { parameter_meta { # inputs - transcriptCleanSAMfile: {description: "Output SAM file from TranscriptClean", category: "required"} + transcriptCleanSamFile: {description: "Output sam file from transcriptclean", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputStatsFile: {description: "Summary stats from TranscriptClean run."} + statsFile: {description: "Summary stats from transcriptclean run."} } } task TranscriptClean { input { - File SAMfile + File samFile File referenceGenome Int maxLenIndel = 5 - Int maxSJoffset = 5 + Int maxSJOffset = 5 String outputPrefix Boolean correctMismatches = true Boolean correctIndels = true @@ -138,7 +136,7 @@ task TranscriptClean { set -e mkdir -p "$(dirname ~{outputPrefix})" TranscriptClean \ - -s ~{SAMfile} \ + -s ~{samFile} \ -g ~{referenceGenome} \ -t ~{cores} \ --maxLenIndel=~{maxLenIndel} \ @@ -157,10 +155,10 @@ task TranscriptClean { } output { - File outputTranscriptCleanFasta = outputPrefix + "_clean.fa" - File outputTranscriptCleanLog = outputPrefix + "_clean.log" - File outputTranscriptCleanSAM = outputPrefix + "_clean.sam" - File outputTranscriptCleanTElog = outputPrefix + "_clean.TE.log" + File fastaFile = outputPrefix + "_clean.fa" + File logFile = outputPrefix + "_clean.log" + File samFile = outputPrefix + "_clean.sam" + File logFileTE = outputPrefix + "_clean.TE.log" } runtime { @@ -172,21 +170,21 @@ task TranscriptClean { parameter_meta { # inputs - SAMfile: {description: "Input SAM file containing transcripts to correct.", category: "required"} + samFile: {description: "Input sam file containing transcripts to correct.", category: "required"} referenceGenome: {description: "Reference genome fasta file.", category: "required"} maxLenIndel: {description: "Maximum size indel to correct.", category: "advanced"} - maxSJoffset: {description: "Maximum distance from annotated splice junction to correct.", category: "advanced"} + maxSJOffset: {description: "Maximum distance from annotated splice junction to correct.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - correctMismatches: 
{description: "Set this to make TranscriptClean correct mismatches.", category: "common"} - correctIndels: {description: "Set this to make TranscriptClean correct indels.", category: "common"} - correctSJs: {description: "Set this to make TranscriptClean correct splice junctions.", category: "common"} - dryRun: {description: "TranscriptClean will read in the data but don't do any correction.", category: "advanced"} + correctMismatches: {description: "Set this to make transcriptclean correct mismatches.", category: "common"} + correctIndels: {description: "Set this to make transcriptclean correct indels.", category: "common"} + correctSJs: {description: "Set this to make transcriptclean correct splice junctions.", category: "common"} + dryRun: {description: "Transcriptclean will read in the data but don't do any correction.", category: "advanced"} primaryOnly: {description: "Only output primary mappings of transcripts.", category: "advanced"} canonOnly: {description: "Only output canonical transcripts and transcript containing annotated noncanonical junctions.", category: "advanced"} bufferSize: {description: "Number of lines to output to file at once by each thread during run.", category: "common"} - deleteTmp: {description: "The temporary directory generated by TranscriptClean will be removed.", category: "common"} + deleteTmp: {description: "The temporary directory generated by transcriptclean will be removed.", category: "common"} spliceJunctionAnnotation: {description: "Splice junction file.", category: "common"} - variantFile: {description: "VCF formatted file of variants.", category: "common"} + variantFile: {description: "Vcf formatted file of variants.", category: "common"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -194,9 
+192,9 @@ task TranscriptClean { category: "advanced"} # outputs - outputTranscriptCleanFasta: {description: "Fasta file containing corrected reads."} - outputTranscriptCleanLog: {description: "Log file of TranscriptClean run."} - outputTranscriptCleanSAM: {description: "SAM file containing corrected aligned reads."} - outputTranscriptCleanTElog: {description: "TE log file of TranscriptClean run."} + fastaFile: {description: "Fasta file containing corrected reads."} + logFile: {description: "Log file of transcriptclean run."} + samFile: {description: "Sam file containing corrected aligned reads."} + logFileTE: {description: "TE log file of transcriptclean run."} } } From 7e8c833eadb0259da33e6d641393f77dbe3f2578 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 22 Jul 2020 11:54:00 +0200 Subject: [PATCH 0527/1208] Fix tests. --- transcriptclean.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/transcriptclean.wdl b/transcriptclean.wdl index daf79703..6e0de8a9 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -109,7 +109,7 @@ task GetTranscriptCleanStats { task TranscriptClean { input { - File samFile + File inputSam File referenceGenome Int maxLenIndel = 5 Int maxSJOffset = 5 @@ -136,7 +136,7 @@ task TranscriptClean { set -e mkdir -p "$(dirname ~{outputPrefix})" TranscriptClean \ - -s ~{samFile} \ + -s ~{inputSam} \ -g ~{referenceGenome} \ -t ~{cores} \ --maxLenIndel=~{maxLenIndel} \ @@ -170,7 +170,7 @@ task TranscriptClean { parameter_meta { # inputs - samFile: {description: "Input sam file containing transcripts to correct.", category: "required"} + inputSam: {description: "Input sam file containing transcripts to correct.", category: "required"} referenceGenome: {description: "Reference genome fasta file.", category: "required"} maxLenIndel: {description: "Maximum size indel to correct.", category: "advanced"} maxSJOffset: {description: "Maximum distance from annotated splice junction to correct.", category: "advanced"} 
From 5e572ffcf5057fb2ac90bbde90f21e27936dc793 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 22 Jul 2020 11:57:23 +0200 Subject: [PATCH 0528/1208] Fix some input naming. --- transcriptclean.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/transcriptclean.wdl b/transcriptclean.wdl index 6e0de8a9..1eea686c 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -68,7 +68,7 @@ task GetSJsFromGtf { task GetTranscriptCleanStats { input { - File transcriptCleanSamFile + File inputSam String outputPrefix String memory = "4G" @@ -80,7 +80,7 @@ task GetTranscriptCleanStats { set -e mkdir -p "$(dirname ~{outputPrefix})" get_TranscriptClean_stats \ - ~{transcriptCleanSamFile} \ + ~{inputSam} \ ~{outputPrefix} } @@ -96,7 +96,7 @@ task GetTranscriptCleanStats { parameter_meta { # inputs - transcriptCleanSamFile: {description: "Output sam file from transcriptclean", category: "required"} + inputSam: {description: "Output sam file from transcriptclean", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -157,7 +157,7 @@ task TranscriptClean { output { File fastaFile = outputPrefix + "_clean.fa" File logFile = outputPrefix + "_clean.log" - File samFile = outputPrefix + "_clean.sam" + File outputSam = outputPrefix + "_clean.sam" File logFileTE = outputPrefix + "_clean.TE.log" } @@ -194,7 +194,7 @@ task TranscriptClean { # outputs fastaFile: {description: "Fasta file containing corrected reads."} logFile: {description: "Log file of transcriptclean run."} - samFile: {description: "Sam file containing corrected aligned reads."} + outputSam: {description: "Sam file containing corrected aligned reads."} logFileTE: {description: "TE log file of transcriptclean run."} } } From 
fe7b8a1edbf5cbf3ab766a67c9428d430807204f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 22 Jul 2020 12:06:57 +0200 Subject: [PATCH 0529/1208] Fix tests. --- talon.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/talon.wdl b/talon.wdl index c796c1ce..c11ab9e0 100644 --- a/talon.wdl +++ b/talon.wdl @@ -322,7 +322,7 @@ task InitializeTalonDatabase { } output { - File database = outputPrefix + ".db" + File databaseFile = outputPrefix + ".db" } runtime { @@ -346,13 +346,13 @@ task InitializeTalonDatabase { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - database: {description: "Talon database."} + databaseFile: {description: "Talon database."} } } task LabelReads { input { - File samFile + File inputSam File referenceGenome Int fracaRangeSize = 20 String tmpDir = "./tmp_label_reads" @@ -369,7 +369,7 @@ task LabelReads { set -e mkdir -p "$(dirname ~{outputPrefix})" talon_label_reads \ - --f=~{samFile} \ + --f=~{inputSam} \ --g=~{referenceGenome} \ --t=~{threads} \ --ar=~{fracaRangeSize} \ @@ -392,7 +392,7 @@ task LabelReads { parameter_meta { # inputs - samFile: {description: "Sam file of transcripts.", category: "required"} + inputSam: {description: "Sam file of transcripts.", category: "required"} referenceGenome: {description: "Reference genome fasta file.", category: "required"} fracaRangeSize: {description: "Size of post-transcript interval to compute fraction.", category: "common"} tmpDir: {description: "Path to directory for tmp files.", category: "advanced"} From 5010738cf4cf0ac034ff5b0418938e9ffe77a518 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 22 Jul 2020 12:27:18 +0200 Subject: [PATCH 0530/1208] Update CHANGELOG. 
--- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9796c484..91698644 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Renamed a few inputs in centrifuge.wdl, isoseq3.wdl, talon.wdl, + transcriptclean.wdl to be more descriptive. + Renamed outputs of tasks used in the TALON-WDL, PacBio-subreads-processing & sequence-classification pipelines. + Reworked bcf2vcf task into bcftools view task. From d8a159f32dd321b87fd76c1ca4522e109eb5e0fc Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 22 Jul 2020 12:33:57 +0200 Subject: [PATCH 0531/1208] Fix tests. --- transcriptclean.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/transcriptclean.wdl b/transcriptclean.wdl index 1eea686c..79661307 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -61,6 +61,7 @@ task GetSJsFromGtf { memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + # outputs spliceJunctionFile: {description: "Extracted splice junctions."} } @@ -140,7 +141,7 @@ task TranscriptClean { -g ~{referenceGenome} \ -t ~{cores} \ --maxLenIndel=~{maxLenIndel} \ - --maxSJOffset=~{maxSJoffset} \ + --maxSJOffset=~{maxSJOffset} \ -o ~{outputPrefix} \ ~{true="-m true" false="-m false" correctMismatches} \ ~{true="-i true" false="-i false" correctIndels} \ From fd4e8619a2838b533796bad70a73d6e21032a27f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 22 Jul 2020 12:45:51 +0200 Subject: [PATCH 0532/1208] Update CHANGELOG. 
--- CHANGELOG.md | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 91698644..95241551 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,13 +16,13 @@ version 4.0.0-develop + Renamed outputs of tasks used in the TALON-WDL, PacBio-subreads-processing & sequence-classification pipelines. + Reworked bcf2vcf task into bcftools view task. -+ Removed the redundant format flag from the htseq interface. This is ++ Removed the redundant format flag from the htseq interface. This is autodetected in newer versions of htseq. + Update docker images for samtools, bcftools, picard, GATK, cutadapt, htseq and chunked-scatter. + Default docker images for bwa, bwakit and hisat2 updated to include samtools 1.10. -+ Alignment tasks (STAR, Hisat2, BWA) now produce BAM files at level 1 ++ Alignment tasks (STAR, Hisat2, BWA) now produce BAM files at level 1 compression. + Hisat2 task has added controls for samtools. + Alignment tasks no longer produce BAM indexes as these are not needed @@ -34,18 +34,18 @@ version 4.0.0-develop BAM files. + Changed PicardMarkduplicates to use COMPRESSION_LEVEL=1 by default with the htsjdk deflater. - This makes the task finish in 32% less time at the cost of a 8% larger BAM - file. + This makes the task finish in 32% less time at the cost of a 8% larger BAM + file. + Added sambamba markdup and sambamba sort. NOTE: samtools sort is more efficient and is recommended. + Correctly represent samtools inconsistent use of the threads flag. Sometimes it means 'threads' sometimes it means 'additional threads'. - BioWDL tasks now use only threads. The `threads - 1` conversion is + BioWDL tasks now use only threads. The `threads - 1` conversion is applied where necessary for samtools tools that use additional threads. + Updated BWA MEM and BWA KIT tasks to use samtools sort version 1.10 for sorting the BAM file. 
-+ Updated memory requirements on bcftools Stats, bwa mem, bwakit, GATK - ApplyBQSR, GATK BaseRecalibrator, GATK GatherBqsrReports, Gatk ++ Updated memory requirements on bcftools Stats, bwa mem, bwakit, GATK + ApplyBQSR, GATK BaseRecalibrator, GATK GatherBqsrReports, Gatk HaplotypeCaller, Picard CollectMultipleMetrics, Picard GatherBamFiles, samtools Flagstat, samtools sort and bcftools stats. + TALON: Update `FilterTalonTranscripts` to new version, which removes the @@ -54,13 +54,13 @@ version 4.0.0-develop + TALON: Update to version 5.0. + Add tasks for pbmm2, the PacBio wrapper for minimap2. + Update the image for chunked-scatter and make use of new features from 0.2.0. -+ Tuned resource requirements for GATK VariantEval, MultiQC, Picard metrics and ++ Tuned resource requirements for GATK VariantEval, MultiQC, Picard metrics and STAR. -+ Added a new task for [scatter-regions](https://github.com/biowdl/chunked-scatter) - that replaces biopet-scatterregions. ++ Added a new task for [scatter-regions](https://github.com/biowdl/chunked-scatter) + that replaces biopet-scatterregions. + The FastQC task now talks to the Java directly instead of using the included Perl wrapper for FastQC. This has the advantage that memory and threads can - be set independently. A rather high maximum heap size of 1750MB (Xmx1750M) + be set independently. A rather high maximum heap size of 1750MB (Xmx1750M) was set, as OOM errors occurred frequently on some fastqs. + STAR: Add options regarding alignment score (regarding read length as well) for tweaking when processing rRNA depleted samples. @@ -82,12 +82,12 @@ version 4.0.0-develop opposed to virtual memory). + Added `-XX:ParallelGCThreads=1` to the java options of java tasks. + Added `timeMinutes` input to many tasks, this indicates a maximum - number of minutes that the job will run. The associated runtime + number of minutes that the job will run. 
The associated runtime attribute is `time_minutes` which can be used to inform a scheduler (eg. slurm) of the run time of the job. + Added STAR GenomeGenerate task. -+ GATK.HaplotypeCaller: Add `--dont-use-soft-clipped-bases` and - `--standard-min-confidence-threshold-for-calling` options. These are ++ GATK.HaplotypeCaller: Add `--dont-use-soft-clipped-bases` and + `--standard-min-confidence-threshold-for-calling` options. These are required for RNA seq variant calling according to GATK best practices. + Samtools: Fix quotations in sort command. + Samtools SortByName is now called Sort. @@ -100,7 +100,6 @@ version 4.0.0-develop + Lima: Replace mv command with cp. + Add WDL task for smoove (lumpy) sv-caller. - version 3.1.0 --------------------------- + Default threads for BWA in bwa.Kit task: 4. Samtools sort in the @@ -113,12 +112,11 @@ version 3.1.0 + Lima: Add workaround for glob command not locating files in output directory. + CCS: Add missing backslash. + Cutadapt now explicitly calls the `--compression-level` flag with compression - level 1 to prevent cutadapt from using very high gzip compression level 6 + level 1 to prevent cutadapt from using very high gzip compression level 6 that uses 400% more cpu time. + Update default docker image for cutadapt and fastqc. + Default number of cores for cutadapt and bwamem to 4 cores. - version 3.0.0 --------------------------- + Add optional input umiSeparator in umi-tools dedup task. @@ -129,7 +127,7 @@ version 3.0.0 + Allow setting the `--emit-ref-confidence` flag for HaplotypeCaller. + Add `--output-mode` flag to HaplotypeCaller. + Added rtg.Format and rtg.VcfEval tasks. -+ Added gatk.SelectVariants and gatk.VariantFiltration tasks. ++ Added gatk.SelectVariants and gatk.VariantFiltration tasks. + Fixed a bug where the output directory was not created for bwa.Kit. + Add vt task for variants normalization and decomposition. + Update WDL task Picard (Add task RenameSample). 
@@ -146,11 +144,11 @@ version 3.0.0 biopet.ScatterRegions now always returns correctly ordered scatters. + Add tasks for umi-tools dedup and extract. + Add `GenomicsDBImport` task for GATK. -+ Add `annotationGroups` input to `GenotypeGVCFs` to allow setting multiple ++ Add `annotationGroups` input to `GenotypeGVCFs` to allow setting multiple annotation groups. The `StandardAnnotation` group is still used as default. + GenotypeGVCFs, only allow one input GVCF file, as the tool also only allows - one input file. -+ Rename HaplotypeCallerGVCF to HaplotypeCaller. Add `gvcf` option to set + one input file. ++ Rename HaplotypeCallerGVCF to HaplotypeCaller. Add `gvcf` option to set whether output should be a GVCF. + Centrifuge: Add Krona task specific to Centrifuge. + Centrifuge: Fix Centrifuge tests, where sometimes the index files could still not be located. @@ -175,7 +173,7 @@ version 3.0.0 + PreprocessIntervals + Add common.TextToFile task. + Add bedtools.Intersect. -+ Add `-o pipefail` to bedtools.MergeBedFiles to prevent errors in BED files ++ Add `-o pipefail` to bedtools.MergeBedFiles to prevent errors in BED files from going unnoticed. + Centrifuge: Fix -1/-U options for single end data. + Add bedtools.Complement, bedtools.Merge, and add a task to combine multiple @@ -261,7 +259,7 @@ version 1.0.0 + Common: Update dockerTag to dockerImage. + GATK: Add CombineVariants task that allows, e.g., to merge VCFs from different callers. + Mutect2: Add GATK tasks related to variant filtering (LearnReadOrientationModel, MergeStats, GetPileupSummaries, CalculateContamination and FilterMutectCalls). -+ Mutect2: Add "--germline-resource" and "--f1r2-tar-gz" inputs, requiring an update to GATK 4.1.2.0. ++ Mutect2: Add "--germline-resource" and "--f1r2-tar-gz" inputs, requiring an update to GATK 4.1.2.0. + Mutect2: Add necessary missing index attribute for panel of normals. + MultiQC: Add memory variable to multiqc task. 
+ GATK: SplitNCigarReads, BaseRecalibration and ApplyBQSR do no longer need regions files as required inputs. From ccbb50c1bb5c45e60a81c3051ee6a041c0e8d6ec Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 22 Jul 2020 17:07:52 +0200 Subject: [PATCH 0533/1208] Bring changes to hisat2 too --- hisat2.wdl | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/hisat2.wdl b/hisat2.wdl index c24610ed..a86214f9 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -34,10 +34,10 @@ task Hisat2 { String summaryFilePath = basename(outputBam, ".bam") + ".summary.txt" Int threads = 4 - Int sortThreads = 1 + Int? sortThreads Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 - Int memoryGb = 1 + threads + ceil(size(indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * sortThreads + Int? memoryGb Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / threads) # quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1 # is a combination of hisat2 and samtools @@ -45,7 +45,12 @@ task Hisat2 { String dockerImage = "quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:2880dd9d8ad0a7b221d4eacda9a818e92983128d-0" } - String bamIndexPath = sub(outputBam, "\.bam$", ".bai") + # Samtools sort may block the pipe while it is writing data to disk. + # This can lead to cpu underutilization. + # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. 
+ Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) + Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) + Int estimatedMemoryGb = 1 + ceil(size(indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads command { set -e -o pipefail @@ -63,7 +68,7 @@ task Hisat2 { --new-summary \ --summary-file ~{summaryFilePath} \ | samtools sort \ - ~{"-@ " + sortThreads} \ + ~{"-@ " + totalSortThreads} \ -m ~{sortMemoryPerThreadGb}G \ -l ~{compressionLevel} \ - \ @@ -76,8 +81,8 @@ task Hisat2 { } runtime { - memory: "~{memoryGb}G" - cpu: threads + 1 + memory: "~{estimatedMemoryGb}G" + cpu: threads time_minutes: timeMinutes docker: dockerImage } From f6bf488f35382c6a21095756a96b50fd54b4d818 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 22 Jul 2020 17:09:41 +0200 Subject: [PATCH 0534/1208] correct memory selection --- hisat2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hisat2.wdl b/hisat2.wdl index a86214f9..f9a4bc59 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -81,7 +81,7 @@ task Hisat2 { } runtime { - memory: "~{estimatedMemoryGb}G" + memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" cpu: threads time_minutes: timeMinutes docker: dockerImage From 15c960d2beb3423b1608b3ddf3479808e991218c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 22 Jul 2020 17:12:21 +0200 Subject: [PATCH 0535/1208] update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 95241551..55fb1e8a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The + number of threads is now related to the number of threads on the aligner. + Using more threads reduces the chance of the samtools sort pipe getting + blocked if it's full. 
+ Renamed a few inputs in centrifuge.wdl, isoseq3.wdl, talon.wdl, transcriptclean.wdl to be more descriptive. + Renamed outputs of tasks used in the TALON-WDL, PacBio-subreads-processing & From d15ee30f59bc8f16f5e4702ba2e35a76e8ead10c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jul 2020 08:26:50 +0200 Subject: [PATCH 0536/1208] Combine BWA and BW kit tasks --- bwa.wdl | 85 +++++---------------------------------------------------- 1 file changed, 7 insertions(+), 78 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 58e1dc80..cdaed83a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -21,78 +21,6 @@ version 1.0 # SOFTWARE. task Mem { - input { - File read1 - File? read2 - BwaIndex bwaIndex - String outputPath - String? readgroup - - Int threads = 4 - Int? sortThreads - Int sortMemoryPerThreadGb = 2 - Int compressionLevel = 1 - Int? memoryGb - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 200 / threads) - # This container contains: samtools (1.10), bwa (0.7.17-r1188) - String dockerImage = "quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:eabfac3657eda5818bae4090db989e3d41b01542-0" - } - - # Samtools sort may block the pipe while it is writing data to disk. - # This can lead to cpu underutilization. - # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. - Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) - Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) - # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
- Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads - - command { - set -e -o pipefail - mkdir -p "$(dirname ~{outputPath})" - bwa mem \ - ~{"-t " + threads} \ - ~{"-R '" + readgroup}~{true="'" false="" defined(readgroup)} \ - ~{bwaIndex.fastaFile} \ - ~{read1} \ - ~{read2} \ - | samtools sort \ - ~{"-@ " + totalSortThreads} \ - -m ~{sortMemoryPerThreadGb}G \ - -l ~{compressionLevel} \ - - \ - -o ~{outputPath} - } - - output { - File outputBam = outputPath - } - - runtime { - cpu: threads - memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" - time_minutes: timeMinutes - docker: dockerImage - } - - parameter_meta { - read1: {description: "The first or single end fastq file.", category: "required"} - read2: {description: "The second end fastq file.", category: "common"} - bwaIndex: {description: "The BWA index files.", category: "required"} - outputPath: {description: "The location the output BAM file should be written to.", category: "required"} - readgroup: {description: "The readgroup to be assigned to the reads. See BWA mem's `-R` option.", category: "common"} - - threads: {description: "The number of threads to use.", category: "advanced"} - memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} - sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} - sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} - compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} - -task Kit { input { File read1 File? 
read2 @@ -100,7 +28,7 @@ task Kit { String outputPrefix String? readgroup Boolean sixtyFour = false - + Boolean usePostalt = false Int threads = 4 Int? sortThreads Int sortMemoryPerThreadGb = 2 @@ -118,6 +46,8 @@ task Kit { Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads + String bwaKitCommand = "bwa-postalt.js -p ~{outputPrefix}.hla ~{bwaIndex.fastaFile}" + (if sixtyFour then ".64.alt" else ".alt") + " | " + String kitCommandString = if usePostalt then bwaKitCommand else "" command { set -e @@ -129,10 +59,8 @@ task Kit { ~{read1} \ ~{read2} \ 2> ~{outputPrefix}.log.bwamem | \ - bwa-postalt.js \ - -p ~{outputPrefix}.hla \ - ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ - samtools sort \ + ~{kitCommandString} \ + samtools sort \ ~{"-@ " + totalSortThreads} \ -m ~{sortMemoryPerThreadGb}G \ -l ~{compressionLevel} \ @@ -157,7 +85,8 @@ task Kit { # inputs read1: {description: "The first-end fastq file.", category: "required"} read2: {description: "The second-end fastq file.", category: "common"} - bwaIndex: {description: "The BWA index, including a .alt file.", category: "required"} + bwaIndex: {description: "The BWA index, including (optionally) a .alt file.", category: "required"} + usePostalt: {description: "Whether to use the postalt script from bwa kit."} outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"} readgroup: {description: "A readgroup identifier.", category: "common"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} From 830eb51555889da4c3733ad5c7bbea4528a57887 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jul 2020 09:26:14 +0200 Subject: [PATCH 0537/1208] 
Enable/disable postalt with comment --- bwa.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index cdaed83a..f2c731f2 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -46,9 +46,8 @@ task Mem { Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads - String bwaKitCommand = "bwa-postalt.js -p ~{outputPrefix}.hla ~{bwaIndex.fastaFile}" + (if sixtyFour then ".64.alt" else ".alt") + " | " - String kitCommandString = if usePostalt then bwaKitCommand else "" + # The bwa postalt script is out commented as soon as usePostalt = false. It is a hack but it should work. command { set -e mkdir -p "$(dirname ~{outputPrefix})" @@ -59,7 +58,7 @@ task Mem { ~{read1} \ ~{read2} \ 2> ~{outputPrefix}.log.bwamem | \ - ~{kitCommandString} \ + ~{true="" false="#" usePostalt} bwa-postalt.js -p ~{outputPrefix}.hla ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ samtools sort \ ~{"-@ " + totalSortThreads} \ -m ~{sortMemoryPerThreadGb}G \ @@ -70,6 +69,7 @@ task Mem { output { File outputBam = outputPrefix + ".aln.bam" + File? outputHla = outputPrefix + ".hla" } runtime { From 2b073f668e17643ac393d012986b9bcc3096978f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jul 2020 09:30:09 +0200 Subject: [PATCH 0538/1208] Add comments on comments --- bwa.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index f2c731f2..fdeb870f 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -47,7 +47,8 @@ task Mem { # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads - # The bwa postalt script is out commented as soon as usePostalt = false. It is a hack but it should work. + # The bwa postalt script is out commented as soon as usePostalt = false. + # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. command { set -e mkdir -p "$(dirname ~{outputPrefix})" From 997b7765a0403778ad842ae2a8e1c50f38bfd05b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jul 2020 09:55:49 +0200 Subject: [PATCH 0539/1208] Add bwa-mem2 task --- bwa-mem2.wdl | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 bwa-mem2.wdl diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl new file mode 100644 index 00000000..df3801b4 --- /dev/null +++ b/bwa-mem2.wdl @@ -0,0 +1,112 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Mem { + # NOTE: THIS IS A COPY OF THE BWA TASK WITH ONLY bwa CHANGED TO bwa-mem2 AND A DIFFERENT DOCKER IMAGE. + input { + File read1 + File? read2 + BwaIndex bwaIndex + String outputPrefix + String? readgroup + Boolean sixtyFour = false + Boolean usePostalt = false + Int threads = 4 + Int? sortThreads + Int sortMemoryPerThreadGb = 2 + Int compressionLevel = 1 + Int? memoryGb + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) + # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10 + String dockerImage = "biowdl/bwamem2-kit:2.0-dev" + } + + # Samtools sort may block the pipe while it is writing data to disk. + # This can lead to cpu underutilization. + # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. + Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) + Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) + # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. + Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads + + # The bwa postalt script is out commented as soon as usePostalt = false. + # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. 
+ command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + bwa-mem2 mem \ + -t ~{threads} \ + ~{"-R '" + readgroup}~{true="'" false="" defined(readgroup)} \ + ~{bwaIndex.fastaFile} \ + ~{read1} \ + ~{read2} \ + 2> ~{outputPrefix}.log.bwamem | \ + ~{true="" false="#" usePostalt} bwa-postalt.js -p ~{outputPrefix}.hla ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \ + samtools sort \ + ~{"-@ " + totalSortThreads} \ + -m ~{sortMemoryPerThreadGb}G \ + -l ~{compressionLevel} \ + - \ + -o ~{outputPrefix}.aln.bam + } + + output { + File outputBam = outputPrefix + ".aln.bam" + File? outputHla = outputPrefix + ".hla" + } + + runtime { + # One extra thread for bwa-postalt + samtools is not needed. + # These only use 5-10% of compute power and not always simultaneously. + cpu: threads + memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + read1: {description: "The first-end fastq file.", category: "required"} + read2: {description: "The second-end fastq file.", category: "common"} + bwaIndex: {description: "The BWA index, including (optionally) a .alt file.", category: "required"} + usePostalt: {description: "Whether to use the postalt script from bwa kit."} + outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"} + readgroup: {description: "A readgroup identifier.", category: "common"} + sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} + threads: {description: "The number of threads to use for alignment.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} + 
compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + + # outputs + outputBam: "The produced BAM file." + } +} + +struct BwaIndex { + File fastaFile + Array[File] indexFiles +} From e968433fdc7d7f26986ddd1ba264f80dd7579d37 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Thu, 23 Jul 2020 15:34:30 +0200 Subject: [PATCH 0540/1208] Update image --- bwa-mem2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index df3801b4..5ac6958e 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -37,7 +37,7 @@ task Mem { Int? memoryGb Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10 - String dockerImage = "biowdl/bwamem2-kit:2.0-dev" + String dockerImage = "biowdl/bwamem2-kit:2.0-dev2" # TODO: Update to biocontainer. } # Samtools sort may block the pipe while it is writing data to disk. From d27eea90b9aa3b3683de5522f8f5cb541ec86211 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 27 Jul 2020 10:21:55 +0200 Subject: [PATCH 0541/1208] update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 55fb1e8a..d3d719f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ A bwa-mem2 task was created with the same interface (including usePostalt) + as the bwa mem task. ++ bwa mem and bwa kit are now one task. The usePostalt boolean can be used to + switch the postalt script on and off. 
+ bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The number of threads is now related to the number of threads on the aligner. Using more threads reduces the chance of the samtools sort pipe getting From 18fb322e8b24dee1292f56b6245dc26325eb5ffd Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 27 Jul 2020 15:59:21 +0200 Subject: [PATCH 0542/1208] use mulled biocontainer --- bwa-mem2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index 5ac6958e..d3290d0b 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -37,7 +37,7 @@ task Mem { Int? memoryGb Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10 - String dockerImage = "biowdl/bwamem2-kit:2.0-dev2" # TODO: Update to biocontainer. + String dockerImage = "quay.io/biocontainers/mulled-v2-6a15c99309c82b345497d24489bee67bbb76c2f6:1c9c3227b9bf825a8dc9726a25701aa23c0b1f12-0" } # Samtools sort may block the pipe while it is writing data to disk. From 6eb33c3dab050b2b1d8b34183caeaf8a8026407b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 28 Jul 2020 07:51:04 +0200 Subject: [PATCH 0543/1208] Set compression level to 1 and higher time estimate for mergevcf task --- picard.wdl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/picard.wdl b/picard.wdl index adb55b4b..1afa5ea7 100644 --- a/picard.wdl +++ b/picard.wdl @@ -553,8 +553,14 @@ task MergeVCFs { String memory = "5G" String javaXmx = "4G" - Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) + Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + Int compressionLevel = 1 + Boolean useJdkInflater = true # Slightly faster than the intel one. + # Better results for compression level 1 (much smaller). Higher compression levels similar to intel deflater. 
+ # NOTE: this might change in the future when the intel deflater is updated! + Boolean useJdkDeflater = true + } # Using MergeVcfs instead of GatherVcfs so we can create indices @@ -566,7 +572,10 @@ task MergeVCFs { picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ MergeVcfs \ INPUT=~{sep=' INPUT=' inputVCFs} \ - OUTPUT=~{outputVcfPath} + OUTPUT=~{outputVcfPath} \ + COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} } output { @@ -592,6 +601,9 @@ task MergeVCFs { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. 
False uses the optimized intel deflater.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written", category: "advanced"} } } From 9a9bf3f5b1bcef7669b05fcc132caadf411e1140 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 28 Jul 2020 15:51:43 +0200 Subject: [PATCH 0544/1208] add gridss task --- gridss.wdl | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 gridss.wdl diff --git a/gridss.wdl b/gridss.wdl new file mode 100644 index 00000000..37ac83b7 --- /dev/null +++ b/gridss.wdl @@ -0,0 +1,65 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import "bwa.wdl" as bwa + +task GRIDSS { + input { + File tumorBam + File tumorBai + String tumorLabel + File? normalBam + File? normalBai + String? 
normalLabel + BwaIndex reference + String outputPrefix = "gridss" + + Int threads = 1 + String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" + } + + command { + gridss \ + --reference ~{reference.fastaFile} \ + --output ~{outputPrefix}.vcf.gz \ + --assembly ~{outputPrefix}_assembly.bam \ + ~{"-t " + threads} \ + --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ + ~{normalBam} \ + ~{tumorBam} + tabix -p vcf ~{outputPrefix}.vcf.gz + samtools index ~{outputPrefix}_assembly.bam ~{outputPrefix}_assembly.bai + } + + output { + File vcf = outputPrefix + ".vcf.gz" + File vcfIndex = outputPrefix + ".vcf.gz.tbi" + File assembly = outputPrefix + "_assembly.bam" + File assemblyIndex = outputPrefix + "_assembly.bai" + } + + runtime { + cpu: threads + memory: "32G" + docker: dockerImage + } +} \ No newline at end of file From 20068636fe79595050396d2bdc665fde8126bf33 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jul 2020 10:17:58 +0200 Subject: [PATCH 0545/1208] slightly tune memory requirements --- bwa-mem2.wdl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index d3290d0b..6ea4578d 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -21,7 +21,6 @@ version 1.0 # SOFTWARE. task Mem { - # NOTE: THIS IS A COPY OF THE BWA TASK WITH ONLY bwa CHANGED TO bwa-mem2 AND A DIFFERENT DOCKER IMAGE. input { File read1 File? read2 @@ -45,8 +44,13 @@ task Mem { # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) - # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
- Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads + # BWA-mem2's index files contain 2 BWT indexes of which only one is used. .2bit64 is used by default and + # .8bit32 is used for avx2. + # The larger one of these is the 8bit32 index. Since we do not know beforehand which one is used we need to accomodate for that. + # Using only the 8bit32 index uses 57,5% of the index files. Since bwa-mem2 uses slightly more memory than the index + # We put it at 62% as a safety factor. That means the memory usage for bwa-mem will be 53G for a human genome. Resulting in 60G total + # on 8 cores with samtools with 3 sort threads. + Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 0.62) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. From ed49efcbda3f90819dbc8561be89690268f23dd9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jul 2020 13:17:18 +0200 Subject: [PATCH 0546/1208] Update default cutadapt image --- cutadapt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index d125af43..7faeaff1 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -81,7 +81,7 @@ task Cutadapt { Int cores = 4 String memory = "~{300 + 100 * cores}M" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) - String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37h516909a_0" + String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" } String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) From be890eb56a43ea86d32ead9c5c9b85d134d01166 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jul 2020 13:22:52 +0200 Subject: [PATCH 0547/1208] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 55fb1e8a..255c2186 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Picard MergeVcf now uses compression level 1 by default. + bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The number of threads is now related to the number of threads on the aligner. Using more threads reduces the chance of the samtools sort pipe getting From 94128f4fe9fee9bfc88b7c96c07768141c197fa7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 29 Jul 2020 13:40:05 +0200 Subject: [PATCH 0548/1208] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 255c2186..67ea94a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 4.0.0-develop --------------------------- ++ Added a task for GRIDSS. + Picard MergeVcf now uses compression level 1 by default. + bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The number of threads is now related to the number of threads on the aligner. From 247561f6bfe261744902980621ad133f2ba8d971 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 29 Jul 2020 14:30:09 +0200 Subject: [PATCH 0549/1208] add parameter_meta to gridss --- gridss.wdl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 37ac83b7..14bc441c 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -38,6 +38,8 @@ task GRIDSS { } command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" gridss \ --reference ~{reference.fastaFile} \ --output ~{outputPrefix}.vcf.gz \ @@ -62,4 +64,18 @@ task GRIDSS { memory: "32G" docker: dockerImage } + + parameter_meta { + tumorBam: {description: "The input BAM file. 
This should be the tumor/case sample in case of a paired analysis.", category: "required"} + tumorBai: {description: "The index for tumorBam.", category: "required"} + tumorLabel: {description: "The name of the (tumor) sample.", category: "required"} + normalBam: {description: "The BAM file for the normal/control sample.", category: "advanced"} + normalBai: {description: "The index for normalBam.", category: "advanced"} + normalLabel: {description: "The name of the normal sample.", category: "advanced"} + BwaIndex reference: {description: "A BWA index, this should also include the fasta index file (.fai).", category: "required"} + outputPrefix: {description: "The prefix for the output files. This may include parent directories.", category: "common"} + + threads: {description: "The number of the threads to use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } } \ No newline at end of file From 82f0cc79f1d2d49a5d34c27ea743f1be7655d7f5 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 29 Jul 2020 14:38:13 +0200 Subject: [PATCH 0550/1208] fix parameter_meta --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 14bc441c..3d4b7d73 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -72,7 +72,7 @@ task GRIDSS { normalBam: {description: "The BAM file for the normal/control sample.", category: "advanced"} normalBai: {description: "The index for normalBam.", category: "advanced"} normalLabel: {description: "The name of the normal sample.", category: "advanced"} - BwaIndex reference: {description: "A BWA index, this should also include the fasta index file (.fai).", category: "required"} + reference: {description: "A BWA index, this should also include the fasta index file (.fai).", category: "required"} outputPrefix: {description: "The prefix for the output files. 
This may include parent directories.", category: "common"} threads: {description: "The number of the threads to use.", category: "advanced"} From 0ecfa670f78336f0fc876ba2a44f6601971f4ca1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 5 Aug 2020 10:18:11 +0200 Subject: [PATCH 0551/1208] update scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 325a129c..c0b48b0a 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 325a129c14de56b2055ee0e9e0da7dc74df5fec4 +Subproject commit c0b48b0a916913d1e6751d7744d1cec37559a81f From d1e2d6e56131432ea941722aa1b6ac3527d2b02d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 5 Aug 2020 10:37:36 +0200 Subject: [PATCH 0552/1208] set version in changelog to stable version --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 255c2186..96f4559c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 4.0.0-develop +version 4.0.0 --------------------------- + Picard MergeVcf now uses compression level 1 by default. + bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. 
The From 5d5a335ae7791d360af366db3ce461bc6c07ca7e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 5 Aug 2020 10:40:12 +0200 Subject: [PATCH 0553/1208] setting next version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 944880fa..ee74734a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.2.0 +4.1.0 From d1922724faf06dac8e835c395fc37e5d5e64f515 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Thu, 6 Aug 2020 13:21:07 +0200 Subject: [PATCH 0554/1208] add missing category for outputType in bcftools view --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 2677899b..8875903b 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -58,7 +58,7 @@ task View { parameter_meta { inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf"} + outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 8591feb0815fd44472761359244c4ee6c6d45752 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 7 Aug 2020 13:42:20 +0200 Subject: [PATCH 0555/1208] Add pacbio bam2fastx tool. --- CHANGELOG.md | 4 ++ bam2fastx.wdl | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 bam2fastx.wdl diff --git a/CHANGELOG.md b/CHANGELOG.md index 96f4559c..272499c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ This document is user facing. 
Please word the changes in such a way that users understand how the changes affect the new version. --> +version 5.0.0-dev +--------------------------- ++ Add wdl file for pacbio's bam2fastx tool. + version 4.0.0 --------------------------- + Picard MergeVcf now uses compression level 1 by default. diff --git a/bam2fastx.wdl b/bam2fastx.wdl new file mode 100644 index 00000000..09c56897 --- /dev/null +++ b/bam2fastx.wdl @@ -0,0 +1,129 @@ +version 1.0 + +# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Bam2Fasta { + input { + File inputFile + String outputPrefix + Int compressionLevel = 1 + Boolean uncompressedOutput = false + Boolean splitByBarcode = false + + String? 
seqIdPrefix + + String memory = "2G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.0--he1c1bb9_8" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + bam2fasta \ + --output ~{outputPrefix} \ + -c ~{compressionLevel} + ~{true="-u" false="" uncompressedOutput} \ + ~{true="--split-barcodes" false="" splitByBarcode} \ + ~{"--seqid-prefix " + seqIdPrefix} \ + ~{inputFile} + } + + output { + File fastaFile = outputPrefix + ".fasta.gz" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input pacbio bam file.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} + uncompressedOutput: {description: "Do not compress. In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} + splitByBarcode: {description: "Split output into multiple fasta files, by barcode pairs.", category: "advanced"} + seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + fastaFile: {description: "The fasta output file."} + } +} + +task Bam2Fastq { + input { + File inputFile + String outputPrefix + Int compressionLevel = 1 + Boolean uncompressedOutput = false + Boolean splitByBarcode = false + + String? 
seqIdPrefix + + String memory = "2G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.0--he1c1bb9_8" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + bam2fastq \ + --output ~{outputPrefix} \ + -c ~{compressionLevel} + ~{true="-u" false="" uncompressedOutput} \ + ~{true="--split-barcodes" false="" splitByBarcode} \ + ~{"--seqid-prefix " + seqIdPrefix} \ + ~{inputFile} + } + + output { + File fastaFile = outputPrefix + ".fastq.gz" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input pacbio bam file.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} + uncompressedOutput: {description: "Do not compress. In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} + splitByBarcode: {description: "Split output into multiple fastq files, by barcode pairs.", category: "advanced"} + seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + fastaFile: {description: "The fastq output file."} + } +} From 9ad9425766843e2706ff440457d6ec1d8b21916b Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 7 Aug 2020 13:44:02 +0200 Subject: [PATCH 0556/1208] Correct output naming. 
--- bam2fastx.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 09c56897..21f1c604 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -102,7 +102,7 @@ task Bam2Fastq { } output { - File fastaFile = outputPrefix + ".fastq.gz" + File fastqFile = outputPrefix + ".fastq.gz" } runtime { @@ -124,6 +124,6 @@ task Bam2Fastq { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - fastaFile: {description: "The fastq output file."} + fastqFile: {description: "The fastq output file."} } } From aea639c83bc4b306df01986f55f4e774208e8a8e Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 10 Aug 2020 11:27:23 +0200 Subject: [PATCH 0557/1208] Add index input to the tasks. --- bam2fastx.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 21f1c604..27ed15cc 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -23,6 +23,7 @@ version 1.0 task Bam2Fasta { input { File inputFile + File bamIndex String outputPrefix Int compressionLevel = 1 Boolean uncompressedOutput = false @@ -60,6 +61,7 @@ task Bam2Fasta { parameter_meta { # inputs inputFile: {description: "The input pacbio bam file.", category: "required"} + bamIndex: {description: "The .pbi index for the input file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} uncompressedOutput: {description: "Do not compress. 
In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} @@ -77,6 +79,7 @@ task Bam2Fasta { task Bam2Fastq { input { File inputFile + File bamIndex String outputPrefix Int compressionLevel = 1 Boolean uncompressedOutput = false @@ -114,6 +117,7 @@ task Bam2Fastq { parameter_meta { # inputs inputFile: {description: "The input pacbio bam file.", category: "required"} + bamIndex: {description: "The .pbi index for the input file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} uncompressedOutput: {description: "Do not compress. In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} From 628b9169e7791eaad69b3c58f3f0b324a529be12 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 10 Aug 2020 13:25:23 +0200 Subject: [PATCH 0558/1208] Add missing ". --- bam2fastx.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 27ed15cc..ccea6edb 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -39,9 +39,10 @@ task Bam2Fasta { command { set -e mkdir -p "$(dirname ~{outputPrefix})" + chmod 755 ~{inputFile} bam2fasta \ --output ~{outputPrefix} \ - -c ~{compressionLevel} + -c ~{compressionLevel} \ ~{true="-u" false="" uncompressedOutput} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ @@ -95,9 +96,10 @@ task Bam2Fastq { command { set -e mkdir -p "$(dirname ~{outputPrefix})" + chmod 755 ~{inputFile} bam2fastq \ --output ~{outputPrefix} \ - -c ~{compressionLevel} + -c ~{compressionLevel} \ ~{true="-u" false="" uncompressedOutput} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ From 4da76be86e4cfe93a63ab0700468c1be9f572683 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 10 Aug 2020 14:05:35 +0200 Subject: [PATCH 
0559/1208] Remove left-over chmod. --- bam2fastx.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index ccea6edb..f9699d3b 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -39,7 +39,6 @@ task Bam2Fasta { command { set -e mkdir -p "$(dirname ~{outputPrefix})" - chmod 755 ~{inputFile} bam2fasta \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ @@ -96,7 +95,6 @@ task Bam2Fastq { command { set -e mkdir -p "$(dirname ~{outputPrefix})" - chmod 755 ~{inputFile} bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ From adac77e53089d7875c83ad16bb7271621c30abcb Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 11 Aug 2020 10:30:53 +0200 Subject: [PATCH 0560/1208] Remove uncompressed output options. --- bam2fastx.wdl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index f9699d3b..5e5fb50a 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -26,7 +26,6 @@ task Bam2Fasta { File bamIndex String outputPrefix Int compressionLevel = 1 - Boolean uncompressedOutput = false Boolean splitByBarcode = false String? seqIdPrefix @@ -42,7 +41,6 @@ task Bam2Fasta { bam2fasta \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ - ~{true="-u" false="" uncompressedOutput} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ ~{inputFile} @@ -64,7 +62,6 @@ task Bam2Fasta { bamIndex: {description: "The .pbi index for the input file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} - uncompressedOutput: {description: "Do not compress. 
In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} splitByBarcode: {description: "Split output into multiple fasta files, by barcode pairs.", category: "advanced"} seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} @@ -82,7 +79,6 @@ task Bam2Fastq { File bamIndex String outputPrefix Int compressionLevel = 1 - Boolean uncompressedOutput = false Boolean splitByBarcode = false String? seqIdPrefix @@ -98,7 +94,6 @@ task Bam2Fastq { bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ - ~{true="-u" false="" uncompressedOutput} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ ~{inputFile} @@ -120,7 +115,6 @@ task Bam2Fastq { bamIndex: {description: "The .pbi index for the input file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} - uncompressedOutput: {description: "Do not compress. In this case, we will not add .gz, and we ignore any -c setting.", category: "advanced"} splitByBarcode: {description: "Split output into multiple fastq files, by barcode pairs.", category: "advanced"} seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} From 10236b504fe1e272690e7976f4c281d0cfa13027 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 11 Aug 2020 13:23:33 +0200 Subject: [PATCH 0561/1208] update CHANGELOG --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a0964883..8a882f09 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,13 +9,13 @@ This document is user facing. 
Please word the changes in such a way that users understand how the changes affect the new version. --> -version 5.0.0-dev +version 4.1.0-dev --------------------------- ++ Added a task for GRIDSS. + Add wdl file for pacbio's bam2fastx tool. version 4.0.0 --------------------------- -+ Added a task for GRIDSS. + Picard MergeVcf now uses compression level 1 by default. + bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The number of threads is now related to the number of threads on the aligner. From 710cb79676d11663c8d951373265c9c80325aee3 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 11 Aug 2020 14:14:55 +0200 Subject: [PATCH 0562/1208] Fix index localization. --- CHANGELOG.md | 2 ++ bam2fastx.wdl | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 272499c4..af613151 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Add copy command to bam2fastx tasks to make sure bam file and its index are + always in the same directory. + Add wdl file for pacbio's bam2fastx tool. version 4.0.0 diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 5e5fb50a..6a09202f 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -24,6 +24,7 @@ task Bam2Fasta { input { File inputFile File bamIndex + String basenameInputFile = basename(inputFile) String outputPrefix Int compressionLevel = 1 Boolean splitByBarcode = false @@ -38,12 +39,16 @@ task Bam2Fasta { command { set -e mkdir -p "$(dirname ~{outputPrefix})" + # The bam file and its index need to be in the same directory. + # Cromwell will put them in separate iputs folders. 
+ cp ~{inputFile} ./ + cp ~{bamIndex} ./ bam2fasta \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{inputFile} + ./~{basenameInputFile} } output { @@ -76,6 +81,7 @@ task Bam2Fasta { task Bam2Fastq { input { File inputFile + String basenameInputFile = basename(inputFile) File bamIndex String outputPrefix Int compressionLevel = 1 @@ -91,12 +97,16 @@ task Bam2Fastq { command { set -e mkdir -p "$(dirname ~{outputPrefix})" + # The bam file and its index need to be in the same directory. + # Cromwell will put them in separate iputs folders. + cp ~{inputFile} ./ + cp ~{bamIndex} ./ bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{inputFile} + ./~{basenameInputFile} } output { From 48351b26c1ab7caad71432b84a0fcfa51c9f4388 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Aug 2020 14:25:28 +0200 Subject: [PATCH 0563/1208] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4446daa8..ab8d8867 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 4.1.0-dev +version 5.0.0-dev --------------------------- + A bwa-mem2 task was created with the same interface (including usePostalt) as the bwa mem task. From 01df29baef579cce73a32cd109d5405d45c197bd Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 11 Aug 2020 15:18:17 +0200 Subject: [PATCH 0564/1208] Change lima outputs. 
--- CHANGELOG.md | 3 +-- bam2fastx.wdl | 14 ++------------ lima.wdl | 9 ++++++--- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b839b35b..57bbfecb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,7 @@ that users understand how the changes affect the new version. version 4.1.0-dev --------------------------- -+ Add copy command to bam2fastx tasks to make sure bam file and its index are - always in the same directory. ++ Remove globs from lima outputs. + Added a task for GRIDSS. + Add wdl file for pacbio's bam2fastx tool. diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 6a09202f..5e5fb50a 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -24,7 +24,6 @@ task Bam2Fasta { input { File inputFile File bamIndex - String basenameInputFile = basename(inputFile) String outputPrefix Int compressionLevel = 1 Boolean splitByBarcode = false @@ -39,16 +38,12 @@ task Bam2Fasta { command { set -e mkdir -p "$(dirname ~{outputPrefix})" - # The bam file and its index need to be in the same directory. - # Cromwell will put them in separate iputs folders. - cp ~{inputFile} ./ - cp ~{bamIndex} ./ bam2fasta \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ./~{basenameInputFile} + ~{inputFile} } output { @@ -81,7 +76,6 @@ task Bam2Fasta { task Bam2Fastq { input { File inputFile - String basenameInputFile = basename(inputFile) File bamIndex String outputPrefix Int compressionLevel = 1 @@ -97,16 +91,12 @@ task Bam2Fastq { command { set -e mkdir -p "$(dirname ~{outputPrefix})" - # The bam file and its index need to be in the same directory. - # Cromwell will put them in separate iputs folders. 
- cp ~{inputFile} ./ - cp ~{bamIndex} ./ bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ./~{basenameInputFile} + ~{inputFile} } output { diff --git a/lima.wdl b/lima.wdl index 2e8a7085..1a40b1c8 100644 --- a/lima.wdl +++ b/lima.wdl @@ -95,12 +95,15 @@ task Lima { cp "~{basename(outputPrefix)}.fl.lima.counts" "~{outputPrefix}.fl.lima.counts" cp "~{basename(outputPrefix)}.fl.lima.report" "~{outputPrefix}.fl.lima.report" cp "~{basename(outputPrefix)}.fl.lima.summary" "~{outputPrefix}.fl.lima.summary" + find . -path "*.bam" > bamFiles.txt + find . -path "*.bam.pbi" > bamIndexes.txt + find . -path "*.subreadset.xml" > subreadsets.txt } output { - Array[File] limaBam = glob("*.bam") - Array[File] limaBamIndex = glob("*.bam.pbi") - Array[File] limaXml = glob("*.subreadset.xml") + Array[File] limaBam = read_lines("bamFiles.txt") + Array[File] limaBamIndex = read_lines("bamIndexes.txt") + Array[File] limaXml = read_lines("subreadsets.txt") File limaStderr = outputPrefix + ".fl.stderr.log" File limaJson = outputPrefix + ".fl.json" File limaCounts = outputPrefix + ".fl.lima.counts" From af73c53935206dd54b37079e1c8d6a5b053c4a46 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 11 Aug 2020 17:50:15 +0200 Subject: [PATCH 0565/1208] Change inputs to arrays. --- CHANGELOG.md | 3 ++- bam2fastx.wdl | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 57bbfecb..eb2ef271 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,8 @@ that users understand how the changes affect the new version. version 4.1.0-dev --------------------------- -+ Remove globs from lima outputs. ++ Bam2fastx: Input bam and index are now arrays. ++ Lima: Remove globs from outputs. + Added a task for GRIDSS. + Add wdl file for pacbio's bam2fastx tool. 
diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 5e5fb50a..a8f1342c 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -22,8 +22,8 @@ version 1.0 task Bam2Fasta { input { - File inputFile - File bamIndex + Array[File]+ inputFile + Array[File]+ bamIndex String outputPrefix Int compressionLevel = 1 Boolean splitByBarcode = false @@ -43,7 +43,7 @@ task Bam2Fasta { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{inputFile} + ~{sep=" " inputFile} } output { @@ -58,8 +58,8 @@ task Bam2Fasta { parameter_meta { # inputs - inputFile: {description: "The input pacbio bam file.", category: "required"} - bamIndex: {description: "The .pbi index for the input file.", category: "required"} + inputFile: {description: "The input pacbio bam file(s).", category: "required"} + bamIndex: {description: "The .pbi index for the input file(s).", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} splitByBarcode: {description: "Split output into multiple fasta files, by barcode pairs.", category: "advanced"} @@ -75,8 +75,8 @@ task Bam2Fasta { task Bam2Fastq { input { - File inputFile - File bamIndex + Array[File]+ inputFile + Array[File]+ bamIndex String outputPrefix Int compressionLevel = 1 Boolean splitByBarcode = false @@ -96,7 +96,7 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{inputFile} + ~{sep=" " inputFile} } output { @@ -111,8 +111,8 @@ task Bam2Fastq { parameter_meta { # inputs - inputFile: {description: "The input pacbio bam file.", category: "required"} - bamIndex: {description: "The .pbi index for the input file.", category: "required"} + inputFile: {description: "The input pacbio bam file(s).", category: "required"} + bamIndex: {description: "The .pbi index for the 
input file(s).", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} splitByBarcode: {description: "Split output into multiple fastq files, by barcode pairs.", category: "advanced"} From a4af699b14f325e1729e307a21058cb25da0d251 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 13 Aug 2020 15:06:02 +0200 Subject: [PATCH 0566/1208] update gridss: add --jvmheap parameter --- gridss.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 3d4b7d73..4ba4bc17 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -33,6 +33,7 @@ task GRIDSS { BwaIndex reference String outputPrefix = "gridss" + String jvmheapsize = "25G" Int threads = 1 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } @@ -45,6 +46,7 @@ task GRIDSS { --output ~{outputPrefix}.vcf.gz \ --assembly ~{outputPrefix}_assembly.bam \ ~{"-t " + threads} \ + ~{"--jvmheap " + jvmheapsize} \ --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ ~{normalBam} \ ~{tumorBam} From 9bfe4ebf231bd307dc546dff34c8b96823058718 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 13 Aug 2020 15:09:40 +0200 Subject: [PATCH 0567/1208] update task/CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a882f09..2360a877 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 4.1.0-dev --------------------------- ++ Updated task gridss.wdl: add --jvmheap parameter + Added a task for GRIDSS. + Add wdl file for pacbio's bam2fastx tool. From ac3ee59598026cb22cf40325dbf32b0bc5e988fb Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 13 Aug 2020 15:17:53 +0200 Subject: [PATCH 0568/1208] Rename input files. 
--- bam2fastx.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index a8f1342c..42240cd4 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -22,7 +22,7 @@ version 1.0 task Bam2Fasta { input { - Array[File]+ inputFile + Array[File]+ bam Array[File]+ bamIndex String outputPrefix Int compressionLevel = 1 @@ -43,7 +43,7 @@ task Bam2Fasta { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{sep=" " inputFile} + ~{sep=" " bam} } output { @@ -58,7 +58,7 @@ task Bam2Fasta { parameter_meta { # inputs - inputFile: {description: "The input pacbio bam file(s).", category: "required"} + bam: {description: "The input pacbio bam file(s).", category: "required"} bamIndex: {description: "The .pbi index for the input file(s).", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} @@ -75,7 +75,7 @@ task Bam2Fasta { task Bam2Fastq { input { - Array[File]+ inputFile + Array[File]+ bam Array[File]+ bamIndex String outputPrefix Int compressionLevel = 1 @@ -96,7 +96,7 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{sep=" " inputFile} + ~{sep=" " bam} } output { @@ -111,7 +111,7 @@ task Bam2Fastq { parameter_meta { # inputs - inputFile: {description: "The input pacbio bam file(s).", category: "required"} + bam: {description: "The input pacbio bam file(s).", category: "required"} bamIndex: {description: "The .pbi index for the input file(s).", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} From 8b378196020a0a4151dbb06d2452e2e05a3c12e5 Mon Sep 17 00:00:00 2001 From: 
cagaser Date: Thu, 13 Aug 2020 16:01:16 +0200 Subject: [PATCH 0569/1208] update gridss.wdl --- gridss.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 4ba4bc17..04ea2e82 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -33,7 +33,7 @@ task GRIDSS { BwaIndex reference String outputPrefix = "gridss" - String jvmheapsize = "25G" + Int jvmHeapSizeGb = 1 Int threads = 1 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } @@ -46,7 +46,7 @@ task GRIDSS { --output ~{outputPrefix}.vcf.gz \ --assembly ~{outputPrefix}_assembly.bam \ ~{"-t " + threads} \ - ~{"--jvmheap " + jvmheapsize} \ + ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ ~{normalBam} \ ~{tumorBam} @@ -63,7 +63,7 @@ task GRIDSS { runtime { cpu: threads - memory: "32G" + memory: "~{jvmHeapSizeGb}G" docker: dockerImage } From b654fee3d284e55e1f73f21621ee01e18fa731a8 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 13 Aug 2020 16:21:30 +0200 Subject: [PATCH 0570/1208] change default jvmHeapSizeGb from 1G to 30G --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 04ea2e82..3b7859b6 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -33,7 +33,7 @@ task GRIDSS { BwaIndex reference String outputPrefix = "gridss" - Int jvmHeapSizeGb = 1 + Int jvmHeapSizeGb = 30 Int threads = 1 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } @@ -63,7 +63,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb}G" + memory: "~{jvmHeapSizeGb + 1}G" docker: dockerImage } From e941b853a9ff8e194c8b1af2dc28dffddb58d8be Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 08:56:34 +0200 Subject: [PATCH 0571/1208] add parameter_meta for SVcalling.gridss.jvmHeapSizeGb --- gridss.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/gridss.wdl b/gridss.wdl index 3b7859b6..3649cb1b 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ 
-78,6 +78,7 @@ task GRIDSS { outputPrefix: {description: "The prefix for the output files. This may include parent directories.", category: "common"} threads: {description: "The number of the threads to use.", category: "advanced"} + javaXmxMb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } \ No newline at end of file From 1e8155c26a770e2aab4b46fcf74f5c98b4f7945d Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 09:04:25 +0200 Subject: [PATCH 0572/1208] small fix --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 3649cb1b..44b9e9f1 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -78,7 +78,7 @@ task GRIDSS { outputPrefix: {description: "The prefix for the output files. This may include parent directories.", category: "common"} threads: {description: "The number of the threads to use.", category: "advanced"} - javaXmxMb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} + jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } \ No newline at end of file From 31b21e1e197b5c646b2ad202cd4fa56cc54816e8 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 16:10:47 +0200 Subject: [PATCH 0573/1208] add bcftools annotate --- bcftools.wdl | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 8875903b..33685c33 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -22,6 +22,106 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. +task Annotate { + input { + File? annsFile + String? collapse + Array[String] columns = [] + String? exclude + Boolean force = false + File? headerLines + String? newId + String? include + Boolean keepSites = false + String? markSites + Boolean noVersion = false + String outputType = "z" + String? regions + File? regionsFile + File? renameChrs + Array[String] samples = [] + File? 
samplesFile + Boolean singleOverlaps = false + Array[String] removeAnns = [] + File inputFile + String outputPath = "output.vcf.gz" + + Int threads = 0 + String memory = "256M" + Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + bcftools annotate \ + -o ~{outputPath} \ + -O ~{outputType} \ + ~{"--annotations " + annsFile} \ + ~{"--collapse " + collapse} \ + ~{true="--columns" false="" length(columns) > 0} ~{sep="," columns} \ + ~{"--exclude " + exclude} \ + ~{true="--force" false="" force} \ + ~{"--header-lines " + headerLines} \ + ~{"--set-id " + newId} \ + ~{"--include " + include} \ + ~{true="--keep-sites" false="" keepSites} \ + ~{"--mark-sites " + markSites} \ + ~{true="--no-version" false="" noVersion} \ + ~{"--regions " + regions} \ + ~{"--regions-file " + regionsFile} \ + ~{"--rename-chrs " + renameChrs} \ + ~{true="--samples" false="" length(samples) > 0} ~{sep="," samples} \ + ~{"--samples-file " + samplesFile} \ + ~{true="--single-overlaps" false="" singleOverlaps} \ + ~{true="--remove" false="" length(removeAnns) > 0} ~{sep="," removeAnns} \ + ~{inputFile} + bcftools index --tbi ~{outputPath} + + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} + annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "advanced"} + collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} + columns: {description: "Comma-separated list of columns or tags to carry 
over from the annotation file (see man page for details).", category: "advanced"} + exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} + force: {description: "Continue even when parsing errors, such as undefined tags, are encountered.", category: "advanced"} + headerLines: {description: "Lines to append to the VCF header (see man page for details).", category: "advanced"} + newId: {description: "Assign ID on the fly (e.g. --set-id +'%CHROM\_%POS').", category: "advanced"} + include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} + keepSites: {description: "Keep sites which do not pass -i and -e expressions instead of discarding them.", category: "advanced"} + markSites: {description: "Annotate sites which are present ('+') or absent ('-') in the -a file with a new INFO/TAG flag.", category: "advanced"} + noVersion: {description: "Do not append version and command line information to the output VCF header.", category: "advanced"} + regions: {description: "Restrict to comma-separated list of regions.", category: "advanced"} + regionsFile: {description: "Restrict to regions listed in a file.", category: "advanced"} + renameChrs: {description: "rename chromosomes according to the map in file (see man page for details).", category: "advanced"} + samples: {description: "List of samples for sample stats, \"-\" to include all samples.", category: "advanced"} + samplesFile: {description: "File of samples to include.", category: "advanced"} + singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} + removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} + inputFile: {description: "A vcf or bcf file.", category: "required"} + + threads: {description: "Number of extra decompression threads [0].", category: "advanced"} + 
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + } +} + task View { input { File inputFile From df6fe2df5e1276a39eaf6981f86b93d49cbbddda Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 16:12:38 +0200 Subject: [PATCH 0574/1208] small fix: change vcf to bcf --- delly.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/delly.wdl b/delly.wdl index efa1bf60..f708f494 100644 --- a/delly.wdl +++ b/delly.wdl @@ -28,7 +28,7 @@ task CallSV { File bamIndex File referenceFasta File referenceFastaFai - String outputPath = "./delly/delly.vcf" + String outputPath = "./delly/delly.bcf" String memory = "15G" Int timeMinutes = 300 From 1241b96fce92f28fa747cde02081e00edb0aa506 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 17:12:04 +0200 Subject: [PATCH 0575/1208] add bcftools sort --- bcftools.wdl | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 33685c33..d72efde3 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -122,6 +122,44 @@ task Annotate { } } +task Sort { + input { + File inputFile + String outputPath = "output.vcf.gz" + String memory = "256M" + Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + String outputType = "z" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + bcftools sort \ + -o ~{outputPath} \ + -O ~{outputType} \ + ~{inputFile} + bcftools index --tbi ~{outputPath} + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputFile: {description: "A vcf or bcf file.", category: 
"required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } + + +} + task View { input { File inputFile From f07a59aede3deb6e2001e0907ce3073079a20d63 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 17:16:17 +0200 Subject: [PATCH 0576/1208] add output {} --- bcftools.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index d72efde3..520bcf15 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -142,6 +142,11 @@ task Sort { bcftools index --tbi ~{outputPath} } + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + runtime { memory: memory time_minutes: timeMinutes From 4664f90c91fd801a7cb6322cf69333fd44dfcd92 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 14 Aug 2020 17:45:12 +0200 Subject: [PATCH 0577/1208] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1733c93f..85beb2eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ bcftools: add sorting and annotation + Bam2fastx: Input bam and index are now arrays. + Lima: Remove globs from outputs. + Updated task gridss.wdl: add --jvmheap parameter From 3327f388f3ac184c1c0bc37dd2e920dc2e8e71fb Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 17 Aug 2020 11:29:20 +0200 Subject: [PATCH 0578/1208] Update submodules. 
--- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index c0b48b0a..0cca0f40 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit c0b48b0a916913d1e6751d7744d1cec37559a81f +Subproject commit 0cca0f40a8e9121e8dcc9e76838f85835a0d8e94 From e554f35a07e4f6427e1d8ad1cb7ddcaf3fc50ce0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 20 Aug 2020 15:38:25 +0200 Subject: [PATCH 0579/1208] add sage task --- sage.wdl | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 sage.wdl diff --git a/sage.wdl b/sage.wdl new file mode 100644 index 00000000..dbc101dc --- /dev/null +++ b/sage.wdl @@ -0,0 +1,92 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Sage { + input { + String tumorName + File tumorBam + File tumorBai + String? normalName + File? normalBam + File? 
normalBai + String assembly + File referenceFasta + File hotspotVcf + File panelBed + File highConfidenceBed + + Int timeMinutes = 60 #FIXME I've no idea how long this takes... + Int threads = 2 + String javaXmx = "32G" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" + } + + command { + SAGE \ + -Xmx~{javaXmx} \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + ~{"-reference " + normalName} \ + ~{"-reference_bam " + normalBam} \ + -assembly ~{assembly} \ + -ref_genome ~{referenceFasta} \ + -hotspots ~{hotspotVcf} \ + -panel_bed ~{panelBed} \ + -high_confidence_bed ~{highConfidenceBed} \ + -threads ~{threads} \ + + -out ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + time_minutes: timeMinutes + cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} + tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} + normalName: {description: "The name of the normal/reference sample.", category: "common"} + normalBam: {description: "The BAM file for the normal sample.", category: "common"} + normalBam: {description: "The BAM file for the normal sample.", category: "common"} + assembly: {description: "The assembly of the reference genomes, either hg19 or hg38.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + hotspotVcf: {description: "A VCF file with hotspot variant sites.", category: "required"} + panelBed: {description: "A bed file describing a panel of cancer related genes.", category: "required"} + highConfidenceBed: {description: "A bed file describing high confidence regions.", category: "required"} + + threads: {description: "The number of threads to be used.", category: "advanced"} + memory: {description: "The amount of memory 
this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} \ No newline at end of file From a6c3487834da5ce6b6a40ce2e966e9d899abb240 Mon Sep 17 00:00:00 2001 From: Redmar Date: Mon, 24 Aug 2020 09:41:30 +0200 Subject: [PATCH 0580/1208] Add option to ignore masked reference --- vt.wdl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/vt.wdl b/vt.wdl index d4c134b9..8a9f9de8 100644 --- a/vt.wdl +++ b/vt.wdl @@ -26,6 +26,7 @@ task Normalize { File inputVCFIndex File referenceFasta File referenceFastaFai + Boolean ignoreMaskedRef = false String outputPath = "./vt/normalized_decomposed.vcf" String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" String memory = "4G" @@ -33,9 +34,12 @@ task Normalize { } command { - set -e + set -eo pipefail mkdir -p "$(dirname ~{outputPath})" - vt normalize ~{inputVCF} -r ~{referenceFasta} | vt decompose -s - -o ~{outputPath} + vt normalize ~{inputVCF} \ + -r ~{referenceFasta} \ + ~{true="-m " false="" ignoreMaskedRef} \ + | vt decompose -s - -o ~{outputPath} } output { @@ -55,6 +59,7 @@ task Normalize { outputPath: {description: "The location the output VCF file should be written.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs", category: "advanced" memory: 
{description: "The memory required to run the programs", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 47651a09cf7d3cd0fb45bdc20d5ef0227a3bbcd3 Mon Sep 17 00:00:00 2001 From: Redmar Date: Mon, 24 Aug 2020 09:42:47 +0200 Subject: [PATCH 0581/1208] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 85beb2eb..0d1805ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ vt: Add option to ignore masked reference. + bcftools: add sorting and annotation + Bam2fastx: Input bam and index are now arrays. + Lima: Remove globs from outputs. From fbad1676097484b301fed9e55b36d39dcd7a7524 Mon Sep 17 00:00:00 2001 From: Redmar Date: Mon, 24 Aug 2020 09:55:52 +0200 Subject: [PATCH 0582/1208] Add closing bracket --- vt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vt.wdl b/vt.wdl index 8a9f9de8..99cc1318 100644 --- a/vt.wdl +++ b/vt.wdl @@ -59,7 +59,7 @@ task Normalize { outputPath: {description: "The location the output VCF file should be written.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs", category: "advanced" + ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs", category: "advanced"} memory: {description: "The memory required to 
run the programs", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From f335ac9b5d0d061fce172ebd843d76e46e3e1ed1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 26 Aug 2020 12:56:51 +0200 Subject: [PATCH 0583/1208] adjust sage --- sage.wdl | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/sage.wdl b/sage.wdl index dbc101dc..ba0a6137 100644 --- a/sage.wdl +++ b/sage.wdl @@ -28,32 +28,27 @@ task Sage { String? normalName File? normalBam File? normalBai - String assembly File referenceFasta - File hotspotVcf - File panelBed - File highConfidenceBed + File referenceFastaDict + File referenceFastaFai + File knownHotspots + File codingRegsions Int timeMinutes = 60 #FIXME I've no idea how long this takes... 
- Int threads = 2 String javaXmx = "32G" + String memory = "33G" String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" } command { - SAGE \ - -Xmx~{javaXmx} \ + SAGE -Xmx~{javaXmx} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ ~{"-reference " + normalName} \ ~{"-reference_bam " + normalBam} \ - -assembly ~{assembly} \ -ref_genome ~{referenceFasta} \ - -hotspots ~{hotspotVcf} \ - -panel_bed ~{panelBed} \ - -high_confidence_bed ~{highConfidenceBed} \ - -threads ~{threads} \ - + -known_hotspots ~{knownHotspots} \ + -coding_regions ~{codingRegsions} \ -out ~{outputPath} } @@ -74,12 +69,13 @@ task Sage { tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} normalName: {description: "The name of the normal/reference sample.", category: "common"} normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBam: {description: "The BAM file for the normal sample.", category: "common"} - assembly: {description: "The assembly of the reference genomes, either hg19 or hg38.", category: "required"} + normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} referenceFasta: {description: "The reference fasta file.", category: "required"} - hotspotVcf: {description: "A VCF file with hotspot variant sites.", category: "required"} - panelBed: {description: "A bed file describing a panel of cancer related genes.", category: "required"} - highConfidenceBed: {description: "A bed file describing high confidence regions.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + knownHotspots: {description: "A TSV file with hotspot variant sites.", category: "required"} + codingRegsions: {description: "A bed file describing coding regions to search for inframe 
indels.", category: "required"} threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 2b8e422685de9ea6f63831d8780231a058c1b0cb Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 1 Sep 2020 15:08:25 +0200 Subject: [PATCH 0584/1208] add sagev2 --- sage.wdl | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 2 deletions(-) diff --git a/sage.wdl b/sage.wdl index ba0a6137..ed3d0866 100644 --- a/sage.wdl +++ b/sage.wdl @@ -20,7 +20,7 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -task Sage { +task SageHotspot { input { String tumorName File tumorBam @@ -33,6 +33,7 @@ task Sage { File referenceFastaFai File knownHotspots File codingRegsions + String outputPath = "./sage_hotspot.vcf.gz" Int timeMinutes = 60 #FIXME I've no idea how long this takes... String javaXmx = "32G" @@ -58,7 +59,6 @@ task Sage { runtime { time_minutes: timeMinutes - cpu: threads docker: dockerImage memory: memory } @@ -77,6 +77,82 @@ task Sage { knownHotspots: {description: "A TSV file with hotspot variant sites.", category: "required"} codingRegsions: {description: "A bed file describing coding regions to search for inframe indels.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task Sage { + input { + String tumorName + File tumorBam + String? normalName + File? 
normalBam + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File hotspots + File panelBed + File highConfidenceBed + String assembly = "hg38" + String outputPath = "./sage.vcf.gz" + + Int timeMinutes = 60 #FIXME I've no idea how long this takes... + String javaXmx = "32G" + String memory = "33G" + Int threads = 2 + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" + } + + command { + java -Xmx~{javaXmx} \ + -cp /usr/local/share/hmftools-sage-2.2-0/sage.jar \ + com.hartwig.hmftools.sage.SageApplication \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + ~{"-reference " + normalName} \ + ~{"-reference_bam " + normalBam} \ + -ref_genome ~{referenceFasta} \ + -hotspots ~{hotspots} \ + -panel_bed ~{panelBed} \ + -high_confidence_bed ~{highConfidenceBed} \ + -assembly ~{assembly} \ + -threads ~{threads} \ + -out ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + time_minutes: timeMinutes + cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} + tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} + normalName: {description: "The name of the normal/reference sample.", category: "common"} + normalBam: {description: "The BAM file for the normal sample.", category: "common"} + normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + hotspots: {description: "A VCF file containg hotspot variant sites.", category: "required"} + 
panelBed: {description: "A bed file containing a panel of genes of intrest.", category: "required"} + highConfidenceBed: {description: "A bed file containing high confidence regions.", category: "required"} + assembly: {description: "The genome assembly used, either \"hg19\" or \"hg38\".", category: "common"} + outputPath: {description: "The path to write the output VCF to.", category: "common"} + threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", From 4b249fde4a8e5558039553e4c2e7fa78a5251e6d Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 4 Sep 2020 15:37:32 +0200 Subject: [PATCH 0585/1208] replace binary digits to boolean --- survivor.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/survivor.wdl b/survivor.wdl index e5ac7b5b..b9583009 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -27,9 +27,9 @@ task Merge { Array[File] filePaths Int breakpointDistance = 1000 Int suppVecs = 2 - Int svType = 1 - Int strandType = 1 - Int distanceBySvSize = 0 + Boolean svType = true + Boolean strandType = true + Boolean distanceBySvSize = false Int minSize = 30 String outputPath = "./survivor/merged.vcf" String memory = "24G" @@ -45,9 +45,9 @@ task Merge { fileList \ ~{breakpointDistance} \ ~{suppVecs} \ - ~{svType} \ - ~{strandType} \ - ~{distanceBySvSize} \ + ~{true=1 false=0 svType} \ + ~{true=1 false=0 strandType} \ + ~{true=1 false=0 distanceBySvSize} \ ~{minSize} \ ~{outputPath} } From f12093281cb37c0521098e8377fc7ef83bc2c618 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 4 Sep 2020 15:41:04 +0200 Subject: [PATCH 0586/1208] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d1805ed..121c8768 100644 --- a/CHANGELOG.md +++ 
b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ survivor: replace integer boolean type to logical true or false value. + vt: Add option to ignore masked reference. + bcftools: add sorting and annotation + Bam2fastx: Input bam and index are now arrays. From 7bc3c58d309fcb20d9769180f471d79432d2e350 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 7 Sep 2020 17:26:42 +0200 Subject: [PATCH 0587/1208] make bcftools indexing optional --- bcftools.wdl | 102 +++++++++++++++++++++++++++------------------------ 1 file changed, 54 insertions(+), 48 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 520bcf15..5d5a1ea6 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -52,6 +52,8 @@ task Annotate { String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } + Boolean indexing = if outputType == "z" then true else false + command { set -e mkdir -p "$(dirname ~{outputPath})" @@ -77,13 +79,14 @@ task Annotate { ~{true="--single-overlaps" false="" singleOverlaps} \ ~{true="--remove" false="" length(removeAnns) > 0} ~{sep="," removeAnns} \ ~{inputFile} - bcftools index --tbi ~{outputPath} + + ~{if indexing then 'bcftools index --tbi ~{outputPath}' else ''} } output { File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" + File? outputVcfIndex = outputPath + ".tbi" } runtime { @@ -132,6 +135,8 @@ task Sort { String outputType = "z" } + Boolean indexing = if outputType == "z" then true else false + command { set -e mkdir -p "$(dirname ~{outputPath})" @@ -139,12 +144,13 @@ task Sort { -o ~{outputPath} \ -O ~{outputType} \ ~{inputFile} - bcftools index --tbi ~{outputPath} + + ~{if indexing then 'bcftools index --tbi ~{outputPath}' else ''} } output { File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" + File? 
outputVcfIndex = outputPath + ".tbi" } runtime { @@ -165,50 +171,6 @@ task Sort { } -task View { - input { - File inputFile - String outputPath = "output.vcf.gz" - String memory = "256M" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) - String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" - String outputType = "z" - Int compressionLevel = 1 - } - - command { - set -e - mkdir -p "$(dirname ~{outputPath})" - bcftools view \ - -o ~{outputPath} \ - -O ~{outputType} \ - -l ~{compressionLevel} \ - ~{inputFile} - bcftools index --tbi ~{outputPath} - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - } - - runtime { - memory: memory - time_minutes: timeMinutes - docker: dockerImage - } - - parameter_meta { - inputFile: {description: "A vcf or bcf file.", category: "required"} - outputPath: {description: "The location the output VCF file should be written.", category: "common"} - outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - } -} - task Stats { input { File inputVcf @@ -313,3 +275,47 @@ task Stats { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} } } + +task View { + input { + File inputFile + String outputPath = "output.vcf" + Int compressionLevel = 0 + String memory = "256M" + Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + } + + String outputType = if compressionLevel > 0 then "z" else "v" + Boolean indexing = if compressionLevel > 0 then true else false + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + bcftools view \ + -o ~{outputPath} \ + -l ~{compressionLevel} \ + -O ~{outputType} \ + ~{inputFile} + + ~{if indexing then 'bcftools index --tbi ~{outputPath}' else ''} + } + output { + File outputVcf = outputPath + File? outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputFile: {description: "A vcf or bcf file.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} From 5781179d1b806467b8ffc8d5a39e41d6e7c58a5c Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 7 Sep 2020 17:54:35 +0200 Subject: [PATCH 0588/1208] made output extension depends on compression level --- bcftools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/bcftools.wdl b/bcftools.wdl index 5d5a1ea6..10db8b98 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -288,6 +288,7 @@ task View { String outputType = if compressionLevel > 0 then "z" else "v" Boolean indexing = if compressionLevel > 0 then true else false + String outputFilePath = if compressionLevel > 0 then outputPath + ".gz" else outputPath command { set -e From 33cdf52e284dd503054f2668b178662e2f7ff152 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 9 Sep 2020 13:34:35 +0200 Subject: [PATCH 0589/1208] update collect-columns to 1.0.0 --- collect-columns.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/collect-columns.wdl b/collect-columns.wdl index e4e3a948..fe41c5e8 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -29,13 +29,14 @@ task CollectColumns { Int? separator Array[String]? sampleNames Boolean header = false + Boolean sumOnDuplicateId = false Array[String]? additionalAttributes File? referenceGtf String? 
featureAttribute Int memoryGb = 4 + ceil(0.5 * length(inputTables)) Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/collect-columns:0.2.0--py_1" + String dockerImage = "quay.io/biocontainers/collect-columns:1.0.0--py_0" } command { @@ -49,6 +50,7 @@ task CollectColumns { ~{"-s " + separator} \ ~{true="-n" false="" defined(sampleNames)} ~{sep=" " sampleNames} \ ~{true="-H" false="" header} \ + ~{true="-S" false="" sumOnDuplicateId} \ ~{true="-a" false="" defined(additionalAttributes)} ~{sep=" " additionalAttributes} \ ~{"-g " + referenceGtf} \ ~{"-F " + featureAttribute} @@ -72,6 +74,7 @@ task CollectColumns { separator: {description: "Equivalent to the -s option of collect-columns.", category: "advanced"} sampleNames: {description: "Equivalent to the -n option of collect-columns.", category: "advanced"} header: {description: "Equivalent to the -H flag of collect-columns.", category: "advanced"} + sumOnDuplicateId: {description: "Equivalent to the -S flag of collect-columns.", category: "advanced"} additionalAttributes: {description: "Equivalent to the -a option of collect-columns.", category: "advanced"} referenceGtf: {description: "Equivalent to the -g option of collect-columns.", category: "advanced"} featureAttribute: {description: "Equivalent to the -F option of collect-columns.", category: "advanced"} From 452b5810a358eeb915e6c5ba98525e210262811d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 9 Sep 2020 13:40:51 +0200 Subject: [PATCH 0590/1208] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 121c8768..a9329bf5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ collect-columns: updated docker image to version 1.0.0 and added the + `sumOnDuplicateId` input (defaults to false). + survivor: replace integer boolean type to logical true or false value. 
+ vt: Add option to ignore masked reference. + bcftools: add sorting and annotation From a651adc575a7ca8707447958a84950d9378b5ee4 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 10 Sep 2020 11:53:38 +0200 Subject: [PATCH 0591/1208] add paramter meta compressionLevel --- bcftools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/bcftools.wdl b/bcftools.wdl index 10db8b98..affa805a 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -314,6 +314,7 @@ task View { parameter_meta { inputFile: {description: "A vcf or bcf file.", category: "required"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 1643ff2c165b27ca8cacf66899c30ccad5e0f3b3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Sep 2020 16:29:07 +0200 Subject: [PATCH 0592/1208] update sage --- sage.wdl | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/sage.wdl b/sage.wdl index ba0a6137..cdce4680 100644 --- a/sage.wdl +++ b/sage.wdl @@ -31,33 +31,45 @@ task Sage { File referenceFasta File referenceFastaDict File referenceFastaFai - File knownHotspots - File codingRegsions + File hotspots + File panelBed + File highConfidenceBed + Boolean hg38 = false + String outputPath - Int timeMinutes = 60 #FIXME I've no idea how long this takes... 
+ Int threads = 2 String javaXmx = "32G" String memory = "33G" - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--2" } command { - SAGE -Xmx~{javaXmx} \ + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-sage-2.2-2/sage.jar \ + com.hartwig.hmftools.sage.SageApplication \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ ~{"-reference " + normalName} \ ~{"-reference_bam " + normalBam} \ -ref_genome ~{referenceFasta} \ - -known_hotspots ~{knownHotspots} \ - -coding_regions ~{codingRegsions} \ + -hotspots ~{hotspots} \ + -panel_bed ~{panelBed} \ + -high_confidence_bed ~{highConfidenceBed} \ + -assembly ~{true="hg38" false="hg19" hg38} \ + -threads ~{threads} \ -out ~{outputPath} } output { File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + # There is some plots as well, but in the current container the labels in the plots are just series of `□`s. + # This seems to be a systemic issue with R generated plots in biocontainers... 
} runtime { - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey cpu: threads docker: dockerImage memory: memory @@ -74,8 +86,9 @@ task Sage { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - knownHotspots: {description: "A TSV file with hotspot variant sites.", category: "required"} - codingRegsions: {description: "A bed file describing coding regions to search for inframe indels.", category: "required"} + hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + panelBed: {description: "A bed file describing coding regions to search for in frame indels.", category: "required"} + highConfidenceBed: {description: "A bed files describing high confidence mapping regions.", category: "required"} threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 5f61dd78277dd0d9b408ce866c9e9548b6f152a3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Sep 2020 16:33:31 +0200 Subject: [PATCH 0593/1208] fix sage... --- sage.wdl | 89 +------------------------------------------------------- 1 file changed, 1 insertion(+), 88 deletions(-) diff --git a/sage.wdl b/sage.wdl index 251630ce..f6e8588b 100644 --- a/sage.wdl +++ b/sage.wdl @@ -20,7 +20,7 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-task SageHotspot { +task Sage { input { String tumorName File tumorBam @@ -31,17 +31,11 @@ task SageHotspot { File referenceFasta File referenceFastaDict File referenceFastaFai -<<<<<<< HEAD File hotspots File panelBed File highConfidenceBed Boolean hg38 = false - String outputPath -======= - File knownHotspots - File codingRegsions String outputPath = "./sage_hotspot.vcf.gz" ->>>>>>> 2b8e422685de9ea6f63831d8780231a058c1b0cb Int threads = 2 String javaXmx = "32G" @@ -75,12 +69,8 @@ task SageHotspot { } runtime { -<<<<<<< HEAD time_minutes: timeMinutes # !UnknownRuntimeKey cpu: threads -======= - time_minutes: timeMinutes ->>>>>>> 2b8e422685de9ea6f63831d8780231a058c1b0cb docker: dockerImage memory: memory } @@ -108,80 +98,3 @@ task SageHotspot { category: "advanced"} } } - -task Sage { - input { - String tumorName - File tumorBam - String? normalName - File? normalBam - File referenceFasta - File referenceFastaFai - File referenceFastaDict - File hotspots - File panelBed - File highConfidenceBed - String assembly = "hg38" - String outputPath = "./sage.vcf.gz" - - Int timeMinutes = 60 #FIXME I've no idea how long this takes... 
- String javaXmx = "32G" - String memory = "33G" - Int threads = 2 - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--0" - } - - command { - java -Xmx~{javaXmx} \ - -cp /usr/local/share/hmftools-sage-2.2-0/sage.jar \ - com.hartwig.hmftools.sage.SageApplication \ - -tumor ~{tumorName} \ - -tumor_bam ~{tumorBam} \ - ~{"-reference " + normalName} \ - ~{"-reference_bam " + normalBam} \ - -ref_genome ~{referenceFasta} \ - -hotspots ~{hotspots} \ - -panel_bed ~{panelBed} \ - -high_confidence_bed ~{highConfidenceBed} \ - -assembly ~{assembly} \ - -threads ~{threads} \ - -out ~{outputPath} - } - - output { - File outputVcf = outputPath - } - - runtime { - time_minutes: timeMinutes - cpu: threads - docker: dockerImage - memory: memory - } - - parameter_meta { - tumorName: {description: "The name of the tumor sample.", category: "required"} - tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} - tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} - normalName: {description: "The name of the normal/reference sample.", category: "common"} - normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - hotspots: {description: "A VCF file containg hotspot variant sites.", category: "required"} - panelBed: {description: "A bed file containing a panel of genes of intrest.", category: "required"} - highConfidenceBed: {description: "A bed file containing high confidence regions.", category: "required"} - assembly: {description: "The genome assembly used, either 
\"hg19\" or \"hg38\".", category: "common"} - outputPath: {description: "The path to write the output VCF to.", category: "common"} - - threads: {description: "The number of threads to be used.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} \ No newline at end of file From bea730a027a6a3c27675af6e4c85bf72a9aad841 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 11 Sep 2020 10:49:46 +0200 Subject: [PATCH 0594/1208] change default outputPath of sage --- sage.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sage.wdl b/sage.wdl index f6e8588b..71378bc7 100644 --- a/sage.wdl +++ b/sage.wdl @@ -35,7 +35,7 @@ task Sage { File panelBed File highConfidenceBed Boolean hg38 = false - String outputPath = "./sage_hotspot.vcf.gz" + String outputPath = "./sage.vcf.gz" Int threads = 2 String javaXmx = "32G" From c6d2c3ccc41031e7759655fa274ad0323362b418 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 11 Sep 2020 12:02:12 +0200 Subject: [PATCH 0595/1208] change bai to bamIndex in sage --- sage.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sage.wdl b/sage.wdl index 71378bc7..79458cc1 100644 --- a/sage.wdl +++ b/sage.wdl @@ -24,10 +24,10 @@ task Sage { input { String tumorName File tumorBam - File tumorBai + File tumorBamIndex String? normalName File? normalBam - File? normalBai + File? 
normalBamIndex File referenceFasta File referenceFastaDict File referenceFastaFai From fb14c451e290628e6666181844c47c8716510565 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 11 Sep 2020 12:14:32 +0200 Subject: [PATCH 0596/1208] fix paramter_meta --- sage.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sage.wdl b/sage.wdl index 79458cc1..7c04aa99 100644 --- a/sage.wdl +++ b/sage.wdl @@ -78,7 +78,7 @@ task Sage { parameter_meta { tumorName: {description: "The name of the tumor sample.", category: "required"} tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} - tumorBai: {description: "The index of the BAM file for the tumor sample.", category: "required"} + tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} normalName: {description: "The name of the normal/reference sample.", category: "common"} normalBam: {description: "The BAM file for the normal sample.", category: "common"} normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} From ddf76915f2fdb19774c782a957c5403f307933a7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 11 Sep 2020 13:42:00 +0200 Subject: [PATCH 0597/1208] fix paramter_meta --- sage.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sage.wdl b/sage.wdl index 7c04aa99..ab42bee8 100644 --- a/sage.wdl +++ b/sage.wdl @@ -81,7 +81,7 @@ task Sage { tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} normalName: {description: "The name of the normal/reference sample.", category: "common"} normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBai: {description: "The index of the BAM file for the normal sample.", category: "common"} + normalBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} referenceFasta: {description: "The reference fasta file.", 
category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} From ece83524abd6676c9666cf8027d27cdca77a7279 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Sep 2020 09:57:18 +0200 Subject: [PATCH 0598/1208] Add timeMinutes to Classify. --- CHANGELOG.md | 2 ++ centrifuge.wdl | 70 +++----------------------------------------------- 2 files changed, 5 insertions(+), 67 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9329bf5..4c22ef8b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Centrifuge: Add `timeMinutes` to `Classify` task and remove broken & + unnecessary downloading tasks. + collect-columns: updated docker image to version 1.0.0 and added the `sumOnDuplicateId` input (defaults to false). + survivor: replace integer boolean type to logical true or false value. diff --git a/centrifuge.wdl b/centrifuge.wdl index ee305325..bc2ea462 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -110,6 +110,7 @@ task Classify { Int threads = 4 String memory = "16G" + Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -150,6 +151,7 @@ task Classify { runtime { cpu: threads memory: memory + time_minutes: timeMinutes docker: dockerImage } @@ -169,6 +171,7 @@ task Classify { excludeTaxIDs: {description: "A comma-separated list of taxonomic IDs that will be excluded in classification procedure.", category: "common"} threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -233,73 +236,6 @@ task Inspect { } } -task Download { - input { - String libraryPath - Array[String]? domain - String executable = "centrifuge-download" - String? preCommand - String? seqTaxMapPath - String database = "refseq" - String? assemblyLevel - String? refseqCategory - Array[String]? taxIds - Boolean filterUnplaced = false - Boolean maskLowComplexRegions = false - Boolean downloadRnaSeqs = false - Boolean modifyHeader = false - Boolean downloadGiMap = false - } - - # This will use centrifuge-download to download. - # The bash statement at the beginning is to make sure - # the directory for the SeqTaxMapPath exists. - command { - set -e -o pipefail - ~{preCommand} - ~{"mkdir -p $(dirname " + seqTaxMapPath + ")"} - ~{executable} \ - -o ~{libraryPath} \ - ~{true='-d ' false='' defined(domain)}~{sep=',' domain} \ - ~{'-a "' + assemblyLevel + '"'} \ - ~{"-c " + refseqCategory} \ - ~{true='-t' false='' defined(taxIds)} '~{sep=',' taxIds}' \ - ~{true='-r' false='' downloadRnaSeqs} \ - ~{true='-u' false='' filterUnplaced} \ - ~{true='-m' false='' maskLowComplexRegions} \ - ~{true='-l' false='' modifyHeader} \ - ~{true='-g' false='' downloadGiMap} \ - ~{database} ~{">> " + seqTaxMapPath} - } - - output { - File seqTaxMap = "~{seqTaxMapPath}" - File library = libraryPath - Array[File] fastaFiles = glob(libraryPath + "/*/*.fna") - } - } - -task DownloadTaxonomy { - input { - String taxonomyDir - String executable = "centrifuge-download" - String? 
preCommand - } - - command { - set -e -o pipefail - ~{preCommand} - ~{executable} \ - -o ~{taxonomyDir} \ - taxonomy - } - - output { - File taxonomyTree = taxonomyDir + "/nodes.dmp" - File nameTable = taxonomyDir + "/names.dmp" - } - } - task KReport { input { File classification From 70747bdf89e05b3ab05cfebd75f5d13dff75741b Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Sep 2020 10:03:12 +0200 Subject: [PATCH 0599/1208] Update CHANGELOG. --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4c22ef8b..933081d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ Centrifuge: Add `timeMinutes` to `Classify` task and remove broken & - unnecessary downloading tasks. ++ Centrifuge: Add `timeMinutes` to `Classify` task and remove unnecessary + downloading tasks (alternative is refseqtools). + collect-columns: updated docker image to version 1.0.0 and added the `sumOnDuplicateId` input (defaults to false). + survivor: replace integer boolean type to logical true or false value. From 68120ed6530bf60cc114cffdeeed143d8b132c8e Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Sep 2020 15:19:44 +0200 Subject: [PATCH 0600/1208] Add NanoQC and NanoPlot. 
--- nanopack.wdl | 175 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 nanopack.wdl diff --git a/nanopack.wdl b/nanopack.wdl new file mode 100644 index 00000000..59193f96 --- /dev/null +++ b/nanopack.wdl @@ -0,0 +1,175 @@ +version 1.0 + +# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task NanoPlot { + input { + File inputFile + String inputFileType + String outputDir + String outputPrefix + String outputPath = outputDir + outputPrefix + Boolean outputTsvStats = true + Boolean dropOutliers = false + Boolean logLengths = false + String format = "png" + Boolean showN50 = true + String title = basename(outputPrefix) + + Int? maxLength + Int? minLength + Int? minQual + String? 
readType + + Int threads = 2 + String memory = "2G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biocontainers/nanoplot:1.32.0--py_0" + } + + Map[String, String] fileTypeOptions = {"fastq": "--fastq ", "fasta": "--fasta ", "fastq_rich": "--fastq_rich ", "fastq_minimal": "--fastq_minimal ", "summary": "--summary ", "bam": "--bam ", "ubam": "--ubam ", "cram": "--cram ", "pickle": "--pickle ", "feather": "--feather "} + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + NanoPlot \ + --threads ~{threads} \ + --outdir ~{outputDir} \ + --prefix ~{outputPrefix} \ + ~{true="--tsv_stats" false="" outputTsvStats} \ + ~{true="--drop_outliers" false="" dropOutliers} \ + ~{true="--loglength" false="" logLengths} \ + --format ~{format} \ + ~{true="--N50" false="--no-N50" showN50} \ + ~{fileTypeOptions[inputFileType] + inputFile} \ + ~{"--maxlength " + maxLength} \ + ~{"--minlength " + minLength} \ + ~{"--minqual " + minQual} \ + ~{"--readtype " + readType} + } + + output { + File dynamicHistogram = outputDir + outputPrefix + "Dynamic_Histogram_Read_length.html" + File readLengthHistogram = outputDir + outputPrefix + "HistogramReadlength.png" + File lengthVsQualityScatterPlotDot = outputDir + outputPrefix + "LengthvsQualityScatterPlot_dot.png" + File lengthVsQualityScatterPlotKde = outputDir + outputPrefix + "LengthvsQualityScatterPlot_kde.png" + File logScaleReadLengthHistogram = outputDir + outputPrefix + "LogTransformed_HistogramReadlength.png" + File report = outputDir + outputPrefix + "NanoPlot-report.html" + File weightedHistogram = outputDir + outputPrefix + "Weighted_HistogramReadlength.png" + File weightedLogScaleHistogram = outputDir + outputPrefix + "Weighted_LogTransformed_HistogramReadlength.png" + File yieldByLength = outputDir + outputPrefix + "Yield_By_Length.png" + File? 
stats = outputDir + outputPrefix + "NanoStats.txt" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input file.", category: "required"} + inputFileType: {description: "The format of the read file.", category: "required"} + outputDir: {description: "Output directory path.", category: "required"} + outputPrefix: {description: "Output file prefix.", category: "required"} + outputTsvStats: {description: "Output the stats file as a properly formatted TSV.", category: "common"} + dropOutliers: {description: "Drop outlier reads with extreme long length.", category: "advanced"} + logLengths: {description: "Additionally show logarithmic scaling of lengths in plots.", category: "advanced"} + format: {description: "Specify the output format of the plots.", category: "required"} + showN50: {description: "Show the N50 mark in the read length histogram.", category: "common"} + title: {description: "Add a title to all plots, requires quoting if using spaces.", category: "common"} + maxLength: {description: "Hide reads longer than length specified.", category: "advanced"} + minLength: {description: "Hide reads shorter than length specified.", category: "advanced"} + minQual: {description: "Drop reads with an average quality lower than specified.", category: "advanced"} + readType: {description: "Which read type to extract information about from summary. Options are 1D, 2D, 1D2", category: "advanced"} + threads: {description: "The number of threads to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + dynamicHistogram: {description: ""} + readLengthHistogram: {description: ""} + lengthVsQualityScatterPlotDot: {description: ""} + lengthVsQualityScatterPlotKde: {description: ""} + logScaleReadLengthHistogram: {description: ""} + report: {description: ""} + weightedHistogram: {description: ""} + weightedLogScaleHistogram: {description: ""} + yieldByLength: {description: ""} + stats: {description: ""} + } +} + +task NanoQc { + input { + File inputFile + String outputDir + Boolean directRna = false + + Int? minLength + + Int threads = 2 + String memory = "2G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biocontainers/nanoqc:0.9.4--py_0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputDir})" + nanoQC \ + --outdir ~{outputDir} \ + ~{true="--rna" false="" directRna} \ + ~{"--minlen " + minLength} \ + ~{inputFile} + } + + output { + File report = outputDir + "nanoQC.html" + File log = outputDir + "NanoQC.log" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input file.", category: "required"} + outputDir: {description: "Output directory path.", category: "required"} + directRna: {description: "Fastq is from direct RNA-seq and contains U nucleotides.", category: "common"} + minLength: {description: "Filters the reads on a minimal length of the given range. Also plots the given length/2 of the begin and end of the reads.", category: "advanced"} + threads: {description: "The number of threads to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + report: {description: ""} + log: {description: ""} + } +} From 00b947f945b5da4f44812d9ea6a41347b1dc2ba7 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Sep 2020 16:56:58 +0200 Subject: [PATCH 0601/1208] Update changelog. --- CHANGELOG.md | 1 + nanopack.wdl | 7 ++----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 85beb2eb..7b4079cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Add NanoPlot and NanoQC tasks. + bcftools: add sorting and annotation + Bam2fastx: Input bam and index are now arrays. + Lima: Remove globs from outputs. diff --git a/nanopack.wdl b/nanopack.wdl index 59193f96..661f99de 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -59,11 +59,11 @@ task NanoPlot { ~{true="--loglength" false="" logLengths} \ --format ~{format} \ ~{true="--N50" false="--no-N50" showN50} \ - ~{fileTypeOptions[inputFileType] + inputFile} \ ~{"--maxlength " + maxLength} \ ~{"--minlength " + minLength} \ ~{"--minqual " + minQual} \ - ~{"--readtype " + readType} + ~{"--readtype " + readType} \ + ~{fileTypeOptions[inputFileType] + inputFile} } output { @@ -129,7 +129,6 @@ task NanoQc { Int? minLength - Int threads = 2 String memory = "2G" Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/nanoqc:0.9.4--py_0" @@ -151,7 +150,6 @@ task NanoQc { } runtime { - cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -163,7 +161,6 @@ task NanoQc { outputDir: {description: "Output directory path.", category: "required"} directRna: {description: "Fastq is from direct RNA-seq and contains U nucleotides.", category: "common"} minLength: {description: "Filters the reads on a minimal length of the given range. 
Also plots the given length/2 of the begin and end of the reads.", category: "advanced"} - threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 39aa53feeacf6a3d9b96c5adc2eec9c85eb92bba Mon Sep 17 00:00:00 2001 From: Jasper Date: Mon, 14 Sep 2020 16:59:12 +0200 Subject: [PATCH 0602/1208] Update CHANGELOG.md --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2bd7cbf1..cf85eb0b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,10 +16,10 @@ version 5.0.0-dev `sumOnDuplicateId` input (defaults to false). + survivor: replace integer boolean type to logical true or false value. + vt: Add option to ignore masked reference. -+ bcftools: add sorting and annotation ++ bcftools: add sorting and annotation. + Bam2fastx: Input bam and index are now arrays. + Lima: Remove globs from outputs. -+ Updated task gridss.wdl: add --jvmheap parameter ++ Updated task gridss.wdl: add --jvmheap parameter. + A bwa-mem2 task was created with the same interface (including usePostalt) as the bwa mem task. + bwa mem and bwa kit are now one task. The usePostalt boolean can be used to From 5b46df4bd5c4ecbd130de52e081b3e9258627188 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 16 Sep 2020 09:58:48 +0200 Subject: [PATCH 0603/1208] Complete parameter_meta. 
--- nanopack.wdl | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/nanopack.wdl b/nanopack.wdl index 661f99de..ba68af1b 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -108,16 +108,16 @@ task NanoPlot { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - dynamicHistogram: {description: ""} - readLengthHistogram: {description: ""} - lengthVsQualityScatterPlotDot: {description: ""} - lengthVsQualityScatterPlotKde: {description: ""} - logScaleReadLengthHistogram: {description: ""} - report: {description: ""} - weightedHistogram: {description: ""} - weightedLogScaleHistogram: {description: ""} - yieldByLength: {description: ""} - stats: {description: ""} + dynamicHistogram: {description: "Dynamic histogram of read length."} + readLengthHistogram: {description: "Histogram of read length."} + lengthVsQualityScatterPlotDot: {description: "Read lengths vs average read quality plot."} + lengthVsQualityScatterPlotKde: {description: "Read lengths vs average read quality plot."} + logScaleReadLengthHistogram: {description: "Histogram of read lengths after log transformation."} + report: {description: "Html summary report."} + weightedHistogram: {description: "Weighted histogram of read lengths."} + weightedLogScaleHistogram: {description: "Weighted histogram of read lengths after log transformation."} + yieldByLength: {description: "Cumulative yield plot."} + stats: {description: "NanoStats report."} } } @@ -166,7 +166,7 @@ task NanoQc { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - report: {description: ""} - log: {description: ""} + report: {description: "Html summary report."} + log: {description: "Progress report."} } } From af550dd024ff6fe5df365ebec58808f8517b2516 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 16 Sep 2020 12:11:06 +0200 Subject: [PATCH 0604/1208] Make some outputs optional. --- nanopack.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nanopack.wdl b/nanopack.wdl index ba68af1b..6860cf13 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -69,13 +69,13 @@ task NanoPlot { output { File dynamicHistogram = outputDir + outputPrefix + "Dynamic_Histogram_Read_length.html" File readLengthHistogram = outputDir + outputPrefix + "HistogramReadlength.png" - File lengthVsQualityScatterPlotDot = outputDir + outputPrefix + "LengthvsQualityScatterPlot_dot.png" - File lengthVsQualityScatterPlotKde = outputDir + outputPrefix + "LengthvsQualityScatterPlot_kde.png" File logScaleReadLengthHistogram = outputDir + outputPrefix + "LogTransformed_HistogramReadlength.png" File report = outputDir + outputPrefix + "NanoPlot-report.html" File weightedHistogram = outputDir + outputPrefix + "Weighted_HistogramReadlength.png" File weightedLogScaleHistogram = outputDir + outputPrefix + "Weighted_LogTransformed_HistogramReadlength.png" File yieldByLength = outputDir + outputPrefix + "Yield_By_Length.png" + File? lengthVsQualityScatterPlotDot = outputDir + outputPrefix + "LengthvsQualityScatterPlot_dot.png" + File? lengthVsQualityScatterPlotKde = outputDir + outputPrefix + "LengthvsQualityScatterPlot_kde.png" File? 
stats = outputDir + outputPrefix + "NanoStats.txt" } @@ -110,13 +110,13 @@ task NanoPlot { # outputs dynamicHistogram: {description: "Dynamic histogram of read length."} readLengthHistogram: {description: "Histogram of read length."} - lengthVsQualityScatterPlotDot: {description: "Read lengths vs average read quality plot."} - lengthVsQualityScatterPlotKde: {description: "Read lengths vs average read quality plot."} logScaleReadLengthHistogram: {description: "Histogram of read lengths after log transformation."} report: {description: "Html summary report."} weightedHistogram: {description: "Weighted histogram of read lengths."} weightedLogScaleHistogram: {description: "Weighted histogram of read lengths after log transformation."} yieldByLength: {description: "Cumulative yield plot."} + lengthVsQualityScatterPlotDot: {description: "Read lengths vs average read quality plot."} + lengthVsQualityScatterPlotKde: {description: "Read lengths vs average read quality plot."} stats: {description: "NanoStats report."} } } From 041721c1f49d981e18477ad208ecad3580fb9dbd Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 21 Sep 2020 16:10:57 +0200 Subject: [PATCH 0605/1208] Remove metrics file. --- CHANGELOG.md | 2 ++ centrifuge.wdl | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce42941e..b11e4223 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Centrifuge: Remove metrics file from classification (which causes the + summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 + Add NanoPlot and NanoQC tasks. + Centrifuge: Add `timeMinutes` to `Classify` task and remove unnecessary downloading tasks (alternative is refseqtools). 
diff --git a/centrifuge.wdl b/centrifuge.wdl index bc2ea462..1e7a0b45 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -128,7 +128,6 @@ task Classify { ~{inputFormatOptions[inputFormat]} \ ~{true="--phred64" false="--phred33" phred64} \ --min-hitlen ~{minHitLength} \ - ~{"--met-file " + outputPrefix + "_alignment_metrics.tsv"} \ --threads ~{threads} \ ~{"--trim5 " + trim5} \ ~{"--trim3 " + trim3} \ @@ -143,7 +142,6 @@ task Classify { >>> output { - File metrics = outputPrefix + "_alignment_metrics.tsv" File classification = outputPrefix + "_classification.tsv" File report = outputPrefix + "_output_report.tsv" } @@ -175,7 +173,6 @@ task Classify { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - metrics: {description: "File with centrifuge metrics."} classification: {description: "File with the classification results."} report: {description: "File with a classification summary."} } From fbbfc5bec27636e709de907c871efaab24d8f1c1 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 21 Sep 2020 16:13:57 +0200 Subject: [PATCH 0606/1208] Change indexing. --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b11e4223..142622e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- + Centrifuge: Remove metrics file from classification (which causes the - summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 + summary report to be empty). + https://github.com/DaehwanKimLab/centrifuge/issues/83 + Add NanoPlot and NanoQC tasks. + Centrifuge: Add `timeMinutes` to `Classify` task and remove unnecessary downloading tasks (alternative is refseqtools). 
From 66852ef0a1f5a08259a0f8eafc01d7a5d2bf1732 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 29 Sep 2020 16:39:10 +0200 Subject: [PATCH 0607/1208] add snpeff task --- snpeff.wdl | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 snpeff.wdl diff --git a/snpeff.wdl b/snpeff.wdl new file mode 100644 index 00000000..e1c0184f --- /dev/null +++ b/snpeff.wdl @@ -0,0 +1,73 @@ +version 1.0 + +task snpEff { + input { + File vcf + File vcfIndex + String genomeVersion + File datadirZip + String outputPath = "./snpeff.vcf" + Boolean hgvs = true + Boolean lof = true + Boolean noDownstream = false + Boolean noIntergenic = false + Boolean noShiftHgvs = false + Int? upDownStreamLen + + String memory = "50G" + String javaXmx = "49G" + Int timeMinutes = 60 #FIXME + String dockerImage = "quay.io/biocontainers/snpeff:5.0--0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + unzip ~{datadirZip} + snpEff -Xmx~{javaXmx}G -XX:ParallelGCThreads=1 \ + -v \ + ~{genomeVersion} \ + -noDownload \ + -dataDir $PWD/data \ + ~{vcf} \ + ~{true="-hgvs" false="-noHgvs" hgvs} \ + ~{true="-lof" false="-noLof" lof} \ + ~{true="-no-downstream" false="" noDownstream} \ + ~{true="-no-intergenic" false="" noIntergenic} \ + ~{true="-noShiftHgvs" false="" noShiftHgvs} \ + ~{"-upDownStreamLen " + upDownStreamLen} \ + > ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + vcf: {description: "A VCF file to analyse.", category: "required"} + vcfIndex: {description: "The index for the VCF file.", category: "required"} + genomeVersion: {description: "The version of the genome to be used. The database for this genome must be present in the datadirZip.", category: "required"} + datadirZip: {description: "A zip file containing the directory of databases. 
This zip file must contain a directory called `data`, with the database mentioned in the genomeVersion input as subdirectory.", + category: "required"} + outputPath: {description: "The path to write the output to.", category: "common"} + hgvs: {description: "Equivalent to `-hgvs` if true or `-noHgvs` if false.", category: "advanced"} + lof: {description: "Equivalent to `-lof` if true or `-noLof` if false.", category: "advanced"} + noDownstream: {description: "Equivalent to the `-no-downstream` flag.", category: "advanced"} + noIntergenic: {description: "Equivalent to the `-no-intergenic` flag.", category: "advanced"} + noShiftHgvs: {description: "Equivalent to the `-noShiftHgvs` flag.", category: "advanced"} + upDownStreamLen: {descriptoin: "Equivalent to the `-upDownStreamLen` option.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From 3ee13418733a762df9883266a73d14426bd26118 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 1 Oct 2020 09:47:52 +0200 Subject: [PATCH 0608/1208] typo --- snpeff.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snpeff.wdl b/snpeff.wdl index e1c0184f..95383b94 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -1,6 +1,6 @@ version 1.0 -task snpEff { +task SnpEff { input { File vcf File vcfIndex From 35bc2ba3fe927ed842464444506f191f4c268c84 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Fri, 2 Oct 2020 13:17:12 +0200 Subject: [PATCH 0609/1208] Add parameter meta for threads --- samtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index c155f026..dd771415 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -362,6 +362,7 @@ task Merge { # inputs bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} + threads: {description: "Number of threads to use.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From 81095b1bb400c28b3ad01cfb6ddef7b6a74907ed Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Fri, 2 Oct 2020 13:19:18 +0200 Subject: [PATCH 0610/1208] Update Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 142622e2..e2f266e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. 
version 5.0.0-dev --------------------------- ++ Samtools: Add parameter meta for Merge task + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 From 38333745daff01234eb36e178fb97ffb76c87d84 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Oct 2020 14:20:32 +0200 Subject: [PATCH 0611/1208] fix bcftools filter --- bcftools.wdl | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index affa805a..b1d6e5f0 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -125,6 +125,53 @@ task Annotate { } } +task Filter { + input { + File vcf + File vcfIndex + Array[String] include = [] + String outputPath = "./filtered.vcf.gz" + + String memory = "256M" + Int timeMinutes = 1 + ceil(size(vcf, "G")) + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + bcftools \ + filter \ + ~{true="-i" false="" length(include) > 0} ~{sep=" -i " include} \ + ~{vcf} \ + -O z \ + -o ~{outputPath} + bctools index --tbi ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + vcf: {description: "The VCF file to operate on.", category: "required"} + vcfIndex: {description: "The index for the VCF file.", category: "required"} + include: {description: "Equivalent to the `-i` option.", category: "common"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + } +} + task Sort { input { File inputFile From 66399ba333105934575da4ff97e43f6e35ef06d4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 5 Oct 2020 13:07:13 +0200 Subject: [PATCH 0612/1208] fix whitespace --- bcftools.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index b1d6e5f0..619c1733 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -141,11 +141,11 @@ task Filter { set -e mkdir -p "$(dirname ~{outputPath})" bcftools \ - filter \ + filter \ ~{true="-i" false="" length(include) > 0} ~{sep=" -i " include} \ - ~{vcf} \ - -O z \ - -o ~{outputPath} + ~{vcf} \ + -O z \ + -o ~{outputPath} bctools index --tbi ~{outputPath} } From 28bd67e696bfb2302920cc76245f3a6a86161948 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 6 Oct 2020 13:40:28 +0200 Subject: [PATCH 0613/1208] Update CHANGELOG.md Co-authored-by: Davy Cats --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2f266e9..7668cd2b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ Samtools: Add parameter meta for Merge task ++ Samtools: Add `threads` to parameter meta for Merge task + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). 
https://github.com/DaehwanKimLab/centrifuge/issues/83 From afe600065e0d94a80ba68bba5f23bed8a9f52293 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 6 Oct 2020 13:40:44 +0200 Subject: [PATCH 0614/1208] Update samtools.wdl Co-authored-by: Davy Cats --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index dd771415..24d95aa4 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -362,7 +362,7 @@ task Merge { # inputs bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} - threads: {description: "Number of threads to use.", category: "common"} + threads: {description: "Number of threads to use.", category: "advanced"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From 09372028e140528ccc255b73c87b48ad45a93a77 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 6 Oct 2020 16:20:29 +0200 Subject: [PATCH 0615/1208] fix bcftools filter --- bcftools.wdl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 619c1733..0be3be93 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -129,7 +129,9 @@ task Filter { input { File vcf File vcfIndex - Array[String] include = [] + String? include + String? exclude + String? 
softFilter String outputPath = "./filtered.vcf.gz" String memory = "256M" @@ -142,7 +144,9 @@ task Filter { mkdir -p "$(dirname ~{outputPath})" bcftools \ filter \ - ~{true="-i" false="" length(include) > 0} ~{sep=" -i " include} \ + ~{"-i " + include} \ + ~{"-e " + exclude} \ + ~{"-s " + softFilter} ~{vcf} \ -O z \ -o ~{outputPath} From 2fdabcca7e4bba7e1ba2a30d6e47dfb478e58e11 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 7 Oct 2020 13:54:41 +0200 Subject: [PATCH 0616/1208] Update parameter_meta. --- CHANGELOG.md | 1 + nanopack.wdl | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 142622e2..803c221b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ NanoPack: Add parameter_meta to NanoPlot task. + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 diff --git a/nanopack.wdl b/nanopack.wdl index 6860cf13..e4d15135 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -92,6 +92,7 @@ task NanoPlot { inputFileType: {description: "The format of the read file.", category: "required"} outputDir: {description: "Output directory path.", category: "required"} outputPrefix: {description: "Output file prefix.", category: "required"} + outputPath: {description: "Combination of the outputDir & outputPrefix strings.", category: "advanced"} outputTsvStats: {description: "Output the stats file as a properly formatted TSV.", category: "common"} dropOutliers: {description: "Drop outlier reads with extreme long length.", category: "advanced"} logLengths: {description: "Additionally show logarithmic scaling of lengths in plots.", category: "advanced"} From 6eaf21442d9352266f0ac3e108cf1dc084c1c9f4 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:07:39 +0200 Subject: [PATCH 0617/1208] Ensure that the 
index and bamfiles are in the same folder --- bam2fastx.wdl | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 42240cd4..18434755 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -91,12 +91,25 @@ task Bam2Fastq { command { set -e mkdir -p "$(dirname ~{outputPrefix})" + + # Localise the bam and pbi files so they are next to each other in the + # current folder + bamfiles="" + for bamfile in ~{sep=" " bam};do + ln $bamfile . + bamfiles=$bamfiles" $(basename $bamfile)" + done + + for bamindex in ~{sep=" " bamIndex}; do + ln $bamindex . + done + bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{sep=" " bam} + $bamfiles } output { From 0dd0afd61c43b625146adce4b4507ec85803381a Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:08:02 +0200 Subject: [PATCH 0618/1208] Add bam index file as required input for isoseq --- isoseq3.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/isoseq3.wdl b/isoseq3.wdl index 604a71d5..7894b382 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -26,6 +26,7 @@ task Refine { Boolean requirePolyA = false String logLevel = "WARN" File inputBamFile + File inputBamIndex File primerFile String outputDir String outputNamePrefix From 571544cbcbeeda14eadce3b7d633626fcb4f518e Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:08:23 +0200 Subject: [PATCH 0619/1208] Simplify lima output structure --- lima.wdl | 32 ++++++++++---------------------- samtools.wdl | 1 + 2 files changed, 11 insertions(+), 22 deletions(-) diff --git a/lima.wdl b/lima.wdl index 1a40b1c8..38cf2d6e 100644 --- a/lima.wdl +++ b/lima.wdl @@ -58,7 +58,6 @@ task Lima { command { set -e - mkdir -p "$(dirname ~{outputPrefix})" lima \ ~{libraryDesignOptions[libraryDesign]} \ ~{true="--score-full-pass" false="" scoreFullPass} \ @@ -83,32 +82,21 @@ task Lima { 
~{true="--peek-guess" false="" peekGuess} \ --log-level ~{logLevel} \ --num-threads ~{cores} \ - ~{"--log-file " + outputPrefix + ".fl.stderr.log"} \ + ~{"--log-file " + outputPrefix + ".stderr.log"} \ ~{inputBamFile} \ ~{barcodeFile} \ - ~{basename(outputPrefix) + ".fl.bam"} - - # copy commands below are needed because glob command does not find - # multiple bam/bam.pbi/subreadset.xml files when not located in working - # directory. - cp "~{basename(outputPrefix)}.fl.json" "~{outputPrefix}.fl.json" - cp "~{basename(outputPrefix)}.fl.lima.counts" "~{outputPrefix}.fl.lima.counts" - cp "~{basename(outputPrefix)}.fl.lima.report" "~{outputPrefix}.fl.lima.report" - cp "~{basename(outputPrefix)}.fl.lima.summary" "~{outputPrefix}.fl.lima.summary" - find . -path "*.bam" > bamFiles.txt - find . -path "*.bam.pbi" > bamIndexes.txt - find . -path "*.subreadset.xml" > subreadsets.txt + ~{outputPrefix + ".bam"} } output { - Array[File] limaBam = read_lines("bamFiles.txt") - Array[File] limaBamIndex = read_lines("bamIndexes.txt") - Array[File] limaXml = read_lines("subreadsets.txt") - File limaStderr = outputPrefix + ".fl.stderr.log" - File limaJson = outputPrefix + ".fl.json" - File limaCounts = outputPrefix + ".fl.lima.counts" - File limaReport = outputPrefix + ".fl.lima.report" - File limaSummary = outputPrefix + ".fl.lima.summary" + Array[File] limaBam = glob("*.bam") + Array[File] limaBamIndex = glob("*.bam.pbi") + Array[File] limaXml = glob("*.subreadset.xml") + File limaStderr = outputPrefix + ".stderr.log" + File limaJson = outputPrefix + ".json" + File limaCounts = outputPrefix + ".lima.counts" + File limaReport = outputPrefix + ".lima.report" + File limaSummary = outputPrefix + ".lima.summary" } runtime { diff --git a/samtools.wdl b/samtools.wdl index c155f026..dd771415 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -362,6 +362,7 @@ task Merge { # inputs bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location 
the merged BAM file should be written to.", category: "common"} + threads: {description: "Number of threads to use.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", From 5ca9c5e22734456a7735ce383d695877e6cb9c08 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:09:11 +0200 Subject: [PATCH 0620/1208] Add task for indexing PacBio bam files --- pbbam.wdl | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 pbbam.wdl diff --git a/pbbam.wdl b/pbbam.wdl new file mode 100644 index 00000000..368ff4ed --- /dev/null +++ b/pbbam.wdl @@ -0,0 +1,69 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + +task Index { + input { + File bamFile + String? outputBamPath + String memory = "2G" + Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) + String dockerImage = "quay.io/biocontainers/pbbam:1.6.0--h5b7e6e0_0" + } + + # Select_first is needed, otherwise womtool validate fails. + String outputPath = select_first([outputBamPath, basename(bamFile)]) + String bamIndexPath = outputPath + ".pbi" + + command { + bash -c ' + set -e + # Make sure outputBamPath does not exist. + if [ ! -f ~{outputPath} ] + then + mkdir -p "$(dirname ~{outputPath})" + ln ~{bamFile} ~{outputPath} + fi + pbindex ~{outputPath} ~{bamIndexPath} + ' + } + + output { + File indexedBam = outputPath + File index = bamIndexPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + bamFile: {description: "The BAM file for which an index should be made.", category: "required"} + outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", + category: "common"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From d8848dc95d73402eb92483456a35eaac9040a83e Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:10:05 +0200 Subject: [PATCH 0621/1208] Make intervals optional for gatk GenotypeGVCFs --- gatk.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index e0209a0c..12416dda 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -820,7 +820,7 @@ task GenotypeGVCFs { input { File gvcfFile File gvcfFileIndex - Array[File]+ intervals + Array[File]? intervals String outputPath File referenceFasta File referenceFastaDict @@ -846,9 +846,9 @@ task GenotypeGVCFs { ~{"-D " + dbsnpVCF} \ ~{"--pedigree " + pedigree} \ ~{true="-G" false="" length(annotationGroups) > 0} ~{sep=" -G " annotationGroups} \ - --only-output-calls-starting-in-intervals \ -V ~{gvcfFile} \ - -L ~{sep=' -L ' intervals} + ~{true="--only-output-calls-starting-in-intervals" false="" defined(intervals)} \ + ~{true="-L" false="" defined(intervals)} ~{sep=' -L ' intervals} } output { @@ -866,7 +866,7 @@ task GenotypeGVCFs { parameter_meta { gvcfFile: {description: "The GVCF file to be genotyped.", category: "required"} gvcfFileIndex: {description: "The index of the input GVCF file.", category: "required"} - intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "required"} + intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "optional"} outputPath: {description: "The location to write the output VCF file to.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} From d786fcec2cf3b7ecbe0cdbccbe412cef382fac71 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:10:42 +0200 Subject: [PATCH 0622/1208] Increase runtime and add sample name for pbmm2 
--- pbmm2.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index 84fbd2d0..31d4c667 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -30,7 +30,7 @@ task Mapping { Int cores = 4 String memory = "30G" - Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) + Int timeMinutes = 1 + ceil(size(queryFile, "G") * 2000 / cores) String dockerImage = "quay.io/biocontainers/pbmm2:1.3.0--h56fc30b_1" } @@ -41,6 +41,7 @@ task Mapping { -j ~{cores} \ ~{referenceMMI} \ ~{queryFile} \ + --sample ~{sample} \ ~{sample}.align.bam } From a2ae010f8efa3f9d03ea99b61038419956be98b3 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:17:10 +0200 Subject: [PATCH 0623/1208] Add HsMetrics and VariantcallingMetrics to picard --- picard.wdl | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/picard.wdl b/picard.wdl index 1afa5ea7..9603db8c 100644 --- a/picard.wdl +++ b/picard.wdl @@ -66,6 +66,70 @@ task BedToIntervalList { } } +task CollectHsMetrics { + input { + File inputBam + File inputBamIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai + File targets + File? baits + String basename + + + # Use the targets file as baits as a fallback, since often the baits + # for a certain capture kit are not available. + File baitsFile = select_first([baits, targets]) + File targetsFile = targets + + Int memoryMb = javaXmxMb + 512 + Int javaXmxMb = 3072 + # Additional * 2 because picard multiple metrics reads the reference fasta twice. 
+ Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + } + + command { + set -e + mkdir -p "$(dirname ~{basename})" + picard -Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1 \ + CollectHsMetrics \ + I=~{inputBam} \ + R=~{referenceFasta} \ + BAIT_INTERVALS=~{baitsFile} \ + TARGET_INTERVALS=~{targetsFile} \ + O="~{basename}.hs_metrics.txt" + } + + output { + File HsMetrics = basename + ".hs_metrics.txt" + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: "~{memoryMb}M" + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} + inputBamIndex: {description: "The index of the input BAM file.", category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} + memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task CollectMultipleMetrics { input { File inputBam @@ -315,6 +379,53 @@ task CollectTargetedPcrMetrics { } } +task CollectVariantCallingMetrics { + input { + File dbsnp + File dbsnpIndex + File inputVCF + File inputVCFIndex + String basename + + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 1440 + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + } + + command { + set -e + mkdir -p "$(dirname ~{basename})" + picard -Xmx~{javaXmx} \ + CollectVariantCallingMetrics -XX:ParallelGCThreads=1 \ + DBSNP=~{dbsnp} \ + INPUT=~{inputVCF} \ + OUTPUT=~{basename} + } + + output { + File details = basename + ".variant_calling_detail_metrics" + File summary = basename + ".variant_calling_summary_metrics" + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + # inputs + basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task CreateSequenceDictionary { input { File inputFile From e2fbf4a0275a9ae27de653513cd9c6f1b6340915 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:17:25 +0200 Subject: [PATCH 0624/1208] Add deepvariant tasks --- deepvariant.wdl | 91 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 deepvariant.wdl diff --git a/deepvariant.wdl b/deepvariant.wdl new file mode 100644 index 00000000..88bdb352 --- /dev/null +++ b/deepvariant.wdl @@ -0,0 +1,91 @@ +version 1.0 + +# Copyright (c) 2018 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task RunDeepVariant { + input { + File referenceFasta + File referenceFastaIndex + File inputBam + File inputBamIndex + String modelType + String outputVcf + File? customizedModel + Int? numShards + String? 
outputGVcf + File? regions + String? sampleName + Boolean? VCFStatsReport = true + + String memory = "3G" + Int timeMinutes = 5000 + String dockerImage = "google/deepvariant:1.0.0" + } + + command { + set -e + + /opt/deepvariant/bin/run_deepvariant \ + --ref ~{referenceFasta} \ + --reads ~{inputBam} \ + --model_type ~{modelType} \ + --output_vcf ~{outputVcf} \ + ~{"--output_gvcf " + outputGVcf} \ + ~{"--customized_model " + customizedModel} \ + ~{"--num_shards " + numShards} \ + ~{"--regions " + regions} \ + ~{"--sample_name " + sampleName} \ + ~{true="--vcf_stats_report" false="--novcf_stats_report" VCFStatsReport} + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + output { + File outputVCF = outputVcf + File outputVCFIndex = outputVCF + ".tbi" + File? outputGVCF = outputGVcf + File? outputGVCFIndex = outputGVcf + ".tbi" + Array[File] outputVCFStatsReport = glob("*.visual_report.html") + } + + parameter_meta { + referenceFasta: {description: "Genome reference to use", category: "required"} + referenceFastaIndex: {description: "Index for the genome reference file.", category: "required"} + inputBam: {description: "Aligned, sorted, indexed BAM file containing the reads we want to call.", category: "required"} + inputBamIndex: {description: "Index for the input bam file.", category: "required"} + modelType: {description: "Type of model to use for variant calling. Each model_type has an associated default model, which can be overridden by the --customized_model flag", category: "required"} + outputVcf: {description: "Path where we should write VCF file.", category: "required"} + customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. 
If not set, the default for each --model_type will be used", category: "advanced"} + numShards: {description: "Number of shards for make_examples step.", category: "common"} + outputGVcf: {description: "Path where we should write gVCF file.", category: "common"} + regions: {description: "List of regions we want to process, in BED/BEDPE format.", category: "advanced"} + sampleName: {description: "Sample name to use instead of the sample name from the input reads BAM (SM tag in the header).", category: "common"} + VCFStatsReport: {description: "Output a visual report (HTML) of statistics about the output VCF.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From 41024c35d01b0a954a0eaf6f4f69ab93ec02833b Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:17:33 +0200 Subject: [PATCH 0625/1208] Add whatshap tasks --- whatshap.wdl | 275 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 275 insertions(+) create mode 100644 whatshap.wdl diff --git a/whatshap.wdl b/whatshap.wdl new file mode 100644 index 00000000..2506aa10 --- /dev/null +++ b/whatshap.wdl @@ -0,0 +1,275 @@ +version 1.0 + +# Copyright (c) 2018 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following 
conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +task Phase { + input { + String outputVCF + File? reference + File? referenceIndex + Boolean? no_reference + String? tag + File? output_read_list + String? algorithm + Boolean? merge_reads + String? internal_downsampling + String? mapping_quality + Boolean? indels + Boolean? ignore_read_groups + String? sample + String? chromosome + String? error_rate + String? maximum_error_rate + String? threshold + String? negative_threshold + Boolean? full_genotyping + Boolean? distrust_genotypes + Boolean? include_homozygous + String? default_gq + String? gl_regularize_r + File? changed_genotype_list + String? ped + File? recombination_list + String? recomb_rate + File? gen_map + Boolean? no_genetic_haplo_typing + Boolean? 
use_ped_samples + File vcf + File vcfIndex + File phaseInput + File phaseInputIndex + + String memory = "4G" + Int timeMinutes = 120 + # Whatshap 1.0, tabix 0.2.5 + String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" + } + + command { + whatshap phase \ + ~{vcf} \ + ~{phaseInput} \ + ~{if defined(outputVCF) then ("--output " + '"' + outputVCF + '"') else ""} \ + ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ + ~{true="--no-reference" false="" no_reference} \ + ~{if defined(tag) then ("--tag " + '"' + tag + '"') else ""} \ + ~{if defined(output_read_list) then ("--output-read-list " + '"' + output_read_list + '"') else ""} \ + ~{if defined(algorithm) then ("--algorithm " + '"' + algorithm + '"') else ""} \ + ~{true="--merge-reads" false="" merge_reads} \ + ~{if defined(internal_downsampling) then ("--internal-downsampling " + '"' + internal_downsampling + '"') else ""} \ + ~{if defined(mapping_quality) then ("--mapping-quality " + '"' + mapping_quality + '"') else ""} \ + ~{true="--indels" false="" indels} \ + ~{true="--ignore-read-groups" false="" ignore_read_groups} \ + ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ + ~{if defined(chromosome) then ("--chromosome " + '"' + chromosome + '"') else ""} \ + ~{if defined(error_rate) then ("--error-rate " + '"' + error_rate + '"') else ""} \ + ~{if defined(maximum_error_rate) then ("--maximum-error-rate " + '"' + maximum_error_rate + '"') else ""} \ + ~{if defined(threshold) then ("--threshold " + '"' + threshold + '"') else ""} \ + ~{if defined(negative_threshold) then ("--negative-threshold " + '"' + negative_threshold + '"') else ""} \ + ~{true="--full-genotyping" false="" full_genotyping} \ + ~{true="--distrust-genotypes" false="" distrust_genotypes} \ + ~{true="--include-homozygous" false="" include_homozygous} \ + ~{if defined(default_gq) then ("--default-gq " + '"' + 
default_gq + '"') else ""} \ + ~{if defined(gl_regularize_r) then ("--gl-regularizer " + '"' + gl_regularize_r + '"') else ""} \ + ~{if defined(changed_genotype_list) then ("--changed-genotype-list " + '"' + changed_genotype_list + '"') else ""} \ + ~{if defined(ped) then ("--ped " + '"' + ped + '"') else ""} \ + ~{if defined(recombination_list) then ("--recombination-list " + '"' + recombination_list + '"') else ""} \ + ~{if defined(recomb_rate) then ("--recombrate " + '"' + recomb_rate + '"') else ""} \ + ~{if defined(gen_map) then ("--genmap " + '"' + gen_map + '"') else ""} \ + ~{true="--no-genetic-haplotyping" false="" no_genetic_haplo_typing} \ + ~{true="--use-ped-samples" false="" use_ped_samples} && \ + tabix -p vcf ~{outputVCF} + } + + output { + File phasedVCF = outputVCF + File phasedVCFIndex = outputVCF + ".tbi" + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + outputVCF: {description: "Output VCF file. Add .gz to the file name to get compressed output. If omitted, use standard output.", category: "common"} + reference: {description: "Reference file. Provide this to detect alleles through re-alignment. 
If no index (.fai) exists, it will be created", category: "common"} + no_reference: {description: "Detect alleles without requiring a reference, at the expense of phasing quality (in particular for long reads)", category: "common"} + tag: {description: "Store phasing information with PS tag (standardized) or HP tag (used by GATK ReadBackedPhasing) (default: {description: PS)", category: "common"} + output_read_list: {description: "Write reads that have been used for phasing to FILE.", category: "advanced"} + algorithm: {description: "Phasing algorithm to use (default: {description: whatshap)", category: "advanced"} + merge_reads: {description: "Merge reads which are likely to come from the same haplotype (default: {description: do not merge reads)", category: "common"} + internal_downsampling: {description: "Coverage reduction parameter in the internal core phasing algorithm. Higher values increase runtime *exponentially* while possibly improving phasing quality marginally. Avoid using this in the normal case! (default: {description: 15)", category: "advanced"} + mapping_quality: {description: "Minimum mapping quality (default: {description: 20)", category: "common"} + indels: {description: "Also phase indels (default: {description: do not phase indels)", category: "common"} + ignore_read_groups: {description: "Ignore read groups in BAM/CRAM header and assume all reads come from the same sample.", category: "advanced"} + sample: {description: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times.", category: "common"} + chromosome: {description: "Name of chromosome to phase. If not given, all chromosomes in the input VCF are phased. 
Can be used multiple times.", category: "common"} + error_rate: {description: "The probability that a nucleotide is wrong in read merging model (default: {description: 0.15).", category: "advanced"} + maximum_error_rate: {description: "The maximum error rate of any edge of the read merging graph before discarding it (default: {description: 0.25).", category: "advanced"} + threshold: {description: "The threshold of the ratio between the probabilities that a pair of reads come from the same haplotype and different haplotypes in the read merging model (default: {description: 1000000).", category: "advanced"} + negative_threshold: {description: "The threshold of the ratio between the probabilities that a pair of reads come from different haplotypes and the same haplotype in the read merging model (default: {description: 1000).", category: "advanced"} + full_genotyping: {description: "Completely re-genotype all variants based on read data, ignores all genotype data that might be present in the VCF (EXPERIMENTAL FEATURE).", category: "experimental"} + distrust_genotypes: {description: "Allow switching variants from hetero- to homozygous in an optimal solution (see documentation).", category: "advanced"} + include_homozygous: {description: "Also work on homozygous variants, which might be turned to heterozygous", category: "advanced"} + default_gq: {description: "Default genotype quality used as cost of changing a genotype when no genotype likelihoods are available (default 30)", category: "advanced"} + gl_regularize_r: {description: "Constant (float) to be used to regularize genotype likelihoods read from input VCF (default None).", category: "advanced"} + changed_genotype_list: {description: "Write list of changed genotypes to FILE.", category: "advanced"} + ped: {description: "Use pedigree information in PED file to improve phasing (switches to PedMEC algorithm). Columns 2, 3, 4 must refer to child, mother, and father sample names as used in the VCF and BAM/CRAM. 
Other columns are ignored.", category: "advanced"} + recombination_list: {description: "Write putative recombination events to FILE.", category: "advanced"} + recomb_rate: {description: "Recombination rate in cM/Mb (used with --ped). If given, a constant recombination rate is assumed (default: {description: 1.26cM/Mb).", category: "advanced"} + gen_map: {description: "File with genetic map (used with --ped) to be used instead of constant recombination rate, i.e. overrides option --recombrate.", category: "advanced"} + no_genetic_haplo_typing: {description: "Do not merge blocks that are not connected by reads (i.e. solely based on genotype status). Default: {description: when in --ped mode, merge all blocks that contain at least one homozygous genotype in at least one individual into one block.", category: "advanced"} + use_ped_samples: {description: "Only work on samples mentioned in the provided PED file.", category: "advanced"} + vcf: {description: "VCF or BCF file with variants to be phased (can be gzip-compressed)", category: "required"} + vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased", category: "required"} + phaseInput: {description: "BAM, CRAM, VCF or BCF file(s) with phase information, either through sequencing reads (BAM, CRAM) or through phased blocks (VCF, BCF)", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + +task Stats { + input { + String? gtf + String? sample + String? chr_lengths + String? tsv + Boolean? only_sn_vs + String? block_list + String? 
chromosome + File vcf + + String memory = "4G" + Int timeMinutes = 120 + # Whatshap 1.0, tabix 0.2.5 + String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" + } + + command { + whatshap stats \ + ~{vcf} \ + ~{if defined(gtf) then ("--gtf " + '"' + gtf + '"') else ""} \ + ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ + ~{if defined(chr_lengths) then ("--chr-lengths " + '"' + chr_lengths + '"') else ""} \ + ~{if defined(tsv) then ("--tsv " + '"' + tsv + '"') else ""} \ + ~{true="--only-snvs" false="" only_sn_vs} \ + ~{if defined(block_list) then ("--block-list " + '"' + block_list + '"') else ""} \ + ~{if defined(chromosome) then ("--chromosome " + '"' + chromosome + '"') else ""} + } + + output { + File? phasedGTF = gtf + File? phasedTSV = tsv + File? phasedBlockList = block_list + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + gtf: "Write phased blocks to GTF file." + sample: "Name of the sample to process. If not given, use first sample found in VCF." + chr_lengths: "File with chromosome lengths (one line per chromosome, tab separated ' ') needed to compute N50 values." + tsv: "Filename to write statistics to (tab-separated)." + only_sn_vs: "Only process SNVs and ignore all other variants." + block_list: "Filename to write list of all blocks to (one block per line)." + chromosome: "Name of chromosome to process. If not given, all chromosomes in the input VCF are considered." + vcf: "Phased VCF file" + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + +task Haplotag { + input { + String outputFile + File? reference + File? referenceFastaIndex + String? regions + Boolean? ignore_linked_read + String? linked_read_distance_cut_off + Boolean? ignore_read_groups + String? sample + String? output_haplo_tag_list + Boolean? tag_supplementary + File vcf + File vcfIndex + File alignments + File alignmentsIndex + + String memory = "4G" + Int timeMinutes = 120 + # Whatshap 1.0, tabix 0.2.5 + String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" + } + + command { + whatshap haplotag \ + ~{vcf} \ + ~{alignments} \ + ~{if defined(outputFile) then ("--output " + '"' + outputFile+ '"') else ""} \ + ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ + ~{if defined(regions) then ("--regions " + '"' + regions + '"') else ""} \ + ~{true="--ignore-linked-read" false="" ignore_linked_read} \ + ~{if defined(linked_read_distance_cut_off) then ("--linked-read-distance-cutoff " + '"' + linked_read_distance_cut_off + '"') else ""} \ + ~{true="--ignore-read-groups" false="" ignore_read_groups} \ + ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ + ~{if defined(output_haplo_tag_list) then ("--output-haplotag-list " + '"' + output_haplo_tag_list + '"') else ""} \ + ~{true="--tag-supplementary" false="" tag_supplementary} && \ + python3 -c "import pysam; pysam.index('~{outputFile}')" + } + + output { + File bam = outputFile + File bamIndex = outputFile + ".bai" + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + outputFile: "Output file. If omitted, use standard output." + reference: "Reference file. Provide this to detect alleles through re-alignment. 
If no index (.fai) exists, it will be created" + regions: "Specify region(s) of interest to limit the tagging to reads/variants overlapping those regions. You can specify a space-separated list of regions in the form of chrom:start-end, chrom (consider entire chromosome), or chrom:start (consider region from this start to end of chromosome)." + ignore_linked_read: "Ignore linkage information stored in BX tags of the reads." + linked_read_distance_cut_off: "Assume reads with identical BX tags belong to different read clouds if their distance is larger than LINKEDREADDISTANCE (default: 50000)." + ignore_read_groups: "Ignore read groups in BAM/CRAM header and assume all reads come from the same sample." + sample: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times." + output_haplo_tag_list: "Write assignments of read names to haplotypes (tab separated) to given output file. If filename ends in .gz, then output is gzipped." + tag_supplementary: "Also tag supplementary alignments. Supplementary alignments are assigned to the same haplotype the primary alignment has been assigned to (default: only tag primary alignments)." + vcf: "VCF file with phased variants (must be gzip-compressed and indexed)" + alignments: "File (BAM/CRAM) with read alignments to be tagged by haplotype" + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} From 788681506815ef10573eb86cea4efe22f300b5db Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 7 Oct 2020 14:22:10 +0200 Subject: [PATCH 0626/1208] Update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 142622e2..26711b72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,11 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ deepvariant: Add task for DeepVariant ++ gatk: Make intervals optional for GenotypeGVCFs ++ isoseq3: Add required bam index input to isoseq3 ++ pbbam: Add task for indexing PacBio bam files ++ picard: Add CollectHsMetrics and CollectVariantCallingMetrics + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 From f531d274c8fcd0789318f08a61b2aa50bed0d3fa Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:23:53 +0200 Subject: [PATCH 0627/1208] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 26711b72..2ef37f31 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ deepvariant: Add task for DeepVariant ++ deepvariant: Add task for DeepVariant. 
+ gatk: Make intervals optional for GenotypeGVCFs + isoseq3: Add required bam index input to isoseq3 + pbbam: Add task for indexing PacBio bam files From 1f0a112b763687055b2b647d7f1845d4e57a5664 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:24:02 +0200 Subject: [PATCH 0628/1208] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ef37f31..f4d217fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- + deepvariant: Add task for DeepVariant. -+ gatk: Make intervals optional for GenotypeGVCFs ++ gatk: Make intervals optional for GenotypeGVCFs. + isoseq3: Add required bam index input to isoseq3 + pbbam: Add task for indexing PacBio bam files + picard: Add CollectHsMetrics and CollectVariantCallingMetrics From d4cfd015be4aacc306454b4410bd6a98a79627bc Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:24:11 +0200 Subject: [PATCH 0629/1208] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f4d217fc..1f75492f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ version 5.0.0-dev --------------------------- + deepvariant: Add task for DeepVariant. + gatk: Make intervals optional for GenotypeGVCFs. -+ isoseq3: Add required bam index input to isoseq3 ++ isoseq3: Add required bam index input to isoseq3. 
+ pbbam: Add task for indexing PacBio bam files + picard: Add CollectHsMetrics and CollectVariantCallingMetrics + Centrifuge: Remove metrics file from classification (which causes the From 80c84a4ae5946a0297bc0f30afaec66f327a8d55 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:24:20 +0200 Subject: [PATCH 0630/1208] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f75492f..6230afbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ version 5.0.0-dev + gatk: Make intervals optional for GenotypeGVCFs. + isoseq3: Add required bam index input to isoseq3. + pbbam: Add task for indexing PacBio bam files -+ picard: Add CollectHsMetrics and CollectVariantCallingMetrics ++ picard: Add CollectHsMetrics and CollectVariantCallingMetrics. + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 From 31bbeddf090f618084a71ecbd33a90842aa46b40 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:24:35 +0200 Subject: [PATCH 0631/1208] Update picard.wdl Co-authored-by: Jasper --- picard.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index 9603db8c..d6b23245 100644 --- a/picard.wdl +++ b/picard.wdl @@ -77,7 +77,6 @@ task CollectHsMetrics { File? baits String basename - # Use the targets file as baits as a fallback, since often the baits # for a certain capture kit are not available. 
File baitsFile = select_first([baits, targets]) From b6178110f9824758ac3a4e94f025825d23c170a2 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:36:20 +0200 Subject: [PATCH 0632/1208] Update parameter meta --- isoseq3.wdl | 1 + picard.wdl | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/isoseq3.wdl b/isoseq3.wdl index 7894b382..5060f0e7 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -73,6 +73,7 @@ task Refine { requirePolyA: {description: "Require fl reads to have a poly(A) tail and remove it.", category: "common"} logLevel: {description: "Set log level. Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} inputBamFile: {description: "Bam input file.", category: "required"} + inputBamIndex: {description: "Index for the Bam input file.", category: "required"} primerFile: {description: "Barcode/primer fasta file.", category: "required"} outputDir: {description: "Output directory path.", category: "required"} outputNamePrefix: {description: "Basename of the output files.", category: "required"} diff --git a/picard.wdl b/picard.wdl index d6b23245..b5ad0cb4 100644 --- a/picard.wdl +++ b/picard.wdl @@ -415,6 +415,10 @@ task CollectVariantCallingMetrics { parameter_meta { # inputs + dbsnp: {description: "DBSNP vcf file to use with CollectVariantCallingMetrics.", category: "required"} + dbsnpIndex: {description: "Index file for the DBSNP VCF.", category: "required"} + inputVCF: {description: "Input VCF file", category: "required"} + inputVCFIndex: {description: "Index file for the input VCF.", category: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", From 353224aadecf82940e915424a017870ff2580d20 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 8 Oct 2020 07:42:45 +0200 Subject: [PATCH 0633/1208] Add parameter meta for CollectHsMetrics --- picard.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/picard.wdl b/picard.wdl index b5ad0cb4..49db8b8b 100644 --- a/picard.wdl +++ b/picard.wdl @@ -119,6 +119,8 @@ task CollectHsMetrics { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + targets: {description: "Picard interval file of the capture targets.", category: "required"} + baits: {description: "Picard interval file of the capture bait set.", category: "advanced"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", From 7c065d4046a50c89727a1377618919a14814d9c2 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 9 Oct 2020 11:29:04 +0200 Subject: [PATCH 0634/1208] remove outputType and indexing instead based on extension of the file --- bcftools.wdl | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index affa805a..e2251331 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -35,7 +35,6 @@ task Annotate { Boolean keepSites = false String? markSites Boolean noVersion = false - String outputType = "z" String? regions File? regionsFile File? 
renameChrs @@ -52,14 +51,14 @@ task Annotate { String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } - Boolean indexing = if outputType == "z" then true else false + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") command { set -e mkdir -p "$(dirname ~{outputPath})" bcftools annotate \ -o ~{outputPath} \ - -O ~{outputType} \ + -O ~{true="z" false="v" compressed} \ ~{"--annotations " + annsFile} \ ~{"--collapse " + collapse} \ ~{true="--columns" false="" length(columns) > 0} ~{sep="," columns} \ @@ -80,7 +79,7 @@ task Annotate { ~{true="--remove" false="" length(removeAnns) > 0} ~{sep="," removeAnns} \ ~{inputFile} - ~{if indexing then 'bcftools index --tbi ~{outputPath}' else ''} + ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} } @@ -97,7 +96,6 @@ task Annotate { parameter_meta { outputPath: {description: "The location the output VCF file should be written.", category: "common"} - outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "advanced"} collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} columns: {description: "Comma-separated list of columns or tags to carry over from the annotation file (see man page for details).", category: "advanced"} @@ -132,20 +130,19 @@ task Sort { String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" - String outputType = "z" } - Boolean indexing = if outputType == "z" then true else false + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") command { set -e mkdir -p "$(dirname ~{outputPath})" bcftools sort \ -o ~{outputPath} \ - -O ~{outputType} \ + -O ~{true="z" false="v" compressed} \ ~{inputFile} - ~{if indexing then 
'bcftools index --tbi ~{outputPath}' else ''} + ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} } output { @@ -162,7 +159,6 @@ task Sort { parameter_meta { inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -280,26 +276,22 @@ task View { input { File inputFile String outputPath = "output.vcf" - Int compressionLevel = 0 String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } - String outputType = if compressionLevel > 0 then "z" else "v" - Boolean indexing = if compressionLevel > 0 then true else false - String outputFilePath = if compressionLevel > 0 then outputPath + ".gz" else outputPath + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") command { set -e mkdir -p "$(dirname ~{outputPath})" bcftools view \ -o ~{outputPath} \ - -l ~{compressionLevel} \ - -O ~{outputType} \ + -O ~{true="z" false="v" compressed} \ ~{inputFile} - ~{if indexing then 'bcftools index --tbi ~{outputPath}' else ''} + ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} } output { File outputVcf = outputPath @@ -314,7 +306,6 @@ task View { parameter_meta { inputFile: {description: "A vcf or bcf file.", category: "required"} - compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} outputPath: {description: 
"The location the output VCF file should be written.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From d96e2b14a6cd362b1d7cf8e613e10a19ee98e315 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 9 Oct 2020 11:47:20 +0200 Subject: [PATCH 0635/1208] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9329bf5..cfda7abb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ bcftools: remove outputType and implement indexing based on output file extension. + collect-columns: updated docker image to version 1.0.0 and added the `sumOnDuplicateId` input (defaults to false). + survivor: replace integer boolean type to logical true or false value. From 14d3118230bd2e42e5dec40e5312091518b6ab19 Mon Sep 17 00:00:00 2001 From: Jasper Date: Mon, 12 Oct 2020 13:25:10 +0200 Subject: [PATCH 0636/1208] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7668cd2b..11a39d89 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ Samtools: Add `threads` to parameter meta for Merge task ++ Samtools: Add `threads` to parameter meta for Merge task. + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). https://github.com/DaehwanKimLab/centrifuge/issues/83 From 153438890ea1068846522b7e6386256bba48ab71 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 12 Oct 2020 15:53:41 +0200 Subject: [PATCH 0637/1208] add tmpDir input to specify temporary directory when sorting. 
--- CHANGELOG.md | 1 + bcftools.wdl | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c32d349..700bf0b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ bcftools: add tmpDir input to specify temporary directory when sorting. + bcftools: remove outputType and implement indexing based on output file extension. + NanoPack: Add parameter_meta to NanoPlot task. + Centrifuge: Remove metrics file from classification (which causes the diff --git a/bcftools.wdl b/bcftools.wdl index e2251331..63f2cacb 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -127,6 +127,7 @@ task Sort { input { File inputFile String outputPath = "output.vcf.gz" + String tmpDir = "./sorting-tmp" String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" @@ -136,10 +137,11 @@ task Sort { command { set -e - mkdir -p "$(dirname ~{outputPath})" + mkdir -p "$(dirname ~{outputPath})" ~{tmpDir} bcftools sort \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ + -T ~{tmpDir} \ ~{inputFile} ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} From 72ad1f1b4c6123a72518de01e36c0ba6a79657bb Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 12 Oct 2020 16:21:06 +0200 Subject: [PATCH 0638/1208] add tmpDir to parameter_meta section --- bcftools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/bcftools.wdl b/bcftools.wdl index 63f2cacb..a0aeb442 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -161,6 +161,7 @@ task Sort { parameter_meta { inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} + tmpDir: {description: "The location of the temporary files during the bcftools sorting.", category: "advanced"} memory: 
{description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 4760d1873df4204bb64c38f6d6c8378c41568b46 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 13 Oct 2020 10:08:58 +0200 Subject: [PATCH 0639/1208] remove redundant G in -Xmx in snpeff --- snpeff.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snpeff.wdl b/snpeff.wdl index 95383b94..079a720a 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -24,7 +24,7 @@ task SnpEff { set -e mkdir -p "$(dirname ~{outputPath})" unzip ~{datadirZip} - snpEff -Xmx~{javaXmx}G -XX:ParallelGCThreads=1 \ + snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -v \ ~{genomeVersion} \ -noDownload \ From 3fa0f1411831448f15e17506dfef9230b303a5f1 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 15 Oct 2020 15:38:45 +0200 Subject: [PATCH 0640/1208] Remove most inputs --- whatshap.wdl | 110 ++++++++------------------------------------------- 1 file changed, 16 insertions(+), 94 deletions(-) diff --git a/whatshap.wdl b/whatshap.wdl index 2506aa10..1334d45b 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -26,33 +26,13 @@ task Phase { String outputVCF File? reference File? referenceIndex - Boolean? no_reference String? tag - File? output_read_list String? algorithm - Boolean? merge_reads - String? internal_downsampling - String? mapping_quality Boolean? indels - Boolean? ignore_read_groups String? sample String? chromosome - String? error_rate - String? maximum_error_rate String? threshold - String? negative_threshold - Boolean? full_genotyping - Boolean? distrust_genotypes - Boolean? include_homozygous - String? default_gq - String? gl_regularize_r - File? changed_genotype_list String? ped - File? 
recombination_list - String? recomb_rate - File? gen_map - Boolean? no_genetic_haplo_typing - Boolean? use_ped_samples File vcf File vcfIndex File phaseInput @@ -70,33 +50,13 @@ task Phase { ~{phaseInput} \ ~{if defined(outputVCF) then ("--output " + '"' + outputVCF + '"') else ""} \ ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ - ~{true="--no-reference" false="" no_reference} \ ~{if defined(tag) then ("--tag " + '"' + tag + '"') else ""} \ - ~{if defined(output_read_list) then ("--output-read-list " + '"' + output_read_list + '"') else ""} \ ~{if defined(algorithm) then ("--algorithm " + '"' + algorithm + '"') else ""} \ - ~{true="--merge-reads" false="" merge_reads} \ - ~{if defined(internal_downsampling) then ("--internal-downsampling " + '"' + internal_downsampling + '"') else ""} \ - ~{if defined(mapping_quality) then ("--mapping-quality " + '"' + mapping_quality + '"') else ""} \ ~{true="--indels" false="" indels} \ - ~{true="--ignore-read-groups" false="" ignore_read_groups} \ ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ ~{if defined(chromosome) then ("--chromosome " + '"' + chromosome + '"') else ""} \ - ~{if defined(error_rate) then ("--error-rate " + '"' + error_rate + '"') else ""} \ - ~{if defined(maximum_error_rate) then ("--maximum-error-rate " + '"' + maximum_error_rate + '"') else ""} \ ~{if defined(threshold) then ("--threshold " + '"' + threshold + '"') else ""} \ - ~{if defined(negative_threshold) then ("--negative-threshold " + '"' + negative_threshold + '"') else ""} \ - ~{true="--full-genotyping" false="" full_genotyping} \ - ~{true="--distrust-genotypes" false="" distrust_genotypes} \ - ~{true="--include-homozygous" false="" include_homozygous} \ - ~{if defined(default_gq) then ("--default-gq " + '"' + default_gq + '"') else ""} \ - ~{if defined(gl_regularize_r) then ("--gl-regularizer " + '"' + gl_regularize_r + '"') else ""} \ - ~{if defined(changed_genotype_list) then 
("--changed-genotype-list " + '"' + changed_genotype_list + '"') else ""} \ ~{if defined(ped) then ("--ped " + '"' + ped + '"') else ""} \ - ~{if defined(recombination_list) then ("--recombination-list " + '"' + recombination_list + '"') else ""} \ - ~{if defined(recomb_rate) then ("--recombrate " + '"' + recomb_rate + '"') else ""} \ - ~{if defined(gen_map) then ("--genmap " + '"' + gen_map + '"') else ""} \ - ~{true="--no-genetic-haplotyping" false="" no_genetic_haplo_typing} \ - ~{true="--use-ped-samples" false="" use_ped_samples} && \ tabix -p vcf ~{outputVCF} } @@ -114,33 +74,13 @@ task Phase { parameter_meta { outputVCF: {description: "Output VCF file. Add .gz to the file name to get compressed output. If omitted, use standard output.", category: "common"} reference: {description: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created", category: "common"} - no_reference: {description: "Detect alleles without requiring a reference, at the expense of phasing quality (in particular for long reads)", category: "common"} tag: {description: "Store phasing information with PS tag (standardized) or HP tag (used by GATK ReadBackedPhasing) (default: {description: PS)", category: "common"} - output_read_list: {description: "Write reads that have been used for phasing to FILE.", category: "advanced"} algorithm: {description: "Phasing algorithm to use (default: {description: whatshap)", category: "advanced"} - merge_reads: {description: "Merge reads which are likely to come from the same haplotype (default: {description: do not merge reads)", category: "common"} - internal_downsampling: {description: "Coverage reduction parameter in the internal core phasing algorithm. Higher values increase runtime *exponentially* while possibly improving phasing quality marginally. Avoid using this in the normal case! 
(default: {description: 15)", category: "advanced"} - mapping_quality: {description: "Minimum mapping quality (default: {description: 20)", category: "common"} indels: {description: "Also phase indels (default: {description: do not phase indels)", category: "common"} - ignore_read_groups: {description: "Ignore read groups in BAM/CRAM header and assume all reads come from the same sample.", category: "advanced"} sample: {description: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times.", category: "common"} chromosome: {description: "Name of chromosome to phase. If not given, all chromosomes in the input VCF are phased. Can be used multiple times.", category: "common"} - error_rate: {description: "The probability that a nucleotide is wrong in read merging model (default: {description: 0.15).", category: "advanced"} - maximum_error_rate: {description: "The maximum error rate of any edge of the read merging graph before discarding it (default: {description: 0.25).", category: "advanced"} threshold: {description: "The threshold of the ratio between the probabilities that a pair of reads come from the same haplotype and different haplotypes in the read merging model (default: {description: 1000000).", category: "advanced"} - negative_threshold: {description: "The threshold of the ratio between the probabilities that a pair of reads come from different haplotypes and the same haplotype in the read merging model (default: {description: 1000).", category: "advanced"} - full_genotyping: {description: "Completely re-genotype all variants based on read data, ignores all genotype data that might be present in the VCF (EXPERIMENTAL FEATURE).", category: "experimental"} - distrust_genotypes: {description: "Allow switching variants from hetero- to homozygous in an optimal solution (see documentation).", category: "advanced"} - include_homozygous: {description: "Also work on homozygous variants, which might be turned to 
heterozygous", category: "advanced"} - default_gq: {description: "Default genotype quality used as cost of changing a genotype when no genotype likelihoods are available (default 30)", category: "advanced"} - gl_regularize_r: {description: "Constant (float) to be used to regularize genotype likelihoods read from input VCF (default None).", category: "advanced"} - changed_genotype_list: {description: "Write list of changed genotypes to FILE.", category: "advanced"} ped: {description: "Use pedigree information in PED file to improve phasing (switches to PedMEC algorithm). Columns 2, 3, 4 must refer to child, mother, and father sample names as used in the VCF and BAM/CRAM. Other columns are ignored.", category: "advanced"} - recombination_list: {description: "Write putative recombination events to FILE.", category: "advanced"} - recomb_rate: {description: "Recombination rate in cM/Mb (used with --ped). If given, a constant recombination rate is assumed (default: {description: 1.26cM/Mb).", category: "advanced"} - gen_map: {description: "File with genetic map (used with --ped) to be used instead of constant recombination rate, i.e. overrides option --recombrate.", category: "advanced"} - no_genetic_haplo_typing: {description: "Do not merge blocks that are not connected by reads (i.e. solely based on genotype status). 
Default: {description: when in --ped mode, merge all blocks that contain at least one homozygous genotype in at least one individual into one block.", category: "advanced"} - use_ped_samples: {description: "Only work on samples mentioned in the provided PED file.", category: "advanced"} vcf: {description: "VCF or BCF file with variants to be phased (can be gzip-compressed)", category: "required"} vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased", category: "required"} phaseInput: {description: "BAM, CRAM, VCF or BCF file(s) with phase information, either through sequencing reads (BAM, CRAM) or through phased blocks (VCF, BCF)", category: "required"} @@ -154,10 +94,8 @@ task Stats { input { String? gtf String? sample - String? chr_lengths String? tsv - Boolean? only_sn_vs - String? block_list + String? blockList String? chromosome File vcf @@ -168,21 +106,19 @@ task Stats { } command { - whatshap stats \ + whatshap stats \ ~{vcf} \ ~{if defined(gtf) then ("--gtf " + '"' + gtf + '"') else ""} \ ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ - ~{if defined(chr_lengths) then ("--chr-lengths " + '"' + chr_lengths + '"') else ""} \ ~{if defined(tsv) then ("--tsv " + '"' + tsv + '"') else ""} \ - ~{true="--only-snvs" false="" only_sn_vs} \ - ~{if defined(block_list) then ("--block-list " + '"' + block_list + '"') else ""} \ + ~{if defined(blockList) then ("--block-list " + '"' + blockList + '"') else ""} \ ~{if defined(chromosome) then ("--chromosome " + '"' + chromosome + '"') else ""} } output { - File? phasedGTF = gtf - File? phasedTSV = tsv - File? phasedBlockList = block_list + File? phasedGTF = gtf + File? phasedTSV = tsv + File? phasedBlockList = blockList } runtime { @@ -194,10 +130,8 @@ task Stats { parameter_meta { gtf: "Write phased blocks to GTF file." sample: "Name of the sample to process. If not given, use first sample found in VCF." 
- chr_lengths: "File with chromosome lengths (one line per chromosome, tab separated ' ') needed to compute N50 values." tsv: "Filename to write statistics to (tab-separated)." - only_sn_vs: "Only process SNVs and ignore all other variants." - block_list: "Filename to write list of all blocks to (one block per line)." + blockList: "Filename to write list of all blocks to (one block per line)." chromosome: "Name of chromosome to process. If not given, all chromosomes in the input VCF are considered." vcf: "Phased VCF file" memory: {description: "The amount of memory this job will use.", category: "advanced"} @@ -212,12 +146,7 @@ task Haplotag { File? reference File? referenceFastaIndex String? regions - Boolean? ignore_linked_read - String? linked_read_distance_cut_off - Boolean? ignore_read_groups String? sample - String? output_haplo_tag_list - Boolean? tag_supplementary File vcf File vcfIndex File alignments @@ -230,24 +159,19 @@ task Haplotag { } command { - whatshap haplotag \ + whatshap haplotag \ ~{vcf} \ ~{alignments} \ ~{if defined(outputFile) then ("--output " + '"' + outputFile+ '"') else ""} \ ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ ~{if defined(regions) then ("--regions " + '"' + regions + '"') else ""} \ - ~{true="--ignore-linked-read" false="" ignore_linked_read} \ - ~{if defined(linked_read_distance_cut_off) then ("--linked-read-distance-cutoff " + '"' + linked_read_distance_cut_off + '"') else ""} \ - ~{true="--ignore-read-groups" false="" ignore_read_groups} \ ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ - ~{if defined(output_haplo_tag_list) then ("--output-haplotag-list " + '"' + output_haplo_tag_list + '"') else ""} \ - ~{true="--tag-supplementary" false="" tag_supplementary} && \ python3 -c "import pysam; pysam.index('~{outputFile}')" } output { - File bam = outputFile - File bamIndex = outputFile + ".bai" + File bam = outputFile + File bamIndex = outputFile + ".bai" } runtime { 
@@ -258,16 +182,14 @@ task Haplotag { parameter_meta { outputFile: "Output file. If omitted, use standard output." - reference: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created" + reference: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created." + referenceIndex: "Index for the reference file." regions: "Specify region(s) of interest to limit the tagging to reads/variants overlapping those regions. You can specify a space-separated list of regions in the form of chrom:start-end, chrom (consider entire chromosome), or chrom:start (consider region from this start to end of chromosome)." - ignore_linked_read: "Ignore linkage information stored in BX tags of the reads." - linked_read_distance_cut_off: "Assume reads with identical BX tags belong to different read clouds if their distance is larger than LINKEDREADDISTANCE (default: 50000)." - ignore_read_groups: "Ignore read groups in BAM/CRAM header and assume all reads come from the same sample." sample: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times." - output_haplo_tag_list: "Write assignments of read names to haplotypes (tab separated) to given output file. If filename ends in .gz, then output is gzipped." - tag_supplementary: "Also tag supplementary alignments. Supplementary alignments are assigned to the same haplotype the primary alignment has been assigned to (default: only tag primary alignments)." - vcf: "VCF file with phased variants (must be gzip-compressed and indexed)" - alignments: "File (BAM/CRAM) with read alignments to be tagged by haplotype" + vcf: "VCF file with phased variants (must be gzip-compressed and indexed)." + vcfIndex: "Index for the VCF or BCF file with variants to be phased." + alignments: "File (BAM/CRAM) with read alignments to be tagged by haplotype." 
+ alignmentsIndex: "Index for the alignment file." memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 289a42d5baaaa7aa0a38cbadde436d610009d4f5 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 15 Oct 2020 15:50:14 +0200 Subject: [PATCH 0641/1208] Rename parameter meta for index --- whatshap.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/whatshap.wdl b/whatshap.wdl index 1334d45b..2ee90f50 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -183,7 +183,7 @@ task Haplotag { parameter_meta { outputFile: "Output file. If omitted, use standard output." reference: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created." - referenceIndex: "Index for the reference file." + referenceFastaIndex: "Index for the reference file." regions: "Specify region(s) of interest to limit the tagging to reads/variants overlapping those regions. You can specify a space-separated list of regions in the form of chrom:start-end, chrom (consider entire chromosome), or chrom:start (consider region from this start to end of chromosome)." sample: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times." vcf: "VCF file with phased variants (must be gzip-compressed and indexed)." 
From a772e3773feedcb22f7e18f8a1f0130fd9b3cf0c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 16 Oct 2020 15:08:33 +0200 Subject: [PATCH 0642/1208] add gripss, timeMinutes for gridss, fix typos --- bcftools.wdl | 2 +- gridss.wdl | 7 ++- gripss.wdl | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++ snpeff.wdl | 2 +- 4 files changed, 124 insertions(+), 4 deletions(-) create mode 100644 gripss.wdl diff --git a/bcftools.wdl b/bcftools.wdl index 0be3be93..e68e527c 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -150,7 +150,7 @@ task Filter { ~{vcf} \ -O z \ -o ~{outputPath} - bctools index --tbi ~{outputPath} + bcftools index --tbi ~{outputPath} } output { diff --git a/gridss.wdl b/gridss.wdl index 44b9e9f1..7516553d 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2017 Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -34,7 +34,8 @@ task GRIDSS { String outputPrefix = "gridss" Int jvmHeapSizeGb = 30 - Int threads = 1 + Int threads = 2 + Int timeMinutes = ceil(1440 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } @@ -64,6 +65,7 @@ task GRIDSS { runtime { cpu: threads memory: "~{jvmHeapSizeGb + 1}G" + time_minutes: timeMinutes docker: dockerImage } @@ -79,6 +81,7 @@ task GRIDSS { threads: {description: "The number of the threads to use.", category: "advanced"} jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } \ No newline at end of file diff --git a/gripss.wdl b/gripss.wdl new file mode 100644 index 00000000..6ed0bcf9 --- /dev/null +++ b/gripss.wdl @@ -0,0 +1,117 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task ApplicationKt { + input { + File inputVcf + String outputPath = "gripss.vcf.gz" + File referenceFasta + File breakpointHotspot + File breakendPon + File breakpointPon + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + com.hartwig.hmftools.gripss.GripssApplicationKt \ + -ref_genome ~{referenceFasta} \ + -breakpoint_hotspot ~{breakpointHotspot} \ + -breakend_pon ~{breakendPon} \ + -breakpoint_pon ~{breakpointPon} \ + -input_vcf ~{inputVcf} \ + -output_vcf ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF.", category: "required"} + outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "advanced"} + breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} + breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} + breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task HardFilterApplicationKt { + input { + File inputVcf + String outputPath = "gripss_hard_filter.vcf.gz" + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ + -input_vcf ~{inputVcf} \ + -output_vcf ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF.", category: "required"} + outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} \ No newline at end of file diff --git a/snpeff.wdl b/snpeff.wdl index 079a720a..d639a036 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -24,7 +24,7 @@ task SnpEff { set -e mkdir -p "$(dirname ~{outputPath})" unzip ~{datadirZip} - snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -v \ ~{genomeVersion} \ -noDownload \ From 37ba60dd104f3a221c29d6fd6cf2e5c2be76e1ce Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 22 Oct 2020 07:31:08 +0200 Subject: [PATCH 0643/1208] Add memory to samtools Merge --- samtools.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 24d95aa4..ad94338a 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -332,6 +332,7 @@ task Merge { Int threads = 1 Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) + String memory = "4G" String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } String indexPath = sub(outputBamPath, "\.bam$",".bai") @@ -355,6 +356,7 @@ task Merge { runtime { cpu: threads docker: dockerImage + memory: memory time_minutes: timeMinutes } @@ -362,7 +364,7 @@ task Merge { # inputs bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} - threads: {description: "Number of threads to use.", category: "advanced"} + threads: {description: "Number of threads to use.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", From 6581d965977ab6a4f31058065bca84fc4106ed9f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 22 Oct 2020 14:05:48 +0200 Subject: [PATCH 0644/1208] add AnnotateInsertedSequence task to gridss.wdl --- gridss.wdl | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 7516553d..78e4bd40 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -84,4 +84,60 @@ task GRIDSS { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } +} + +task AnnotateInsertedSequence { + input { + File inputVcf + String outputPath = "gridss.annotated.vcf.gz" + File viralReference + + Int threads = 8 + String javaXmx = "8G" + String memory = "9G" + String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" + Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2 / threads) + } + + command { + java -Xmx~{javaXmx} \ + -Dsamjdk.create_index=true \ + -Dsamjdk.use_async_io_read_samtools=true \ + -Dsamjdk.use_async_io_write_samtools=true \ + -Dsamjdk.use_async_io_write_tribble=true \ + -Dsamjdk.buffer_size=4194304 \ + -cp /usr/local/share/gridss-2.9.4-0/gridss.jar \ + gridss.AnnotateInsertedSequence \ + REFERENCE_SEQUENCE=~{viralReference} \ + INPUT=~{inputVcf} \ + OUTPUT=~{outputPath} \ + ALIGNMENT=APPEND \ + WORKING_DIR='.' 
\ + WORKER_THREADS=~{threads} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF file.", category: "required"} + outputPath: {description: "The path the output will be written to.", category: "common"} + viralReference: {description: "A fasta file with viral sequences.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } } \ No newline at end of file From cd64c02f84707a26ed6787e83269347ed6a69ca4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 22 Oct 2020 15:27:17 +0200 Subject: [PATCH 0645/1208] add some # !UnknownRuntimeKey --- gridss.wdl | 4 ++-- gripss.wdl | 4 ++-- snpeff.wdl | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 78e4bd40..89558ff3 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -65,7 +65,7 @@ task GRIDSS { runtime { cpu: threads memory: "~{jvmHeapSizeGb + 1}G" - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } @@ -124,7 +124,7 @@ task AnnotateInsertedSequence { runtime { cpu: threads memory: memory - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } diff --git a/gripss.wdl b/gripss.wdl index 6ed0bcf9..3f500a60 100644 --- a/gripss.wdl +++ b/gripss.wdl @@ -54,7 +54,7 @@ task ApplicationKt { runtime { memory: 
memory - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } @@ -100,7 +100,7 @@ task HardFilterApplicationKt { runtime { memory: memory - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } diff --git a/snpeff.wdl b/snpeff.wdl index d639a036..a26fadbd 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -45,7 +45,7 @@ task SnpEff { runtime { docker: dockerImage - time_minutes: timeMinutes + time_minutes: timeMinutes # !UnknownRuntimeKey memory: memory } From 208e8f46530b8a1d0dbdbd3afa22bc7449c03da3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 09:37:14 +0100 Subject: [PATCH 0646/1208] add some missing inputs to gridss AnnotateInsertedSequence and add missing \ to bcftools Filter --- bcftools.wdl | 2 +- gridss.wdl | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index e68e527c..4703580a 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -146,7 +146,7 @@ task Filter { filter \ ~{"-i " + include} \ ~{"-e " + exclude} \ - ~{"-s " + softFilter} + ~{"-s " + softFilter} \ ~{vcf} \ -O z \ -o ~{outputPath} diff --git a/gridss.wdl b/gridss.wdl index 89558ff3..cfbb7069 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -91,6 +91,8 @@ task AnnotateInsertedSequence { File inputVcf String outputPath = "gridss.annotated.vcf.gz" File viralReference + File viralReferenceFai + File viralReferenceDict Int threads = 8 String javaXmx = "8G" From 674158b82e2a637c536853113721c48db6e6d09c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 10:51:06 +0100 Subject: [PATCH 0647/1208] add license notice to snpeff, add index input for bcftools annotate, and BWA mem index image input for gridss annotate inserted sequences --- bcftools.wdl | 2 ++ gridss.wdl | 4 ++++ snpeff.wdl | 22 ++++++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 4703580a..d358ab7b 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ 
-44,6 +44,7 @@ task Annotate { Boolean singleOverlaps = false Array[String] removeAnns = [] File inputFile + File? inputFileIndex String outputPath = "output.vcf.gz" Int threads = 0 @@ -117,6 +118,7 @@ task Annotate { singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} inputFile: {description: "A vcf or bcf file.", category: "required"} + ipnutFileIndex: {description: "The index for the input vcf or bcf.", "common"} threads: {description: "Number of extra decompression threads [0].", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/gridss.wdl b/gridss.wdl index cfbb7069..c444c854 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,6 +93,7 @@ task AnnotateInsertedSequence { File viralReference File viralReferenceFai File viralReferenceDict + File viralReferenceImg Int threads = 8 String javaXmx = "8G" @@ -134,6 +135,9 @@ task AnnotateInsertedSequence { inputVcf: {description: "The input VCF file.", category: "required"} outputPath: {description: "The path the output will be written to.", category: "common"} viralReference: {description: "A fasta file with viral sequences.", category: "required"} + viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"} + viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"} + viralReferenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the viral reference.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", diff --git a/snpeff.wdl b/snpeff.wdl index a26fadbd..2a113c52 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -1,5 +1,27 @@ version 1.0 +# MIT License +# +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ task SnpEff { input { File vcf From 836f40c11ad03ca513345ba56b6feb502b2724dc Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 11:07:09 +0100 Subject: [PATCH 0648/1208] fix missing key in parameter_met --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index d358ab7b..064e2d6e 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -118,7 +118,7 @@ task Annotate { singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} inputFile: {description: "A vcf or bcf file.", category: "required"} - ipnutFileIndex: {description: "The index for the input vcf or bcf.", "common"} + ipnutFileIndex: {description: "The index for the input vcf or bcf.", category: "common"} threads: {description: "Number of extra decompression threads [0].", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 8bc34ddf78f998b838bec85e43926b25da42cc66 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 11:12:19 +0100 Subject: [PATCH 0649/1208] typo --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 064e2d6e..3b512716 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -118,7 +118,7 @@ task Annotate { singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} inputFile: {description: "A vcf or bcf file.", category: "required"} - ipnutFileIndex: {description: "The index for the input vcf or bcf.", category: "common"} + inputFileIndex: {description: "The index for the input vcf or bcf.", category: "common"} threads: {description: "Number of extra decompression threads [0].", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 070cbb252016d18f59d52e4919a2a267f1c18671 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 13:31:03 +0100 Subject: [PATCH 0650/1208] add missing input --- bcftools.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 3b512716..1dba7611 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -25,6 +25,7 @@ version 1.0 task Annotate { input { File? annsFile + File? annsFileIndex String? collapse Array[String] columns = [] String? 
exclude @@ -99,7 +100,8 @@ task Annotate { parameter_meta { outputPath: {description: "The location the output VCF file should be written.", category: "common"} outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} - annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "advanced"} + annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "common"} + annsFileIndex: {description: "The index for annsFile.", category: "common"} collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} columns: {description: "Comma-separated list of columns or tags to carry over from the annotation file (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} From 1e19fbb2a00187bfa10cab023aa52dacb1091e03 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 14:09:13 +0100 Subject: [PATCH 0651/1208] add missing inputs --- gripss.wdl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/gripss.wdl b/gripss.wdl index 3f500a60..c9a8f27d 100644 --- a/gripss.wdl +++ b/gripss.wdl @@ -25,6 +25,8 @@ task ApplicationKt { File inputVcf String outputPath = "gripss.vcf.gz" File referenceFasta + File referenceFastaFai + File referenceFastaDict File breakpointHotspot File breakendPon File breakpointPon @@ -61,7 +63,10 @@ task ApplicationKt { parameter_meta { inputVcf: {description: "The input VCF.", category: "required"} outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} - referenceFasta: {description: "The reference fasta file.", category: "advanced"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The 
sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} From d6109250b32299638c1d0f47edf580a69b0732b4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 Oct 2020 16:36:10 +0100 Subject: [PATCH 0652/1208] add some cleanup to snpeff --- snpeff.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/snpeff.wdl b/snpeff.wdl index 2a113c52..85709079 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -59,6 +59,7 @@ task SnpEff { ~{true="-noShiftHgvs" false="" noShiftHgvs} \ ~{"-upDownStreamLen " + upDownStreamLen} \ > ~{outputPath} + rm -r $PWD/data } output { From a82be38ca7ff228233a5cd49c0495e3714a7ca79 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 27 Oct 2020 09:32:18 +0100 Subject: [PATCH 0653/1208] Update pbbam.wdl Co-authored-by: Jasper --- pbbam.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/pbbam.wdl b/pbbam.wdl index 368ff4ed..52737a00 100644 --- a/pbbam.wdl +++ b/pbbam.wdl @@ -23,6 +23,7 @@ task Index { input { File bamFile String? 
outputBamPath + String memory = "2G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) String dockerImage = "quay.io/biocontainers/pbbam:1.6.0--h5b7e6e0_0" From 7db21a6481522746b0699c2756083d57326be164 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 27 Oct 2020 16:02:35 +0100 Subject: [PATCH 0654/1208] Add support for outputPrefix with or without folder --- chunked-scatter.wdl | 3 +++ lima.wdl | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index b54a7d2e..8895c2a4 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -24,6 +24,7 @@ task ChunkedScatter { input { File inputFile String prefix = "./scatter" + Boolean splitContigs = false Int? chunkSize Int? overlap Int? minimumBasesPerFile @@ -40,6 +41,7 @@ task ChunkedScatter { ~{"-c " + chunkSize} \ ~{"-o " + overlap} \ ~{"-m " + minimumBasesPerFile} \ + ~{true="--split-contigs " false="" splitContigs} \ ~{inputFile} } @@ -108,6 +110,7 @@ task ScatterRegions { splitContigs: {description: "If set, contigs are allowed to be split up over multiple files.", category: "advanced"} scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} scatterSize: {description: "Overrides scatterSizeMillions with a smaller value if set.", category: "advanced"} + splitContigs: {description: "Allow contigs to be split during scattering.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} diff --git a/lima.wdl b/lima.wdl index 38cf2d6e..7ef9d4ab 100644 --- a/lima.wdl +++ b/lima.wdl @@ -58,6 +58,7 @@ task Lima { command { set -e + mkdir -p "$(dirname ~{outputPrefix})" lima \ ~{libraryDesignOptions[libraryDesign]} \ ~{true="--score-full-pass" false="" scoreFullPass} \ @@ -86,6 +87,15 @@ task Lima { ~{inputBamFile} \ ~{barcodeFile} \ 
~{outputPrefix + ".bam"} + + # copy the files with the default filename to the folder specified in + # outputPrefix. + if [ "~{basename(outputPrefix)}.json" != "~{outputPrefix}.json" ]; then + cp "~{basename(outputPrefix)}.json" "~{outputPrefix}.json" + cp "~{basename(outputPrefix)}.lima.counts" "~{outputPrefix}.lima.counts" + cp "~{basename(outputPrefix)}.lima.report" "~{outputPrefix}.lima.report" + cp "~{basename(outputPrefix)}.lima.summary" "~{outputPrefix}.lima.summary" + fi } output { From a7445b829f0babf6257b376e71f48f4c860828cc Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 27 Oct 2020 16:13:02 +0100 Subject: [PATCH 0655/1208] Remove duplicate parameter meta entry --- chunked-scatter.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 8895c2a4..115c5ca4 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -110,7 +110,6 @@ task ScatterRegions { splitContigs: {description: "If set, contigs are allowed to be split up over multiple files.", category: "advanced"} scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} scatterSize: {description: "Overrides scatterSizeMillions with a smaller value if set.", category: "advanced"} - splitContigs: {description: "Allow contigs to be split during scattering.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From af075999debec07b821010b0e0d260c23b41e143 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 27 Oct 2020 16:19:46 +0100 Subject: [PATCH 0656/1208] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c1f32dd..b27addab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ version 5.0.0-dev + 
deepvariant: Add task for DeepVariant. + gatk: Make intervals optional for GenotypeGVCFs. + isoseq3: Add required bam index input to isoseq3. -+ pbbam: Add task for indexing PacBio bam files ++ pbbam: Add task for indexing PacBio bam files. + picard: Add CollectHsMetrics and CollectVariantCallingMetrics. + Samtools: Add `threads` to parameter meta for Merge task. + bcftools: add tmpDir input to specify temporary directory when sorting. From 8df9a800fb56341a2c0b964f9300d49394cf485d Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 06:52:29 +0100 Subject: [PATCH 0657/1208] Update to CCS version 5 --- ccs.wdl | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ccs.wdl b/ccs.wdl index 60e43711..bcebefe9 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -29,12 +29,14 @@ task CCS { Float minReadQuality = 0.99 String logLevel = "WARN" File subreadsFile + File? subreadsIndexFile + String? chunkString String outputPrefix Int cores = 2 String memory = "2G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/pbccs:4.2.0--0" + String dockerImage = "quay.io/biocontainers/pbccs:5.0.0--0" } command { @@ -48,7 +50,8 @@ task CCS { --min-rq ~{minReadQuality} \ --log-level ~{logLevel} \ --num-threads ~{cores} \ - ~{"--report-file " + outputPrefix + ".ccs.report.txt"} \ + ~{"--chunk " + chunkString} \ + ~{"--report-json " + outputPrefix + ".ccs.report.json"} \ ~{"--log-file " + outputPrefix + ".ccs.stderr.log"} \ ~{subreadsFile} \ ~{outputPrefix + ".ccs.bam"} @@ -57,7 +60,7 @@ task CCS { output { File ccsBam = outputPrefix + ".ccs.bam" File ccsBamIndex = outputPrefix + ".ccs.bam.pbi" - File ccsReport = outputPrefix + ".ccs.report.txt" + File ccsReport = outputPrefix + ".ccs.report.json" File ccsStderr = outputPrefix + ".ccs.stderr.log" } @@ -77,6 +80,9 @@ task CCS { minReadQuality: {description: "Minimum predicted accuracy in [0, 1].", category: "common"} logLevel: {description: "Set log level. 
Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} subreadsFile: {description: "Subreads input file.", category: "required"} + subreadsIndexFile: {description: "Index for the subreads input file.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "advanced"} + chunkString: {descpription: "Chunk string (e.g. 1/4, 5/5) for CCS.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} From 910200447daeadbdf8b7698db39719ba35126498 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 06:54:14 +0100 Subject: [PATCH 0658/1208] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b27addab..e2068f49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ CCS: Update CCS to version 5. + deepvariant: Add task for DeepVariant. + gatk: Make intervals optional for GenotypeGVCFs. + isoseq3: Add required bam index input to isoseq3. From e29df66cd70df1681b892c8fb01af426beb4333a Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 07:03:02 +0100 Subject: [PATCH 0659/1208] Remove duplicate parameter meta --- ccs.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/ccs.wdl b/ccs.wdl index bcebefe9..5d9887bf 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -81,7 +81,6 @@ task CCS { logLevel: {description: "Set log level. 
Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} subreadsFile: {description: "Subreads input file.", category: "required"} subreadsIndexFile: {description: "Index for the subreads input file.", category: "required"} - outputPrefix: {description: "Output directory path + output file prefix.", category: "advanced"} chunkString: {descpription: "Chunk string (e.g. 1/4, 5/5) for CCS.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} From ccfd843303c5186121de89a6d667dc1fb20f4100 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 09:12:11 +0100 Subject: [PATCH 0660/1208] Update parameter meta --- ccs.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ccs.wdl b/ccs.wdl index 5d9887bf..cab15fea 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -80,7 +80,7 @@ task CCS { minReadQuality: {description: "Minimum predicted accuracy in [0, 1].", category: "common"} logLevel: {description: "Set log level. Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} subreadsFile: {description: "Subreads input file.", category: "required"} - subreadsIndexFile: {description: "Index for the subreads input file.", category: "required"} + subreadsIndexFile: {description: "Index for the subreads input file, required when using chunkString.", category: "advanced"} chunkString: {descpription: "Chunk string (e.g. 
1/4, 5/5) for CCS.", category: "advanced"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} From 61ba73556876d2bb1a1cc73ca9765af29a8e45ba Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 09:45:40 +0100 Subject: [PATCH 0661/1208] Update parameter meta --- samtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index ad94338a..9e415b0e 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -367,6 +367,7 @@ task Merge { threads: {description: "Number of threads to use.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } From 7c63b058e9e1c23407bf5f07c04372d16226523a Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 10:05:57 +0100 Subject: [PATCH 0662/1208] Add postprocess argument to DeepVariant task --- deepvariant.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index 88bdb352..10bc49c9 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -28,6 +28,7 @@ task RunDeepVariant { File inputBamIndex String modelType String outputVcf + String? postprocessVariantsExtraArgs File? customizedModel Int? numShards String? 
outputGVcf @@ -51,8 +52,9 @@ task RunDeepVariant { ~{"--output_gvcf " + outputGVcf} \ ~{"--customized_model " + customizedModel} \ ~{"--num_shards " + numShards} \ - ~{"--regions} " + regions} \ + ~{"--regions " + regions} \ ~{"--sample_name " + sampleName} \ + ~{"--postprocess_variants_extra_args " + postprocessVariantsExtraArgs} \ ~{true="--vcf_stats_report" false="--novcf_stats_report" VCFStatsReport} } From 05f14ce2fa3af46ef79afa3c868837ad49db0fb5 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 10:31:24 +0100 Subject: [PATCH 0663/1208] Update parameter meta --- deepvariant.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/deepvariant.wdl b/deepvariant.wdl index 10bc49c9..f5661886 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -85,6 +85,7 @@ task RunDeepVariant { regions: {description: "List of regions we want to process, in BED/BEDPE format.", category: "advanced"} sampleName: {description: "Sample name to use instead of the sample name from the input reads BAM (SM tag in the header).", category: "common"} VCFStatsReport: {description: "Output a visual report (HTML) of statistics about the output VCF.", category: "common"} + postprocessVariantsExtraArgs: {description: "A comma-separated list of flag_name=flag_value. 'flag_name' has to be valid flags for calpostprocess_variants.py.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", From 680563febf9dba81cff822f73ab599b351f3e7c6 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Wed, 28 Oct 2020 10:33:36 +0100 Subject: [PATCH 0664/1208] Fix bug in whatshap task --- whatshap.wdl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/whatshap.wdl b/whatshap.wdl index 2ee90f50..93624590 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -45,6 +45,8 @@ task Phase { } command { + set -e + whatshap phase \ ~{vcf} \ ~{phaseInput} \ @@ -56,7 +58,8 @@ task Phase { ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ ~{if defined(chromosome) then ("--chromosome " + '"' + chromosome + '"') else ""} \ ~{if defined(threshold) then ("--threshold " + '"' + threshold + '"') else ""} \ - ~{if defined(ped) then ("--ped " + '"' + ped + '"') else ""} \ + ~{if defined(ped) then ("--ped " + '"' + ped + '"') else ""} + tabix -p vcf ~{outputVCF} } @@ -159,13 +162,16 @@ task Haplotag { } command { + set -e + whatshap haplotag \ ~{vcf} \ ~{alignments} \ ~{if defined(outputFile) then ("--output " + '"' + outputFile+ '"') else ""} \ ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ ~{if defined(regions) then ("--regions " + '"' + regions + '"') else ""} \ - ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \ + ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} + python3 -c "import pysam; pysam.index('~{outputFile}')" } From f4fee79b3e26f11c9b6dce07a64e517596a6ca78 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 30 Oct 2020 14:12:09 +0100 Subject: [PATCH 0665/1208] Update first set of tasks to uniform layout. 
--- CHANGELOG.md | 70 +++++++++++++++++++++++----------- CPAT.wdl | 20 ++++++---- bam2fastx.wdl | 45 +++++++++++++++------- bcftools.wdl | 93 +++++++++++++++++++++++---------------------- bedtools.wdl | 44 +++++++++++++-------- biowdl.wdl | 17 ++++----- bowtie.wdl | 33 ++++++++-------- bwa-mem2.wdl | 38 +++++++++--------- bwa.wdl | 36 +++++++++--------- ccs.wdl | 31 +++++++-------- centrifuge.wdl | 8 ++-- chunked-scatter.wdl | 17 +++++---- clever.wdl | 16 ++++---- collect-columns.wdl | 17 +++++---- common.wdl | 32 +++++++++------- cutadapt.wdl | 45 ++++++++++------------ deepvariant.wdl | 22 +++++------ 17 files changed, 324 insertions(+), 260 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2068f49..2c04b582 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ CCS: `cores` input has been renamed to `threads` to match tool naming. + CCS: Update CCS to version 5. + deepvariant: Add task for DeepVariant. + gatk: Make intervals optional for GenotypeGVCFs. @@ -19,7 +20,8 @@ version 5.0.0-dev + picard: Add CollectHsMetrics and CollectVariantCallingMetrics. + Samtools: Add `threads` to parameter meta for Merge task. + bcftools: add tmpDir input to specify temporary directory when sorting. -+ bcftools: remove outputType and implement indexing based on output file extension. ++ bcftools: remove outputType and implement indexing based on output + file extension. + NanoPack: Add parameter_meta to NanoPlot task. + Centrifuge: Remove metrics file from classification (which causes the summary report to be empty). @@ -111,8 +113,8 @@ version 4.0.0 + Change MultiQC inputs. It now accepts an array of reports files. It does not need access to a folder with the reports anymore. MultiQC can now be used as a normal WDL task without hacks. -+ Picard: Make all outputs in `CollectMultipleMetrics` optional. 
This will make sure the - task will not fail if one of the metrics is set to false. ++ Picard: Make all outputs in `CollectMultipleMetrics` optional. This will + make sure the task will not fail if one of the metrics is set to false. + The struct `BowtieIndex` was removed, as it has become obsolete. + The task `ReorderGlobbedScatters` was removed, as it has become obsolete. + Adjusted the memory settings of many tools, especially java tools. @@ -134,7 +136,8 @@ version 4.0.0 + Add faidx task to samtools. + Isoseq3: Remove dirname command from output folder creation step. + Isoseq3: Requires more memory by default, is now 2G. -+ Isoseq3: Remove cp commands and other bash magic, file naming is now solved by pipeline. ++ Isoseq3: Remove cp commands and other bash magic, file naming is now + solved by pipeline. + Lima: Replace mv command with cp. + Add WDL task for smoove (lumpy) sv-caller. @@ -145,7 +148,8 @@ version 3.1.0 + Lima: Add missing output to parameter_meta. + Lima: Remove outputPrefix variable from output section. + Isoseq3: Make sure stderr log file from Refine is unique and not overwritten. -+ Isoseq3: Add workaround in Refine for glob command not locating files in output directory. ++ Isoseq3: Add workaround in Refine for glob command not locating files + in output directory. + Isoseq3: Fix --min-polya-length argument syntax. + Lima: Add workaround for glob command not locating files in output directory. + CCS: Add missing backslash. @@ -189,10 +193,13 @@ version 3.0.0 + Rename HaplotypeCallerGVCF to HaplotypeCaller. Add `gvcf` option to set whether output should be a GVCF. + Centrifuge: Add Krona task specific to Centrifuge. -+ Centrifuge: Fix Centrifuge tests, where sometimes the index files could still not be located. ++ Centrifuge: Fix Centrifuge tests, where sometimes the index files could + still not be located. + Update parameter_meta for TALON, Centrifuge and Minimap2. 
-+ Centrifuge: Fix issue where Centrifuge Inspect did not get the correct index files location. -+ Add `minimumContigLength` input to PlotDenoisedCopyRatios and PlotModeledSegments. ++ Centrifuge: Fix issue where Centrifuge Inspect did not get the correct + index files location. ++ Add `minimumContigLength` input to PlotDenoisedCopyRatios + and PlotModeledSegments. + Add `commonVariantSitesIndex` input to CollectAllelicCounts. + Centrifuge: Fix issue where Centrifuge could not locate index files. + Increase default memory of BWA mem to 32G (was 16G). @@ -228,11 +235,13 @@ version 3.0.0 + Removed the "extraArgs" input from FilterMutectCalls. + Removed unused "verbose" and "quiet" inputs from multiqc. + Added parameter_meta sections to a variety of tasks. -+ Picard's BedToIntervalList outputPath input is now optional (with a default of "regions.interval_list"). ++ Picard's BedToIntervalList outputPath input is now + optional (with a default of "regions.interval_list"). + TALON: Fix SQLite error concerning database/disk space being full. + Update htseq to default image version 0.11.2. + Update biowdl-input-converter in common.wdl to version 0.2.1. -+ Update TALON section to now include the new annotation file output, and add config file creation to the TALON task. ++ Update TALON section to now include the new annotation file output, and + add config file creation to the TALON task. + Removed unused inputs (trimPrimer and format) for cutadapt. + Various minor command tweaks to increase stability. + Fixed unused inputs in bedtools sort (inputs are now used). @@ -245,7 +254,8 @@ version 2.1.0 + Updated biowdl-input-converter version. + GATK CombineGVCFs memory was tripled to prevent it from using a lot of CPU in Garbage Collection mode. -+ Updated parameter_meta sections for Minimap2 and TranscriptClean to wdl-aid format. ++ Updated parameter_meta sections for Minimap2 and TranscriptClean to + wdl-aid format. + Updated cores variable for TALON, the default is now 4. 
+ Updated TALON to version 4.4. + Added parameter_meta sections to the following tools: @@ -262,10 +272,14 @@ version 2.1.0 version 2.0.0 --------------------------- + TranscriptClean: Update TranscriptClean to version 2.0.2. -+ Memory runtime attributes are now Strings indicating total memory, as opposed to Ints indicating memory per core. -+ Memory inputs for most tasks are now Strings, remaining Int memory inputs are renamed to "memoryGb". -+ Use the biowdl-input-converter container for JsonToYaml, to reduce the amount of containers needed. -+ Add biowdl-input-converter and remove SampleConfigToSampleReadgroupLists which it replaces. ++ Memory runtime attributes are now Strings indicating total memory, as + opposed to Ints indicating memory per core. ++ Memory inputs for most tasks are now Strings, remaining Int memory inputs + are renamed to "memoryGb". ++ Use the biowdl-input-converter container for JsonToYaml, to reduce the + amount of containers needed. ++ Add biowdl-input-converter and remove SampleConfigToSampleReadgroupLists + which it replaces. + GATK.GenotypeGVCFs: Increased memoryMultiplier from 2.0 to 3.0 . + Minimap2: Add -k option to minimap2 mapping. + Added bwakit task. @@ -279,7 +293,9 @@ version 1.0.0 + Removed deprecated tasks: + bioconda.installPrefix + mergecounts.MergeCounts -+ GATK.BaseRecalibrator: "knownIndelsSitesVCFs" and "knownIndelsSitesVCFIndexes" are no longer optional, but now have a default of "[]". ++ GATK.BaseRecalibrator: "knownIndelsSitesVCFs" + and "knownIndelsSitesVCFIndexes" are no longer optional, but + now have a default of "[]". + Removed BWA index task. + Removed unused "picardJar" input from bwa.wdl. + All inputs to bedtools Sort are now reflected in the generated command. @@ -295,17 +311,25 @@ version 1.0.0 + Fastqsplitter: use version 1.1. + Picard: Use version 2.20.5 of the biocontainer as this includes the R dependency. + Common: Update dockerTag to dockerImage. 
-+ GATK: Add CombineVariants task that allows, e.g., to merge VCFs from different callers. -+ Mutect2: Add GATK tasks related to variant filtering (LearnReadOrientationModel, MergeStats, GetPileupSummaries, CalculateContamination and FilterMutectCalls). -+ Mutect2: Add "--germline-resource" and "--f1r2-tar-gz" inputs, requiring an update to GATK 4.1.2.0. ++ GATK: Add CombineVariants task that allows, e.g., to merge VCFs + from different callers. ++ Mutect2: Add GATK tasks related to variant + filtering (LearnReadOrientationModel, MergeStats, GetPileupSummaries, + CalculateContamination and FilterMutectCalls). ++ Mutect2: Add "--germline-resource" and "--f1r2-tar-gz" inputs, requiring + an update to GATK 4.1.2.0. + Mutect2: Add necessary missing index attribute for panel of normals. + MultiQC: Add memory variable to multiqc task. -+ GATK: SplitNCigarReads, BaseRecalibration and ApplyBQSR do no longer need regions files as required inputs. -+ VarDict: Add user definable flags (-M, -A, -Q, -d, -v, -f) to the paired VCF filtering script. -+ Cutadapt: If the output is a gzipped file, compress with level 1 (instead of default 6). ++ GATK: SplitNCigarReads, BaseRecalibration and ApplyBQSR do no longer need + regions files as required inputs. ++ VarDict: Add user definable flags (-M, -A, -Q, -d, -v, -f) to the paired + VCF filtering script. ++ Cutadapt: If the output is a gzipped file, compress with + level 1 (instead of default 6). + Cutadapt: Fix issues with read2output when using single-end reads. + Add feature type, idattr and additional attributes to htseq-count. + Added allow-contain option to bowtie. + Added a changelog to keep track of changes. -+ Added sortByName task in samtools to support more memory efficient execution of HTSeqCount. ++ Added sortByName task in samtools to support more memory efficient + execution of HTSeqCount. + Removed the bam index from HTSeqCount's inputs. 
diff --git a/CPAT.wdl b/CPAT.wdl index 3b542e4f..d97031dc 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -26,17 +26,22 @@ task CPAT { String outFilePath File hex File logitModel + File? referenceGenome - File? referenceGenomeIndex # Should be added as input if - # CPAT should not index the reference genome. + # Should be added as input if CPAT should not index the + # reference genome. + File? referenceGenomeIndex Array[String]? startCodons Array[String]? stopCodons + Int timeMinutes = 10 + ceil(size(gene, "G") * 30) String dockerImage = "biocontainers/cpat:v1.2.4_cv1" } - # Some WDL magic in the command section to properly output the start and stopcodons to the command. - # select_first is needed in order to convert the optional arrays to non-optionals. + # Some WDL magic in the command section to properly output the start and + # stopcodons to the command. + # select_first is needed in order to convert the optional arrays + # to non-optionals. command { set -e mkdir -p "$(dirname ~{outFilePath})" @@ -60,18 +65,17 @@ task CPAT { } parameter_meta { + # inputs gene: {description: "Equivalent to CPAT's `--gene` option.", category: "required"} outFilePath: {description: "Equivalent to CPAT's `--outfile` option.", category: "required"} hex: {description: "Equivalent to CPAT's `--hex` option.", category: "required"} logitModel: {description: "Equivalent to CPAT's `--logitModel` option.", category: "required"} referenceGenome: {description: "Equivalent to CPAT's `--ref` option.", category: "advanced"} - referenceGenomeIndex: {description: "The index of the reference. Should be added as input if CPAT should not index the reference genome.", - category: "advanced"} + referenceGenomeIndex: {description: "The index of the reference. 
Should be added as input if CPAT should not index the reference genome.", category: "advanced"} startCodons: {description: "Equivalent to CPAT's `--start` option.", category: "advanced"} stopCodons: {description: "Equivalent to CPAT's `--stop` option.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 18434755..e8884ab0 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -37,7 +37,22 @@ task Bam2Fasta { command { set -e - mkdir -p "$(dirname ~{outputPrefix})" + mkdir -p "$(dirname ~{outputPrefix})"' + + # Localise the bam and pbi files so they are next to each other in the + # current folder. + bamFiles="" + for bamFile in ~{sep=" " bam}; + do + ln ${bamFile} . + bamFiles=${bamFiles}" $(basename ${bamFile})" + done + + for index in ~{sep=" " bamIndex}; + do + ln ${index} . + done + bam2fasta \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ @@ -93,15 +108,17 @@ task Bam2Fastq { mkdir -p "$(dirname ~{outputPrefix})" # Localise the bam and pbi files so they are next to each other in the - # current folder - bamfiles="" - for bamfile in ~{sep=" " bam};do - ln $bamfile . - bamfiles=$bamfiles" $(basename $bamfile)" + # current folder. + bamFiles="" + for bamFile in ~{sep=" " bam}; + do + ln ${bamFile} . + bamFiles=${bamFiles}" $(basename ${bamFile})" done - for bamindex in ~{sep=" " bamIndex}; do - ln $bamindex . + for index in ~{sep=" " bamIndex}; + do + ln ${index} . done bam2fastq \ @@ -109,7 +126,7 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - $bamfiles + ${bamFiles} } output { diff --git a/bcftools.wdl b/bcftools.wdl index a0aeb442..41825747 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2018 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -24,26 +22,27 @@ version 1.0 task Annotate { input { + Array[String] columns = [] + Boolean force = false + Boolean keepSites = false + Boolean noVersion = false + Array[String] samples = [] + Boolean singleOverlaps = false + Array[String] removeAnns = [] + File inputFile + String outputPath = "output.vcf.gz" + File? annsFile String? collapse - Array[String] columns = [] String? exclude - Boolean force = false File? headerLines String? newId String? 
include - Boolean keepSites = false String? markSites - Boolean noVersion = false String? regions File? regionsFile File? renameChrs - Array[String] samples = [] File? samplesFile - Boolean singleOverlaps = false - Array[String] removeAnns = [] - File inputFile - String outputPath = "output.vcf.gz" Int threads = 0 String memory = "256M" @@ -80,9 +79,8 @@ task Annotate { ~{inputFile} ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} - } - + output { File outputVcf = outputPath File? outputVcfIndex = outputPath + ".tbi" @@ -95,31 +93,31 @@ task Annotate { } parameter_meta { + # inputs + columns: {description: "Comma-separated list of columns or tags to carry over from the annotation file (see man page for details).", category: "advanced"} + force: {description: "Continue even when parsing errors, such as undefined tags, are encountered.", category: "advanced"} + keepSites: {description: "Keep sites which do not pass -i and -e expressions instead of discarding them.", category: "advanced"} + noVersion: {description: "Do not append version and command line information to the output VCF header.", category: "advanced"} + samples: {description: "List of samples for sample stats, \"-\" to include all samples.", category: "advanced"} + singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} + removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} + inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "advanced"} collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} - columns: {description: "Comma-separated list of columns or tags to carry over from the 
annotation file (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} - force: {description: "Continue even when parsing errors, such as undefined tags, are encountered.", category: "advanced"} headerLines: {description: "Lines to append to the VCF header (see man page for details).", category: "advanced"} newId: {description: "Assign ID on the fly (e.g. --set-id +'%CHROM\_%POS').", category: "advanced"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} - keepSites: {description: "Keep sites which do not pass -i and -e expressions instead of discarding them.", category: "advanced"} markSites: {description: "Annotate sites which are present ('+') or absent ('-') in the -a file with a new INFO/TAG flag.", category: "advanced"} - noVersion: {description: "Do not append version and command line information to the output VCF header.", category: "advanced"} regions: {description: "Restrict to comma-separated list of regions.", category: "advanced"} regionsFile: {description: "Restrict to regions listed in a file.", category: "advanced"} renameChrs: {description: "rename chromosomes according to the map in file (see man page for details).", category: "advanced"} - samples: {description: "List of samples for sample stats, \"-\" to include all samples.", category: "advanced"} samplesFile: {description: "File of samples to include.", category: "advanced"} - singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} - removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} - inputFile: {description: "A vcf or bcf file.", category: "required"} - threads: {description: "Number of extra decompression threads [0].", category: "advanced"} - dockerImage: {description: "The docker 
image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -128,6 +126,7 @@ task Sort { File inputFile String outputPath = "output.vcf.gz" String tmpDir = "./sorting-tmp" + String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" @@ -159,6 +158,7 @@ task Sort { } parameter_meta { + # inputs inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} tmpDir: {description: "The location of the temporary files during the bcftools sorting.", category: "advanced"} @@ -166,46 +166,45 @@ task Sort { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } - - } task Stats { input { File inputVcf File inputVcfIndex + String outputPath = basename(inputVcf) + ".stats" + Boolean firstAlleleOnly = false + Boolean splitByID = false + Array[String] samples = [] + Boolean verbose = false + File? compareVcf File? compareVcfIndex - String outputPath = basename(inputVcf) + ".stats" String? afBins String? afTag - Boolean firstAlleleOnly = false String? collapse String? depth String? exclude - File? exons + File? exons String? applyFilters File? fastaRef File? fastaRefIndex - String? include - Boolean splitByID = false + String? 
include String? regions File? regionsFile - Array[String] samples = [] - File? samplesFile - String? targets + File? samplesFile + String? targets File? targetsFile String? userTsTv - Boolean verbose = false Int threads = 0 - Int timeMinutes = 1 + 2* ceil(size(select_all([inputVcf, compareVcf]), "G")) # TODO: Estimate, 2 minutes per GB, refine later. - String memory = "256M" + String memory = "256M" + Int timeMinutes = 1 + 2* ceil(size(select_all([inputVcf, compareVcf]), "G")) # TODO: Estimate, 2 minutes per GB, refine later. String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } - + command { - set -e + set -e mkdir -p $(dirname ~{outputPath}) bcftools stats \ ~{"--af-bins " + afBins} \ @@ -237,19 +236,24 @@ task Stats { runtime { cpu: threads + 1 - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes docker: dockerImage } parameter_meta { + # inputs inputVcf: {description: "The VCF to be analysed.", category: "required"} inputVcfIndex: {description: "The index for the input VCF.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + firstAlleleOnly: {description: "Include only 1st allele at multiallelic sites.", category: "advanced"} + splitByID: {description: "Collect stats for sites with ID separately (known vs novel).", category: "advanced"} + samples: {description: "List of samples for sample stats, \"-\" to include all samples.", category: "advanced"} + verbose: {description: "Produce verbose per-site and per-sample output.", category: "advanced"} compareVcf: {description: "When inputVcf and compareVCF are given, the program generates separate stats for intersection and the complements. 
By default only sites are compared, samples must be given to include also sample columns.", category: "common"} compareVcfIndex: {description: "Index for the compareVcf.", category: "common"} afBins: {description: "Allele frequency bins, a list (0.1,0.5,1) or a file (0.1\n0.5\n1).", category: "advanced"} afTag: {description: "Allele frequency tag to use, by default estimated from AN,AC or GT.", category: "advanded"} - firstAlleleOnly: {description: "Include only 1st allele at multiallelic sites.", category: "advanced"} collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} depth: {description: "Depth distribution: min,max,bin size [0,500,1].", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} @@ -258,20 +262,16 @@ task Stats { fastaRef: {description: "Faidx indexed reference sequence file to determine INDEL context.", category: "advanced"} fastaRefIndex: {description: "Index file (.fai) for fastaRef. Must be supplied if fastaRef is supplied.", category: "advanced"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} - splitByID: {description: "Collect stats for sites with ID separately (known vs novel).", category: "advanced"} regions: {description: "Restrict to comma-separated list of regions.", category: "advanced"} regionsFile: {description: "Restrict to regions listed in a file.", category: "advanced"} - samples: {description: "List of samples for sample stats, \"-\" to include all samples.", category: "advanced"} samplesFile: {description: "File of samples to include.", category: "advanced"} targets: {description: "Similar to regions but streams rather than index-jumps.", category: "advanced"} targetsFile: {description: "Similar to regionsFile but streams rather than index-jumps.", category: "advanced"} userTsTv: {description: ". 
Collect Ts/Tv stats for any tag using the given binning [0:1:100].", category: "advanced"} threads: {description: "Number of extra decompression threads [0].", category: "advanced"} - verbose: {description: "Produce verbose per-site and per-sample output.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -279,6 +279,7 @@ task View { input { File inputFile String outputPath = "output.vcf" + String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" @@ -296,6 +297,7 @@ task View { ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} } + output { File outputVcf = outputPath File? 
outputVcfIndex = outputPath + ".tbi" @@ -308,6 +310,7 @@ task View { } parameter_meta { + # inputs inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} diff --git a/bedtools.wdl b/bedtools.wdl index c228d6c6..b7a03c17 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -25,6 +25,7 @@ task Complement { File faidx File inputBed String outputBed = basename(inputBed, "\.bed") + ".complement.bed" + String memory = "~{512 + ceil(size([inputBed, faidx], "M"))}M" Int timeMinutes = 1 + ceil(size([inputBed, faidx], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" @@ -52,13 +53,13 @@ task Complement { } parameter_meta { + # inputs faidx: {description: "The fasta index (.fai) file from which to extract the genome sizes.", category: "required"} inputBed: {description: "The inputBed to complement.", category: "required"} outputBed: {description: "The path to write the output to.", category: "advanced"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -66,12 +67,14 @@ task Merge { input { File inputBed String outputBed = "merged.bed" + String memory = "~{512 + ceil(size(inputBed, "M"))}M" Int timeMinutes = 1 + ceil(size(inputBed, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } command { + set -e bedtools merge -i ~{inputBed} > ~{outputBed} } @@ -86,12 +89,12 @@ task Merge { } parameter_meta { + # inputs inputBed: {description: "The bed to merge.", category: "required"} outputBed: {description: "The path to write the output to.", category: "advanced"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -100,6 +103,7 @@ task MergeBedFiles { input { Array[File]+ bedFiles String outputBed = "merged.bed" + String memory = "~{512 + ceil(size(bedFiles, "M"))}M" Int timeMinutes = 1 + ceil(size(bedFiles, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" @@ -120,13 +124,14 @@ task MergeBedFiles { time_minutes: timeMinutes docker: dockerImage } + parameter_meta { + # inputs bedFiles: {description: "The bed files to merge.", category: "required"} outputBed: {description: "The path to write the output to.", category: "advanced"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -139,9 +144,13 @@ task Sort { Boolean chrThenSizeD = false Boolean chrThenScoreA = false Boolean chrThenScoreD = false + String outputBed = "output.sorted.bed" + File? genome File? faidx - String outputBed = "output.sorted.bed" + + String memory = "~{512 + ceil(size(inputBed, "M"))}M" + Int timeMinutes = 1 + ceil(size(inputBed, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -166,6 +175,8 @@ task Sort { } runtime { + memory: memory + time_minutes: timeMinutes docker: dockerImage } } @@ -174,13 +185,15 @@ task Intersect { input { File regionsA File regionsB - # Giving a faidx file will set the sorted option. - File? faidx String outputBed = "intersect.bed" + + File? faidx # Giving a faidx file will set the sorted option. 
+ String memory = "~{512 + ceil(size([regionsA, regionsB], "M"))}M" Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } + Boolean sorted = defined(faidx) command { @@ -205,14 +218,13 @@ task Intersect { } parameter_meta { - faidx: {description: "The fasta index (.fai) file that is used to create the genome file required for sorted output. Implies sorted option.", - category: "common"} - regionsA: {description: "Region file a to intersect", category: "required"} - regionsB: {description: "Region file b to intersect", category: "required"} - outputBed: {description: "The path to write the output to", category: "advanced"} + # inputs + regionsA: {description: "Region file a to intersect.", category: "required"} + regionsB: {description: "Region file b to intersect.", category: "required"} + outputBed: {description: "The path to write the output to.", category: "advanced"} + faidx: {description: "The fasta index (.fai) file that is used to create the genome file required for sorted output. Implies sorted option.", category: "common"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/biowdl.wdl b/biowdl.wdl index 838755d9..8a1f9dfd 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -32,6 +32,7 @@ task InputConverter { Boolean checkFileMd5sums=false Boolean old=false + String memory = "128M" Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" } @@ -52,22 +53,20 @@ task InputConverter { } runtime { - memory: "128M" + memory: memory time_minutes: timeMinutes docker: dockerImage } parameter_meta { + # inputs samplesheet: {description: "The samplesheet to be processed.", category: "required"} - outputFile: {description: "The location the JSON representation of the samplesheet should be written to.", - category: "advanced"} - skipFileCheck: {description: "Whether or not the existance of the files mentioned in the samplesheet should be checked.", - category: "advanced"} - checkFileMd5sums: {description: "Whether or not the MD5 sums of the files mentioned in the samplesheet should be checked.", - category: "advanced"} + outputFile: {description: "The location the JSON representation of the samplesheet should be written to.", category: "advanced"} + skipFileCheck: {description: "Whether or not the existance of the files mentioned in the samplesheet should be checked.", category: "advanced"} + checkFileMd5sums: {description: "Whether or not the MD5 sums of the files mentioned in the samplesheet should be checked.", category: "advanced"} old: {description: "Whether or not the old samplesheet format should be used.", category: "advanced"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/bowtie.wdl b/bowtie.wdl index b3f3ceae..7fb1b614 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2018 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -28,30 +26,31 @@ task Bowtie { Array[File] readsDownstream = [] String outputPath = "mapped.bam" Array[File]+ indexFiles - Int? seedmms - Int? seedlen - Int? k Boolean best = false Boolean strata = false Boolean allowContain = false + + Int? seedmms + Int? seedlen + Int? k String? samRG + String picardXmx = "4G" Int threads = 1 - Int timeMinutes = 1 + ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads) String memory = "~{5 + ceil(size(indexFiles, "G"))}G" - String picardXmx = "4G" + Int timeMinutes = 1 + ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads) # Image contains bowtie=1.2.2 and picard=2.9.2 String dockerImage = "quay.io/biocontainers/mulled-v2-bfe71839265127576d3cd749c056e7b168308d56:1d8bec77b352cdcf3e9ff3d20af238b33ed96eae-0" } # Assume fastq input with -q flag. - # The output always needs to be SAM as it is piped into Picard SortSam + # The output always needs to be SAM as it is piped into Picard SortSam. # Hence, the --sam flag is used. 
- command { set -e -o pipefail mkdir -p "$(dirname ~{outputPath})" - bowtie -q \ + bowtie \ + -q \ --sam \ ~{"--seedmms " + seedmms} \ ~{"--seedlen " + seedlen} \ @@ -84,24 +83,22 @@ task Bowtie { } parameter_meta { + # inputs readsUpstream: {description: "The first-/single-end fastq files.", category: "required"} readsDownstream: {description: "The second-end fastq files.", category: "common"} outputPath: {description: "The location the output BAM file should be written to.", category: "common"} indexFiles: {description: "The index files for bowtie.", category: "required"} - seedmms: {description: "Equivalent to bowtie's `--seedmms` option.", category: "advanced"} - seedlen: {description: "Equivalent to bowtie's `--seedlen` option.", category: "advanced"} - k: {description: "Equivalent to bowtie's `-k` option.", category: "advanced"} best: {description: "Equivalent to bowtie's `--best` flag.", category: "advanced"} strata: {description: "Equivalent to bowtie's `--strata` flag.", category: "advanced"} allowContain: {description: "Equivalent to bowtie's `--allow-contain` flag.", category: "advanced"} + seedmms: {description: "Equivalent to bowtie's `--seedmms` option.", category: "advanced"} + seedlen: {description: "Equivalent to bowtie's `--seedlen` option.", category: "advanced"} + k: {description: "Equivalent to bowtie's `-k` option.", category: "advanced"} samRG: {description: "Equivalent to bowtie's `--sam-RG` option.", category: "advanced"} - - picardXmx: {description: "The maximum memory available to the picard (used for sorting the output). Should be lower than `memory` to accommodate JVM overhead and bowtie's memory usage.", - category: "advanced"} + picardXmx: {description: "The maximum memory available to the picard (used for sorting the output). 
Should be lower than `memory` to accommodate JVM overhead and bowtie's memory usage.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index 6ea4578d..34cd38a6 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -26,33 +26,35 @@ task Mem { File? read2 BwaIndex bwaIndex String outputPrefix - String? readgroup Boolean sixtyFour = false Boolean usePostalt = false - Int threads = 4 - Int? sortThreads Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 - Int? memoryGb + + String? readgroup + Int? sortThreads + Int? memoryGb + + Int threads = 4 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10 String dockerImage = "quay.io/biocontainers/mulled-v2-6a15c99309c82b345497d24489bee67bbb76c2f6:1c9c3227b9bf825a8dc9726a25701aa23c0b1f12-0" } - # Samtools sort may block the pipe while it is writing data to disk. + # Samtools sort may block the pipe while it is writing data to disk. # This can lead to cpu underutilization. - # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. + # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. 
Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) - # BWA-mem2's index files contain 2 BWT indexes of which only one is used. .2bit64 is used by default and + # BWA-mem2's index files contain 2 BWT indexes of which only one is used. .2bit64 is used by default and # .8bit32 is used for avx2. # The larger one of these is the 8bit32 index. Since we do not know beforehand which one is used we need to accomodate for that. - # Using only the 8bit32 index uses 57,5% of the index files. Since bwa-mem2 uses slightly more memory than the index + # Using only the 8bit32 index uses 57,5% of the index files. Since bwa-mem2 uses slightly more memory than the index. # We put it at 62% as a safety factor. That means the memory usage for bwa-mem will be 53G for a human genome. Resulting in 60G total # on 8 cores with samtools with 3 sort threads. Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 0.62) + sortMemoryPerThreadGb * totalSortThreads - # The bwa postalt script is out commented as soon as usePostalt = false. + # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. command { set -e @@ -81,7 +83,7 @@ task Mem { runtime { # One extra thread for bwa-postalt + samtools is not needed. # These only use 5-10% of compute power and not always simultaneously. 
- cpu: threads + cpu: threads memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" time_minutes: timeMinutes docker: dockerImage @@ -92,21 +94,21 @@ task Mem { read1: {description: "The first-end fastq file.", category: "required"} read2: {description: "The second-end fastq file.", category: "common"} bwaIndex: {description: "The BWA index, including (optionally) a .alt file.", category: "required"} - usePostalt: {description: "Whether to use the postalt script from bwa kit."} outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"} - readgroup: {description: "A readgroup identifier.", category: "common"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} - threads: {description: "The number of threads to use for alignment.", category: "advanced"} - memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} - sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + usePostalt: {description: "Whether to use the postalt script from bwa kit."} sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + readgroup: {description: "A readgroup identifier.", category: "common"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} + threads: {description: "The number of threads to use for alignment.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputBam: "The produced BAM file." + outputBam: {description: "The produced BAM file."} + outputHla: {description: "The produced HLA file."} } } diff --git a/bwa.wdl b/bwa.wdl index fdeb870f..0f09f7a9 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -26,28 +26,30 @@ task Mem { File? read2 BwaIndex bwaIndex String outputPrefix - String? readgroup Boolean sixtyFour = false Boolean usePostalt = false - Int threads = 4 - Int? sortThreads Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 - Int? memoryGb + + String? readgroup + Int? sortThreads + Int? memoryGb + + Int threads = 4 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10 String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } - # Samtools sort may block the pipe while it is writing data to disk. + # Samtools sort may block the pipe while it is writing data to disk. # This can lead to cpu underutilization. - # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. + # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) - # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. + # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads - # The bwa postalt script is out commented as soon as usePostalt = false. + # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. command { set -e @@ -76,7 +78,7 @@ task Mem { runtime { # One extra thread for bwa-postalt + samtools is not needed. # These only use 5-10% of compute power and not always simultaneously. - cpu: threads + cpu: threads memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" time_minutes: timeMinutes docker: dockerImage @@ -87,21 +89,21 @@ task Mem { read1: {description: "The first-end fastq file.", category: "required"} read2: {description: "The second-end fastq file.", category: "common"} bwaIndex: {description: "The BWA index, including (optionally) a .alt file.", category: "required"} - usePostalt: {description: "Whether to use the postalt script from bwa kit."} outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"} - readgroup: {description: "A readgroup identifier.", category: "common"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} - threads: {description: "The number of threads to use for alignment.", category: "advanced"} - memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} - sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + usePostalt: {description: "Whether to use the postalt script from bwa kit."} sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + readgroup: {description: "A readgroup identifier.", category: 
"common"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} + threads: {description: "The number of threads to use for alignment.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputBam: "The produced BAM file." + outputBam: {description: "The produced BAM file."} + outputHla: {description: "The produced HLA file."} } } diff --git a/ccs.wdl b/ccs.wdl index cab15fea..4446937b 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -22,19 +22,20 @@ version 1.0 task CCS { input { + File subreadsFile + String outputPrefix Int minPasses = 3 Int minLength = 10 Int maxLength = 50000 Boolean byStrand = false Float minReadQuality = 0.99 String logLevel = "WARN" - File subreadsFile + File? subreadsIndexFile String? chunkString - String outputPrefix - - Int cores = 2 - String memory = "2G" + + Int threads = 2 + String memory = "4G" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/pbccs:5.0.0--0" } @@ -49,7 +50,7 @@ task CCS { ~{true="--by-strand" false="" byStrand} \ --min-rq ~{minReadQuality} \ --log-level ~{logLevel} \ - --num-threads ~{cores} \ + --num-threads ~{threads} \ ~{"--chunk " + chunkString} \ ~{"--report-json " + outputPrefix + ".ccs.report.json"} \ ~{"--log-file " + outputPrefix + ".ccs.stderr.log"} \ @@ -65,7 +66,7 @@ task CCS { } runtime { - cpu: cores + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -73,17 +74,17 @@ task CCS { parameter_meta { # inputs + subreadsFile: {description: "Subreads input file.", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} minPasses: {description: "Minimum number of full-length subreads required to generate ccs for a ZMW.", category: "advanced"} minLength: {description: "Minimum draft length before polishing.", category: "advanced"} maxLength: {description: "Maximum draft length before polishing.", category: "advanced"} byStrand: {description: "Generate a consensus for each strand.", category: "advanced"} minReadQuality: {description: "Minimum predicted accuracy in [0, 1].", category: "common"} logLevel: {description: "Set log level. 
Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} - subreadsFile: {description: "Subreads input file.", category: "required"} subreadsIndexFile: {description: "Index for the subreads input file, required when using chunkString.", category: "advanced"} chunkString: {descpription: "Chunk string (e.g. 1/4, 5/5) for CCS.", category: "advanced"} - outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - cores: {description: "The number of cores to be used.", category: "advanced"} + threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/centrifuge.wdl b/centrifuge.wdl index 1e7a0b45..1637abdd 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -94,13 +94,13 @@ task Build { task Classify { input { + Array[File]+ read1 + Array[File] read2 = [] String inputFormat = "fastq" Boolean phred64 = false Int minHitLength = 22 Array[File]+ indexFiles - Array[File]+ read1 String outputPrefix - Array[File] read2 = [] Int? trim5 Int? 
trim3 @@ -155,13 +155,13 @@ task Classify { parameter_meta { # inputs + read1: {description: "List of files containing mate 1s, or unpaired reads.", category: "required"} + read2: {description: "List of files containing mate 2s.", category: "common"} inputFormat: {description: "The format of the read file(s).", category: "required"} phred64: {description: "If set to true, phred+64 encoding is used.", category: "required"} minHitLength: {description: "Minimum length of partial hits.", category: "required"} indexFiles: {description: "The files of the index for the reference genomes.", category: "required"} - read1: {description: "List of files containing mate 1s, or unpaired reads.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - read2: {description: "List of files containing mate 2s.", category: "common"} trim5: {description: "Trim bases from 5' (left) end of each read before alignment.", category: "common"} trim3: {description: "Trim bases from 3' (right) end of each read before alignment.", category: "common"} reportMaxDistinct: {description: "It searches for at most distinct, primary assignments for each read or pair.", category: "common"} diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 115c5ca4..844d6990 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -25,6 +25,7 @@ task ChunkedScatter { File inputFile String prefix = "./scatter" Boolean splitContigs = false + Int? chunkSize Int? overlap Int? 
minimumBasesPerFile @@ -57,15 +58,16 @@ task ChunkedScatter { } parameter_meta { + # inputs inputFile: {description: "Either a bed file describing regiosn of intrest or a sequence dictionary.", category: "required"} prefix: {description: "The prefix for the output files.", category: "advanced"} + splitContigs: {description: "If set, contigs are allowed to be split up over multiple files.", category: "advanced"} chunkSize: {description: "Equivalent to chunked-scatter's `-c` option.", category: "advanced"} overlap: {description: "Equivalent to chunked-scatter's `-o` option.", category: "advanced"} minimumBasesPerFile: {description: "Equivalent to chunked-scatter's `-m` option.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -76,9 +78,11 @@ task ScatterRegions { String prefix = "scatters/scatter-" Boolean splitContigs = false Int scatterSizeMillions = 1000 + Int? scatterSize - Int timeMinutes = 2 + String memory = "256M" + Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/chunked-scatter:0.2.0--py_0" } @@ -105,15 +109,14 @@ task ScatterRegions { } parameter_meta { + # inputs inputFile: {description: "The input file, either a bed file or a sequence dict. Which format is used is detected by the extension: '.bed', '.fai' or '.dict'.", category: "required"} prefix: {description: "The prefix of the ouput files. 
Output will be named like: .bed, in which N is an incrementing number. Default 'scatter-'.", category: "advanced"} splitContigs: {description: "If set, contigs are allowed to be split up over multiple files.", category: "advanced"} scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} scatterSize: {description: "Overrides scatterSizeMillions with a smaller value if set.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/clever.wdl b/clever.wdl index 3a6515f7..75e889b3 100644 --- a/clever.wdl +++ b/clever.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2018 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -74,12 +72,12 @@ task Mateclever { indexedFiteredBam: {description: "The index of the filtered bam file.", category: "required"} bwaIndex: {description: "The BWA index files.", category: "required"} predictions: {description: "The predicted deletions (VCF) from clever.", category: "required"} - maxOffset: {description: "The maximum center distance between split-read and read-pair deletion to be considered identical.", category: "advanced"} - maxLengthDiff: {description: "The maximum length difference between split-read and read-pair deletion to be considered identical.", category: "advanced"} - cleverMaxDelLength: {description: "The maximum deletion length to look for in Clever predictions.", 
category: "advanced"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - threads: {description: "The the number of threads required to run a program", category: "advanced"} - memory: {description: "The memory required to run the programs", category: "advanced"} + cleverMaxDelLength: {description: "The maximum deletion length to look for in Clever predictions.", category: "advanced"} + maxLengthDiff: {description: "The maximum length difference between split-read and read-pair deletion to be considered identical.", category: "advanced"} + maxOffset: {description: "The maximum center distance between split-read and read-pair deletion to be considered identical.", category: "advanced"} + threads: {description: "The the number of threads required to run a program.", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } @@ -128,8 +126,8 @@ task Prediction { bamIndex: {description: "The index bam file.", category: "required"} bwaIndex: {description: "The BWA index files.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - threads: {description: "The the number of threads required to run a program", category: "advanced"} - memory: {description: "The memory required to run the programs", category: "advanced"} + threads: {description: "The the number of threads required to run a program.", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/collect-columns.wdl b/collect-columns.wdl index fe41c5e8..67db6179 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -24,12 +24,13 @@ task CollectColumns { input { Array[File]+ inputTables String outputPath + Boolean header = false + Boolean sumOnDuplicateId = false + Int? featureColumn Int? valueColumn Int? separator Array[String]? sampleNames - Boolean header = false - Boolean sumOnDuplicateId = false Array[String]? additionalAttributes File? referenceGtf String? 
featureAttribute @@ -67,20 +68,20 @@ task CollectColumns { } parameter_meta { + # inputs inputTables: {description: "The tables from which columns should be taken.", category: "required"} outputPath: {description: "The path to which the output should be written.", category: "required"} + header: {description: "Equivalent to the -H flag of collect-columns.", category: "advanced"} + sumOnDuplicateId: {description: "Equivalent to the -S flag of collect-columns.", category: "advanced"} featureColumn: {description: "Equivalent to the -f option of collect-columns.", category: "advanced"} valueColumn: {description: "Equivalent to the -c option of collect-columns.", category: "advanced"} separator: {description: "Equivalent to the -s option of collect-columns.", category: "advanced"} sampleNames: {description: "Equivalent to the -n option of collect-columns.", category: "advanced"} - header: {description: "Equivalent to the -H flag of collect-columns.", category: "advanced"} - sumOnDuplicateId: {description: "Equivalent to the -S flag of collect-columns.", category: "advanced"} additionalAttributes: {description: "Equivalent to the -a option of collect-columns.", category: "advanced"} referenceGtf: {description: "Equivalent to the -g option of collect-columns.", category: "advanced"} featureAttribute: {description: "Equivalent to the -F option of collect-columns.", category: "advanced"} - memoryGb: {description: "The maximum amount of memory the job will need in GB", category: "advanced"} + memoryGb: {description: "The maximum amount of memory the job will need in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} diff --git a/common.wdl b/common.wdl index e96cc1c8..b3878bb6 100644 --- a/common.wdl +++ b/common.wdl @@ -45,7 +45,7 @@ task CheckFileMD5 { input { File file String md5 - # By default cromwell expects /bin/bash to be present in the container + # By default cromwell expects /bin/bash to be present in the container. # The 'bash' container does not fill this requirement. (It is in /usr/local/bin/bash) # Use a stable version of debian:stretch-slim for this. (Smaller than ubuntu) String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" @@ -71,7 +71,7 @@ task ConcatenateTextFiles { Boolean zip = false } - # When input and output is both compressed decompression is not needed + # When input and output is both compressed decompression is not needed. String cmdPrefix = if (unzip && !zip) then "zcat " else "cat " String cmdSuffix = if (!unzip && zip) then " | gzip -c " else "" @@ -116,8 +116,8 @@ task Copy { } task CreateLink { - # Making this of type File will create a link to the copy of the file in the execution - # folder, instead of the actual file. + # Making this of type File will create a link to the copy of the file in + # the execution folder, instead of the actual file. # This cannot be propperly call-cached or used within a container. 
input { String inputFile @@ -182,6 +182,7 @@ task TextToFile { input { String text String outputFile = "out.txt" + Int timeMinutes = 1 String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -194,18 +195,19 @@ task TextToFile { File out = outputFile } - parameter_meta { - text: {description: "The text to print", category: "required"} - outputFile: {description: "The name of the output file", category: "common"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } runtime { memory: "1G" time_minutes: timeMinutes docker: dockerImage } + + parameter_meta { + # inputs + text: {description: "The text to print.", category: "required"} + outputFile: {description: "The name of the output file.", category: "common"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } } task YamlToJson { @@ -213,11 +215,12 @@ task YamlToJson { File yaml String outputJson = basename(yaml, "\.ya?ml$") + ".json" - Int timeMinutes = 1 String memory = "128M" + Int timeMinutes = 1 # biowdl-input-converter has python and pyyaml. 
String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" } + command { set -e mkdir -p "$(dirname ~{outputJson})" @@ -230,6 +233,7 @@ task YamlToJson { json.dump(content, output_json) CODE } + output { File json = outputJson } @@ -241,12 +245,12 @@ task YamlToJson { } parameter_meta { + # inputs yaml: {description: "The YAML file to convert.", category: "required"} outputJson: {description: "The location the output JSON file should be written to.", category: "advanced"} memory: {description: "The maximum amount of memory the job will need.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/cutadapt.wdl b/cutadapt.wdl index 7faeaff1..74f57912 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -32,6 +32,14 @@ task Cutadapt { Array[String] adapterRead2 = [] Array[String] frontRead2 = [] Array[String] anywhereRead2 = [] + String reportPath = "cutadapt_report.txt" + # Cutadapt compresses the zipped output files with a ridiculously + # high compression level (5 or 6). + # This is not the fast compression preset. It takes up to 400% more + # CPU time for a 20% reduction in file size. + # Hence we use compression level 1 here. + Int compressionLevel = 1 # This only affects outputs with the .gz suffix. + Boolean? interleaved String? pairFilter Float? errorRate @@ -52,7 +60,7 @@ task Cutadapt { String? stripSuffix String? prefix String? suffix - Int? minimumLength = 2 # Necessary to prevent creation of empty reads or 1 base reads. + Int? 
minimumLength = 2 # Necessary to prevent creation of empty reads or 1 base reads. Int? maximumLength Int? maxN Boolean? discardTrimmed @@ -73,11 +81,7 @@ task Cutadapt { Boolean? bwa Boolean? zeroCap Boolean? noZeroCap - String reportPath = "cutadapt_report.txt" - # Cutadapt compresses the zipped output files with a ridiculously high compression level (5 or 6). - # This is not the fast compression preset. It takes up to 400% more CPU time for a 20% reduction in file size. - # Hence we use compression level 1 here. - Int compressionLevel = 1 # This only affects outputs with the .gz suffix. + Int cores = 4 String memory = "~{300 + 100 * cores}M" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) @@ -152,8 +156,8 @@ task Cutadapt { output{ File cutRead1 = read1output - File? cutRead2 = read2output File report = reportPath + File? cutRead2 = read2output File? tooLongOutput=tooLongOutputPath File? tooShortOutput=tooShortOutputPath File? untrimmedOutput=untrimmedOutputPath @@ -173,22 +177,19 @@ task Cutadapt { } parameter_meta { + # inputs read1: {description: "The first or single end fastq file to be run through cutadapt.", category: "required"} read2: {description: "An optional second end fastq file to be run through cutadapt.", category: "common"} read1output: {description: "The name of the resulting first or single end fastq file.", category: "common"} read2output: {description: "The name of the resulting second end fastq file.", category: "common"} - adapter: {description: "A list of 3' ligated adapter sequences to be cut from the given first or single end fastq file.", - category: "common"} - front: {description: "A list of 5' ligated adapter sequences to be cut from the given first or single end fastq file.", - category: "advanced"} - anywhere: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given first or single end fastq file.", - category: "advanced"} - adapterRead2: {description: "A list of 3' ligated adapter 
sequences to be cut from the given second end fastq file.", - category: "common"} - frontRead2: {description: "A list of 5' ligated adapter sequences to be cut from the given second end fastq file.", - category: "advanced"} - anywhereRead2: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given second end fastq file.", - category: "advanced"} + adapter: {description: "A list of 3' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "common"} + front: {description: "A list of 5' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "advanced"} + anywhere: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "advanced"} + adapterRead2: {description: "A list of 3' ligated adapter sequences to be cut from the given second end fastq file.", category: "common"} + frontRead2: {description: "A list of 5' ligated adapter sequences to be cut from the given second end fastq file.", category: "advanced"} + anywhereRead2: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given second end fastq file.", category: "advanced"} + reportPath: {description: "The name of the file to write cutadapts's stdout to, this contains some metrics.", category: "common"} + compressionLevel: {description: "The compression level if gzipped output is used.", category: "advanced"} interleaved: {description: "Equivalent to cutadapt's --interleaved flag.", category: "advanced"} pairFilter: {description: "Equivalent to cutadapt's --pair-filter option.", category: "advanced"} errorRate: {description: "Equivalent to cutadapt's --error-rate option.", category: "advanced"} @@ -230,13 +231,9 @@ task Cutadapt { bwa: {description: "Equivalent to cutadapt's --bwa flag.", category: "advanced"} zeroCap: {description: "Equivalent to cutadapt's --zero-cap flag.", category: "advanced"} noZeroCap: 
{description: "Equivalent to cutadapt's --no-zero-cap flag.", category: "advanced"} - reportPath: {description: "The name of the file to write cutadapts's stdout to, this contains some metrics.", - category: "common"} - compressionLevel: {description: "The compression level if gzipped output is used.", category: "advanced"} cores: {description: "The number of cores to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/deepvariant.wdl b/deepvariant.wdl index f5661886..20bf8e27 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -28,6 +28,7 @@ task RunDeepVariant { File inputBamIndex String modelType String outputVcf + String? postprocessVariantsExtraArgs File? customizedModel Int? numShards @@ -43,7 +44,6 @@ task RunDeepVariant { command { set -e - /opt/deepvariant/bin/run_deepvariant \ --ref ~{referenceFasta} \ --reads ~{inputBam} \ @@ -59,36 +59,36 @@ task RunDeepVariant { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } output { File outputVCF = outputVcf File outputVCFIndex = outputVCF + ".tbi" + Array[File] outputVCFStatsReport = glob("*.visual_report.html") File? outputGVCF = outputGVcf File? 
outputGVCFIndex = outputGVcf + ".tbi" - Array[File] outputVCFStatsReport = glob("*.visual_report.html") } - + parameter_meta { - referenceFasta: {description: "Genome reference to use", category: "required"} + # inputs + referenceFasta: {description: "Genome reference to use.", category: "required"} referenceFastaIndex: {description: "Index for the genome reference file.", category: "required"} inputBam: {description: "Aligned, sorted, indexed BAM file containing the reads we want to call.", category: "required"} inputBamIndex: {description: "Index for the input bam file.", category: "required"} - modelType: {description: ". Type of model to use for variant calling. Each model_type has an associated default model, which can be overridden by the --customized_model flag", category: "required"} + modelType: {description: ". Type of model to use for variant calling. Each model_type has an associated default model, which can be overridden by the --customized_model flag.", category: "required"} outputVcf: {description: "Path where we should write VCF file.", category: "required"} - customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. If not set, the default for each --model_type will be used", category: "advanced"} + postprocessVariantsExtraArgs: {description: "A comma-separated list of flag_name=flag_value. 'flag_name' has to be valid flags for calpostprocess_variants.py.", category: "advanced"} + customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. 
If not set, the default for each --model_type will be used"., category: "advanced"} numShards: {description: "Number of shards for make_examples step.", category: "common"} outputGVcf: {description: "Path where we should write gVCF file.", category: "common"} regions: {description: "List of regions we want to process, in BED/BEDPE format.", category: "advanced"} sampleName: {description: "Sample name to use instead of the sample name from the input reads BAM (SM tag in the header).", category: "common"} VCFStatsReport: {description: "Output a visual report (HTML) of statistics about the output VCF.", category: "common"} - postprocessVariantsExtraArgs: {description: "A comma-separated list of flag_name=flag_value. 'flag_name' has to be valid flags for calpostprocess_variants.py.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From b131d926dd3cb7e2dc59adecb015fa09d1e3d3bc Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 30 Oct 2020 17:11:41 +0100 Subject: [PATCH 0666/1208] Edit another batch of tasks to uniform layout. 
--- bam2fastx.wdl | 10 +- delly.wdl | 6 +- fastqc.wdl | 49 ++-- fastqsplitter.wdl | 22 +- flash.wdl | 12 +- gatk.wdl | 612 +++++++++++++++++++++------------------------- gffcompare.wdl | 2 +- 7 files changed, 338 insertions(+), 375 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index e8884ab0..1b911dbb 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -37,18 +37,18 @@ task Bam2Fasta { command { set -e - mkdir -p "$(dirname ~{outputPrefix})"' + mkdir -p "$(dirname ~{outputPrefix})" # Localise the bam and pbi files so they are next to each other in the # current folder. bamFiles="" - for bamFile in ~{sep=" " bam}; + for bamFile in ~{sep=" " bam} do ln ${bamFile} . bamFiles=${bamFiles}" $(basename ${bamFile})" done - for index in ~{sep=" " bamIndex}; + for index in ~{sep=" " bamIndex} do ln ${index} . done @@ -110,13 +110,13 @@ task Bam2Fastq { # Localise the bam and pbi files so they are next to each other in the # current folder. bamFiles="" - for bamFile in ~{sep=" " bam}; + for bamFile in ~{sep=" " bam} do ln ${bamFile} . bamFiles=${bamFiles}" $(basename ${bamFile})" done - for index in ~{sep=" " bamIndex}; + for index in ~{sep=" " bamIndex} do ln ${index} . 
done diff --git a/delly.wdl b/delly.wdl index f708f494..ffe9023a 100644 --- a/delly.wdl +++ b/delly.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2018 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -59,9 +57,9 @@ task CallSV { bamFile: {description: "The bam file to process.", category: "required"} bamIndex: {description: "The index bam file.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} - referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } + referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } outputPath: {description: "The location the output VCF file should be written.", category: "common"} - memory: {description: "The memory required to run the programs", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/fastqc.wdl b/fastqc.wdl index 04b6813f..dd3dfc2e 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -29,6 +29,7 @@ task Fastqc { Boolean noFilter = false Boolean extract = false Boolean nogroup = false + Int? minLength String? format File? contaminants @@ -37,32 +38,35 @@ task Fastqc { Int? kmers String? dir - Int threads = 1 # Set javaXmx a little high. Equal to fastqc default with 7 threads. # This is because some fastq files need more memory. 2G per core # is a nice cluster default, so we use all the rest of the memory for # fastqc so we should have as little OOM crashes as possible even with # weird edge case fastq's. 
- String javaXmx="1750M" + String javaXmx="1750M" + Int threads = 1 String memory = "2G" Int timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 - String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" - Array[File]? NoneArray - File? NoneFile + String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0 + + Array[File]? noneArray + File? noneFile } # Chops of the .gz extension if present. - # The Basename needs to be taken here. Otherwise paths might differ between similar jobs. + # The Basename needs to be taken here. Otherwise paths might differ + # between similar jobs. String name = basename(sub(seqFile, "\.gz$","")) - # This regex chops of the extension and replaces it with _fastqc for the reportdir. + # This regex chops of the extension and replaces it with _fastqc for + # the reportdir. # Just as fastqc does it. String reportDir = outdirPath + "/" + sub(name, "\.[^\.]*$", "_fastqc") - # We reimplement the perl wrapper here. This has the advantage that it gives - # us more control over the amount of memory used. + # We reimplement the perl wrapper here. This has the advantage that it + # gives us more control over the amount of memory used. command <<< set -e - mkdir -p ~{outdirPath} + mkdir -p "~{outdirPath}" FASTQC_DIR="/usr/local/opt/fastqc-0.11.9" export CLASSPATH="$FASTQC_DIR:$FASTQC_DIR/sam-1.103.jar:$FASTQC_DIR/jbzip2-0.9.jar:$FASTQC_DIR/cisd-jhdf5.jar" java -Djava.awt.headless=true -XX:ParallelGCThreads=1 \ @@ -86,23 +90,24 @@ task Fastqc { >>> output { - File? rawReport = if extract then reportDir + "/fastqc_data.txt" else NoneFile File htmlReport = reportDir + ".html" File reportZip = reportDir + ".zip" - File? summary = if extract then reportDir + "/summary.txt" else NoneFile - Array[File]? images = if extract then glob(reportDir + "/Images/*.png") else NoneArray + File? summary = if extract then reportDir + "/summary.txt" else noneFile + File? rawReport = if extract then reportDir + "/fastqc_data.txt" else noneFile + Array[File]? 
images = if extract then glob(reportDir + "/Images/*.png") else noneArray } runtime { cpu: threads memory: memory - docker: dockerImage time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs seqFile: {description: "A fastq file.", category: "required"} - outdirPath: {description: "The path to write the output to", catgory: "required"} + outdirPath: {description: "The path to write the output to.", catgory: "required"} casava: {description: "Equivalent to fastqc's --casava flag.", category: "advanced"} nano: {description: "Equivalent to fastqc's --nano flag.", category: "advanced"} noFilter: {description: "Equivalent to fastqc's --nofilter flag.", category: "advanced"} @@ -115,18 +120,16 @@ task Fastqc { limits: {description: "Equivalent to fastqc's --limits option.", category: "advanced"} kmers: {description: "Equivalent to fastqc's --kmers option.", category: "advanced"} dir: {description: "Equivalent to fastqc's --dir option.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} threads: {description: "The number of cores to use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } meta { WDL_AID: { - exclude: ["NoneFile", "NoneArray"] + exclude: ["noneFile", "noneArray"] } } } @@ -155,14 +158,14 @@ task GetConfiguration { } runtime { - memory: "2G" # Needs more than 1 to pull the docker image + memory: "2G" # Needs more than 1 to pull the docker image. time_minute: timeMinutes docker: dockerImage } parameter_meta { + # inputs timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/fastqsplitter.wdl b/fastqsplitter.wdl index c523cf8a..25a50954 100644 --- a/fastqsplitter.wdl +++ b/fastqsplitter.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2019 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -26,19 +24,24 @@ task Fastqsplitter { input { File inputFastq Array[String]+ outputPaths - String dockerImage = "quay.io/biocontainers/fastqsplitter:1.1.0--py37h516909a_1" + Int? compressionLevel Int? threadsPerFile - # fastqplitter utilizes one thread per input file and one or more threads per output file + one thread for the application. - # Since a compression level of 1 is used, each output file uses approx 0.5 cores. + + # fastqplitter utilizes one thread per input file and one or + # more threads per output file + one thread for the application. + # Since a compression level of 1 is used, each output file + # uses approx 0.5 cores. 
Int cores = 1 + ceil(0.5 * length(outputPaths)) + String dockerImage = "quay.io/biocontainers/fastqsplitter:1.1.0--py37h516909a_1" } # Busybox mkdir does not accept multiple paths. command <<< set -e for FILE in ~{sep=' ' outputPaths} - do mkdir -p "$(dirname $FILE)" + do + mkdir -p "$(dirname ${FILE})" done fastqsplitter \ ~{"-c " + compressionLevel} \ @@ -51,15 +54,16 @@ task Fastqsplitter { Array[File] chunks = outputPaths } - # Using very safe margins here. 10MB/300MB per outputfile is used for single-threaded/multi-threaded compression. + # Using very safe margins here. 10MB/300MB per outputfile is used for + # single-threaded/multi-threaded compression. Float memoryPerFile = if select_first([threadsPerFile, 1]) > 1 then 0.40 else 0.02 Int fastqsplitterMemory = ceil(0.100 + memoryPerFile * length(outputPaths)) - # Make sure a minimum of 2 GB is present to pull the singularity image + # Make sure a minimum of 2 GB is present to pull the singularity image. Int memory = if fastqsplitterMemory <= 2 then 2 else fastqsplitterMemory runtime { + cpu: cores memory: "~{memory}G" docker: dockerImage - cpu: cores } } diff --git a/flash.wdl b/flash.wdl index 6e704921..c4554c50 100644 --- a/flash.wdl +++ b/flash.wdl @@ -24,13 +24,14 @@ import "common.wdl" as common task Flash { input { - String? preCommand FastqPair inputFastq String outdirPath String outPrefix = "flash" + Boolean compress = true + + String? preCommand Int? minOverlap Int? 
maxOverlap - Boolean compress = true Int threads = 2 String memory = "2G" @@ -55,8 +56,8 @@ task Flash { File notCombined1 = outdirPath + "/" + outPrefix + ".notCombined_1.fastq.gz" File notCombined2 = outdirPath + "/" + outPrefix + ".notCombined_2.fastq.gz" FastqPair notCombined = object { - R1: notCombined1, - R2: notCombined2 + R1: notCombined1, + R2: notCombined2 } File hist = outdirPath + "/" + outPrefix + ".hist" File histogram = outdirPath + "/" + outPrefix + ".histogram" @@ -66,5 +67,4 @@ task Flash { cpu: threads memory: memory } - -} \ No newline at end of file +} diff --git a/gatk.wdl b/gatk.wdl index 12416dda..cc5d1de5 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -28,12 +28,13 @@ task AnnotateIntervals { String annotatedIntervalsPath = "intervals.annotated.tsv" File intervals String intervalMergingRule = "OVERLAPPING_ONLY" + Int featureQueryLookahead = 1000000 + File? mappabilityTrack File? segmentalDuplicationTrack - Int featureQueryLookahead = 1000000 - String memory = "3G" String javaXmx = "2G" + String memory = "3G" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -57,9 +58,9 @@ task AnnotateIntervals { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -71,17 +72,15 @@ task AnnotateIntervals { intervalMergingRule: {description: "Equivalent to gatk AnnotateIntervals' `--interval-merging-rule` option.", category: "advanced"} mappabilityTrack: {description: "Equivalent to gatk AnnotateIntervals' `--mappability-track` option.", category: "common"} segmentalDuplicationTrack: {description: "Equivalent to gatk AnnotateIntervals' `--segmenta-duplicarion-track` option.", category: "common"} - featureQueryLookahead: {description: "Equivalent to gatk AnnotateIntervals' `--feature-query-lookahead` option", category: "advanced"} + featureQueryLookahead: {description: "Equivalent to gatk AnnotateIntervals' 
`--feature-query-lookahead` option.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -# Apply Base Quality Score Recalibration (BQSR) model +# Apply Base Quality Score Recalibration (BQSR) model. task ApplyBQSR { input { File inputBam @@ -93,9 +92,11 @@ task ApplyBQSR { File referenceFastaDict File referenceFastaFai - Int memoryMb = javaXmxMb + 512 Int javaXmxMb = 2048 - Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. + Int memoryMb = javaXmxMb + 512 + # This will likely be used with intervals, as such size based + # estimation can't be used. 
+ Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -124,33 +125,29 @@ task ApplyBQSR { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBam: {description: "The BAM file which should be recalibrated.", category: "required"} inputBamIndex: {description: "The input BAM file's index.", category: "required"} outputBamPath: {description: "The location the resulting BAM file should be written.", category: "required"} recalibrationReport: {description: "The BQSR report the be used for recalibration.", category: "required"} sequenceGroupInterval: {description: "Bed files describing the regions to operate on.", category: "advanced"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -# Generate Base Quality Score Recalibration (BQSR) model +# Generate Base Quality Score Recalibration (BQSR) model. task BaseRecalibrator { input { File inputBam @@ -159,14 +156,15 @@ task BaseRecalibrator { Array[File] sequenceGroupInterval = [] Array[File] knownIndelsSitesVCFs = [] Array[File] knownIndelsSitesVCFIndexes = [] - File? dbsnpVCF - File? dbsnpVCFIndex File referenceFasta File referenceFastaDict File referenceFastaFai - Int memoryMb = javaXmxMb + 512 + File? dbsnpVCF + File? dbsnpVCFIndex + Int javaXmxMb = 1024 + Int memoryMb = javaXmxMb + 512 Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -190,42 +188,39 @@ task BaseRecalibrator { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBam: {description: "The BAM file to generate a BQSR report for.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} recalibrationReportPath: {description: "The location to write the BQSR report to.", category: "required"} sequenceGroupInterval: {description: "Bed files describing the regions to operate on.", category: "advanced"} knownIndelsSitesVCFs: {description: "VCF files with known indels.", category: "advanced"} knownIndelsSitesVCFIndexes: {description: "The indexed for the known variant VCFs.", category: "advanced"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - + javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } task CalculateContamination { input { File tumorPileups + File? normalPileups - String memory = "13G" String javaXmx = "12G" + String memory = "13G" Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -246,20 +241,19 @@ task CalculateContamination { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs tumorPileups: {description: "The pileup summary of a tumor/case sample.", category: "required"} normalPileups: {description: "The pileup summary of the normal/control sample.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -268,8 +262,8 @@ task CallCopyRatioSegments { String outputPrefix File copyRatioSegments - String memory = "3G" String javaXmx = "2G" + String memory = "3G" Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -289,20 +283,19 @@ task CallCopyRatioSegments { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs outputPrefix: {description: "The prefix for the output files.", category: "required"} copyRatioSegments: {description: "The copy ratios file generated by gatk ModelSegments.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -310,15 +303,16 @@ task CollectAllelicCounts { input { String allelicCountsPath = "allelic_counts.tsv" File commonVariantSites - File? commonVariantSitesIndex File inputBam File inputBamIndex File referenceFasta File referenceFastaDict File referenceFastaFai - String memory = "11G" + File? commonVariantSitesIndex + String javaXmx = "10G" + String memory = "11G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -339,26 +333,25 @@ task CollectAllelicCounts { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs allelicCountsPath: {description: "The path the output should be written to.", category: "advanced"} commonVariantSites: {description: "Interval list or vcf of common variant sites (to retrieve the allelic counts for).", category: "required"} - commonVariantSitesIndex: {description: "The index for commonVariantSites.", category: "common"} inputBam: {description: "The BAM file to generate counts for.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + commonVariantSitesIndex: {description: "The index for commonVariantSites.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -373,8 +366,8 @@ task CollectReadCounts { File referenceFastaFai String intervalMergingRule = "OVERLAPPING_ONLY" - String memory = "8G" String javaXmx = "7G" + String memory = "8G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 5) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -397,12 +390,13 @@ task CollectReadCounts { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs countsPath: {description: "The location the output should be written to.", category: "advanced"} intervals: {description: "The intervals to collect counts for.", category: "required"} inputBam: {description: "The BAM file to determine the coverage for.", category: "required"} @@ -411,12 +405,10 @@ task CollectReadCounts { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} intervalMergingRule: {description: "Equivalent to gatk CollectReadCounts' `--interval-merging-rule` option.", category: "advanced"} + javaXmx: 
{description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -430,8 +422,8 @@ task CombineGVCFs { File referenceFastaDict File referenceFastaFai - String memory = "5G" String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 1 + ceil(size(gvcfFiles, "G") * 8) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -453,28 +445,24 @@ task CombineGVCFs { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs gvcfFiles: {description: "The GVCF files to be combined.", category: "required"} gvcfFilesIndex: {description: "The indexes for the GVCF files.", caregory: "required"} intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"} outputPath: {description: "The location the combined GVCF should be written to.", category: "required"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: 
"The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -486,12 +474,12 @@ task CombineVariants { String genotypeMergeOption = "UNIQUIFY" String filteredRecordsMergeType = "KEEP_IF_ANY_UNFILTERED" Array[String]+ identifiers - Array[File]+ variantVcfs # follow "identifiers" array order + Array[File]+ variantVcfs # Follow "identifiers" array order. Array[File]+ variantIndexes String outputPath - String memory = "13G" String javaXmx = "12G" + String memory = "13G" Int timeMinutes = 180 String dockerImage = "broadinstitute/gatk3:3.8-1" } @@ -499,17 +487,17 @@ task CombineVariants { command <<< set -e mkdir -p "$(dirname ~{outputPath})" - - # build "-V: " arguments according to IDs and VCFs to merge - # Make sure commands are run in bash + # Build "-V: " arguments according to IDs + # and VCFs to merge. + # Make sure commands are run in bash. 
V_args=$(bash -c ' set -eu ids=(~{sep=" " identifiers}) vars=(~{sep=" " variantVcfs}) for (( i = 0; i < ${#ids[@]}; ++i )) - do + do printf -- "-V:%s %s " "${ids[i]}" "${vars[i]}" - done + done ') java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 -jar /usr/GenomeAnalysisTK.jar \ -T CombineVariants \ @@ -526,12 +514,13 @@ task CombineVariants { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} @@ -540,14 +529,11 @@ task CombineVariants { identifiers: {description: "The sample identifiers in the same order as variantVcfs.", category: "required"} variantVcfs: {description: "The input VCF files in the same order as identifiers.", category: "required"} variantIndexes: {description: "The indexes of the input VCF files.", category: "required"} - outputPath: {description: "The location the output should be written to", category: "required"} - + outputPath: {description: "The location the output should be written to.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -555,10 +541,11 @@ task CreateReadCountPanelOfNormals { input { String PONpath = "PON.hdf5" Array[File]+ readCountsFiles + File? annotatedIntervals - String memory = "8G" String javaXmx = "7G" + String memory = "8G" Int timeMinutes = 5 String dockerImage = "broadinstitute/gatk:4.1.8.0" # The biocontainer causes a spark related error for some reason... } @@ -578,34 +565,33 @@ task CreateReadCountPanelOfNormals { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs PONpath: {description: "The location the PON should be written to.", category: "common"} readCountsFiles: {description: "The read counts files as generated by CollectReadCounts.", category: "required"} - annotatedIntervals: {description: "An annotation set of intervals as generated by AnnotateIntervals. If provided, explicit GC correction will be performed.", - category: "advanced"} + annotatedIntervals: {description: "An annotation set of intervals as generated by AnnotateIntervals. If provided, explicit GC correction will be performed.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } task DenoiseReadCounts { input { - File? PON - File? annotatedIntervals File readCounts String outputPrefix - String memory = "5G" + File? PON + File? annotatedIntervals + String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -628,23 +614,21 @@ task DenoiseReadCounts { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - PON: {description: "A panel of normals as generated by CreateReadCountPanelOfNormals.", category: "advanced"} - annotatedIntervals: {description: "An annotated set of intervals as generated by AnnotateIntervals. Will be ignored if PON is provided.", - category: "advanced"} + # inputs readCounts: {description: "The read counts file as generated by CollectReadCounts.", category: "required"} outputPrefix: {description: "The prefix for the output files.", category: "required"} + PON: {description: "A panel of normals as generated by CreateReadCountPanelOfNormals.", category: "advanced"} + annotatedIntervals: {description: "An annotated set of intervals as generated by AnnotateIntervals. Will be ignored if PON is provided.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -656,14 +640,15 @@ task FilterMutectCalls { File unfilteredVcf File unfilteredVcfIndex String outputVcf + Int uniqueAltReadCount = 4 + File mutect2Stats + File? contaminationTable File? mafTumorSegments File? artifactPriors - Int uniqueAltReadCount = 4 - File mutect2Stats - String memory = "13G" String javaXmx = "12G" + String memory = "13G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -692,41 +677,39 @@ task FilterMutectCalls { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} unfilteredVcf: {description: "An unfiltered VCF file as produced by Mutect2.", category: "required"} unfilteredVcfIndex: {description: "The index of the unfiltered VCF file.", category: "required"} outputVcf: {description: 
"The location the filtered VCF file should be written.", category: "required"} + uniqueAltReadCount: {description: "Equivalent to FilterMutectCalls' `--unique-alt-read-count` option.", category: "advanced"} + mutect2Stats: {description: "Equivalent to FilterMutectCalls' `-stats` option.", category: "advanced"} contaminationTable: {description: "Equivalent to FilterMutectCalls' `--contamination-table` option.", category: "advanced"} mafTumorSegments: {description: "Equivalent to FilterMutectCalls' `--tumor-segmentation` option.", category: "advanced"} artifactPriors: {description: "Equivalent to FilterMutectCalls' `--ob-priors` option.", category: "advanced"} - uniqueAltReadCount: {description: "Equivalent to FilterMutectCalls' `--unique-alt-read-count` option.", category: "advanced"} - mutect2Stats: {description: "Equivalent to FilterMutectCalls' `-stats` option.", category: "advanced"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -# Combine multiple recalibration tables from scattered BaseRecalibrator runs +# Combine multiple recalibration tables from scattered BaseRecalibrator runs. 
task GatherBqsrReports { input { Array[File] inputBQSRreports String outputReportPath - Int memoryMb = 256 + javaXmxMb Int javaXmxMb = 256 + Int memoryMb = 256 + javaXmxMb Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -745,21 +728,19 @@ task GatherBqsrReports { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBQSRreports: {description: "The BQSR reports to be merged.", category: "required"} outputReportPath: {description: "The location of the combined BQSR report.", category: "required"} - + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -770,9 +751,11 @@ task GenomicsDBImport { Array[File]+ intervals String genomicsDBWorkspacePath = "genomics_db" String genomicsDBTarFile = "genomics_db.tar.gz" + String? 
tmpDir - String memory = "5G" + String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -794,25 +777,23 @@ task GenomicsDBImport { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs gvcfFiles: {description: "The gvcfFiles to be merged.", category: "required"} gvcfFilesIndex: {description: "Indexes for the gvcfFiles.", category: "required"} intervals: {description: "intervals over which to operate.", category: "required"} - genomicsDBWorkspacePath: {description: "Where the genomicsDB files should be stored", category: "advanced"} - genomicsDBTarFile: {description: "Where the .tar file containing the genomicsDB should be stored", category: "advanced"} - tmpDir: {description: "Alternate temporary directory in case there is not enough space. Must be mounted when using containers", - category: "advanced"} + genomicsDBWorkspacePath: {description: "Where the genomicsDB files should be stored.", category: "advanced"} + genomicsDBTarFile: {description: "Where the .tar file containing the genomicsDB should be stored.", category: "advanced"} + tmpDir: {description: "Alternate temporary directory in case there is not enough space. Must be mounted when using containers.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -820,18 +801,19 @@ task GenotypeGVCFs { input { File gvcfFile File gvcfFileIndex - Array[File]? intervals String outputPath File referenceFasta File referenceFastaDict File referenceFastaFai Array[String] annotationGroups = ["StandardAnnotation"] + + Array[File]? intervals File? dbsnpVCF File? dbsnpVCFIndex File? pedigree - String memory = "7G" String javaXmx = "6G" + String memory = "7G" Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -854,35 +836,31 @@ task GenotypeGVCFs { output { File outputVCF = outputPath File outputVCFIndex = outputPath + ".tbi" - } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs gvcfFile: {description: "The GVCF file to be genotyped.", category: "required"} gvcfFileIndex: {description: "The index of the input GVCF file.", category: "required"} - intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "optional"} outputPath: {description: "The location to write the output VCF file to.", category: "required"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", 
category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - annotationGroups: {description: "Which annotation groups will be used for the annotation", category: "advanced"} + annotationGroups: {description: "Which annotation groups will be used for the annotation.", category: "advanced"} + intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "optional"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} - pedigree: {description: "Pedigree file for determining the population \"founders\"", category: "common"} + pedigree: {description: "Pedigree file for determining the population \"founders\".", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -896,8 +874,8 @@ task GetPileupSummaries { File sitesForContaminationIndex String outputPrefix - String memory = "13G" String javaXmx = "12G" + String memory = "13G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -917,12 +895,13 @@ task GetPileupSummaries { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs sampleBam: {description: "A BAM file for which a pileup should be created.", category: "required"} sampleBamIndex: {description: "The index of the input BAM file.", category: "required"} variantsForContamination: {description: "A VCF file with common variants.", category: "required"} @@ -930,13 +909,10 @@ task GetPileupSummaries { sitesForContamination: {description: "A bed file describing regions to operate on.", category: "required"} sitesForContaminationIndex: {description: "The index for the bed file.", category: "required"} outputPrefix: {description: "The prefix for the ouput.", category: "required"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -945,26 +921,27 @@ task HaplotypeCaller { input { Array[File]+ inputBams Array[File]+ inputBamsIndex - Array[File]+? intervalList - Array[File]+? excludeIntervalList String outputPath File referenceFasta File referenceFastaIndex File referenceFastaDict + Boolean gvcf = false + String emitRefConfidence = if gvcf then "GVCF" else "NONE" + Boolean dontUseSoftClippedBases = false + + Array[File]+? intervalList + Array[File]+? excludeIntervalList Float? contamination File? dbsnpVCF File? dbsnpVCFIndex File? pedigree Int? ploidy String? outputMode - Boolean gvcf = false - String emitRefConfidence = if gvcf then "GVCF" else "NONE" - Boolean dontUseSoftClippedBases = false Float? standardMinConfidenceThresholdForCalling - Int memoryMb = javaXmxMb + 512 - # Memory increases with time used. 4G should cover most use cases. Int javaXmxMb = 4096 + # Memory increases with time used. 4G should cover most use cases. + Int memoryMb = javaXmxMb + 512 Int timeMinutes = 400 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -995,50 +972,44 @@ task HaplotypeCaller { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBams: {description: "The BAM files on which to perform variant calling.", category: "required"} inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} - intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"} - excludeIntervalList: {description: "Bed files or interval lists describing the regions to NOT operate on.", category: "common"} outputPath: {description: "The location to write the output to.", category: "required"} - ploidy: {description: "The ploidy with which the variants should be called.", category: "common"} - gvcf: {description: "Whether the output should be a gvcf", category: "common"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaIndex: {description: "The index for the reference fasta file.", category: "required"} - contamination: {description: "Equivalent to HaplotypeCaller's `-contamination` option.", category: "advanced"} - outputMode: {description: "Specifies which type of calls we should output. Same as HaplotypeCaller's `--output-mode` option.", - category: "advanced"} - emitRefConfidence: {description: "Whether to include reference calls. 
Three modes: 'NONE', 'BP_RESOLUTION' and 'GVCF'", - category: "advanced"} + gvcf: {description: "Whether the output should be a gvcf.", category: "common"} + emitRefConfidence: {description: "Whether to include reference calls. Three modes: 'NONE', 'BP_RESOLUTION' and 'GVCF'.", category: "advanced"} dontUseSoftClippedBases: {description: "Do not use soft-clipped bases. Should be 'true' for RNA variant calling.", category: "common"} - standardMinConfidenceThresholdForCalling: {description: "Confidence threshold used for calling variants.", category: "advanced"} + intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"} + excludeIntervalList: {description: "Bed files or interval lists describing the regions to NOT operate on.", category: "common"} + contamination: {description: "Equivalent to HaplotypeCaller's `-contamination` option.", category: "advanced"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} - pedigree: {description: "Pedigree file for determining the population \"founders\"", category: "common"} + pedigree: {description: "Pedigree file for determining the population \"founders\".", category: "common"} + ploidy: {description: "The ploidy with which the variants should be called.", category: "common"} + outputMode: {description: "Specifies which type of calls we should output. Same as HaplotypeCaller's `--output-mode` option.", category: "advanced"} + standardMinConfidenceThresholdForCalling: {description: "Confidence threshold used for calling variants.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } - task LearnReadOrientationModel { input { Array[File]+ f1r2TarGz - String memory = "13G" String javaXmx = "12G" + String memory = "13G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1056,19 +1027,18 @@ task LearnReadOrientationModel { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs f1r2TarGz: {description: "A f1r2TarGz file outputed by mutect2.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1076,8 +1046,8 @@ task MergeStats { input { Array[File]+ stats - String memory = "15G" String javaXmx = "14G" + String memory = "15G" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1095,19 +1065,18 @@ task MergeStats { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs stats: {description: "Statistics files to be merged.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1117,14 +1086,13 @@ task ModelSegments { String outputPrefix File denoisedCopyRatios File allelicCounts - File? 
normalAllelicCounts - Int minimumTotalAlleleCountCase = if defined(normalAllelicCounts) - then 0 - else 30 + Int minimumTotalAlleleCountCase = if defined(normalAllelicCounts) then 0 else 30 Int maximumNumberOfSmoothingIterations = 10 - String memory = "11G" + File? normalAllelicCounts + String javaXmx = "10G" + String memory = "11G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1145,7 +1113,6 @@ task ModelSegments { output { File hetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.tsv" - File? normalHetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.normal.tsv" File copyRatioSegments = outputDir + "/" + outputPrefix + ".cr.seg" File copyRatioCBS = outputDir + "/" + outputPrefix + ".cr.igv.seg" File alleleFractionCBS = outputDir + "/" + outputPrefix + ".af.igv.seg" @@ -1155,29 +1122,28 @@ task ModelSegments { File modeledSegments = outputDir + "/" + outputPrefix + ".modelFinal.seg" File copyRatioParameters = outputDir + "/" + outputPrefix + ".modelFinal.cr.param" File alleleFractionParameters = outputDir + "/" + outputPrefix + ".modelFinal.af.param" + File? normalHetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.normal.tsv" } runtime { - docker: dockerImage - time_minute: timeMinutes memory: memory + time_minute: timeMinutes + docker: dockerImage } parameter_meta { + # inputs outputDir: {description: "The directory to write the ouput to.", category: "common"} outputPrefix: {description: "The prefix of the output files. 
Should not include directories.", category: "required"} denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"} allelicCounts: {description: "The allelicCounts as generate by CollectAllelicCounts.", category: "required" } - normalAllelicCounts: {description: "The allelicCounts as generate by CollectAllelicCounts for a matched normal.", category: "common"} minimumTotalAlleleCountCase: {description: "Equivalent to gatk ModelSeqments' `--minimum-total-allele-count-case` option.", category: "advanced"} maximumNumberOfSmoothingIterations: {description: "Equivalent to gatk ModelSeqments' `--maximum-number-of-smoothing-iterations` option.", category: "advanced"} - + normalAllelicCounts: {description: "The allelicCounts as generate by CollectAllelicCounts for a matched normal.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1190,17 +1156,18 @@ task MuTect2 { File referenceFastaFai String outputVcf String tumorSample + String f1r2TarGz = "f1r2.tar.gz" + Array[File]+ intervals + String outputStats = outputVcf + ".stats" + String? normalSample File? germlineResource File? 
germlineResourceIndex File? panelOfNormals File? panelOfNormalsIndex - String f1r2TarGz = "f1r2.tar.gz" - Array[File]+ intervals - String outputStats = outputVcf + ".stats" - String memory = "5G" String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1229,12 +1196,13 @@ task MuTect2 { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBams: {description: "The BAM files on which to perform variant calling.", category: "required"} inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} @@ -1242,20 +1210,18 @@ task MuTect2 { referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} outputVcf: {description: "The location to write the output VCF file to.", category: "required"} tumorSample: {description: "The name of the tumor/case sample.", category: "required"} + f1r2TarGz: {description: "Equivalent to Mutect2's `--f1r2-tar-gz` option.", category: "advanced"} + intervals: {description: "Bed files describing the regiosn to operate on.", category: "required"} + outputStats: {description: "The location the output statistics should be written to.", category: "advanced"} normalSample: {description: "The name of the normal/control sample.", category: "common"} germlineResource: {description: "Equivalent to Mutect2's `--germline-resource` option.", category: "advanced"} germlineResourceIndex: {description: "The index for the germline resource.", category: "advanced"} panelOfNormals: {description: "Equivalent to Mutect2's `--panel-of-normals` option.", category: "advanced"} panelOfNormalsIndex: {description: "The index for the panel of normals.", category: "advanced"} - f1r2TarGz: 
{description: "Equivalent to Mutect2's `--f1r2-tar-gz` option.", category: "advanced"} - intervals: {description: "Bed files describing the regiosn to operate on.", category: "required"} - outputStats: {description: "The location the output statistics should be written to.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1266,10 +1232,11 @@ task PlotDenoisedCopyRatios { String outputPrefix File standardizedCopyRatios File denoisedCopyRatios + Int? minimumContigLength - String memory = "4G" String javaXmx = "3G" + String memory = "4G" Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.8.0" } @@ -1289,32 +1256,31 @@ task PlotDenoisedCopyRatios { output { File denoisedCopyRatiosPlot = outputDir + "/" + outputPrefix + ".denoised.png" - File? 
denoisedCopyRatiosLimitedPlot = outputDir + "/" + outputPrefix + ".denoisedLimit4.png" File standardizedMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".standardizedMAD.txt" File denoisedMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".denoisedMAD.txt" File deltaMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".deltaMAD.txt" File deltaScaledMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".scaledDeltaMAD.txt" + File? denoisedCopyRatiosLimitedPlot = outputDir + "/" + outputPrefix + ".denoisedLimit4.png" } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file used for the analyses.", category: "required"} outputDir: {description: "The directory to write the ouput to.", category: "common"} outputPrefix: {description: "The prefix of the output files. Should not include directories.", category: "required"} - denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"} standardizedCopyRatios: {description: "The standardized copy ratios as generated by DenoiseReadCounts.", category: "required"} + denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"} minimumContigLength: {description: "The minimum length for a contig to be included in the plots.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1326,10 +1292,11 @@ task PlotModeledSegments { File denoisedCopyRatios File segments File allelicCounts + Int? minimumContigLength - String memory = "4G" String javaXmx = "3G" + String memory = "4G" Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.8.0" } @@ -1353,12 +1320,13 @@ task PlotModeledSegments { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file used for the analyses.", category: "required"} outputDir: {description: "The directory to write the ouput to.", category: "common"} outputPrefix: {description: "The prefix of the output files. Should not include directories.", category: "required"} @@ -1366,12 +1334,10 @@ task PlotModeledSegments { segments: {description: "The modeled segments as generated by ModelSegments.", category: "required"} allelicCounts: {description: "The hetrozygous allelic counts as generated by ModelSegments.", category: "required"} minimumContigLength: {description: "The minimum length for a contig to be included in the plots.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1380,14 +1346,15 @@ task PreprocessIntervals { File referenceFasta File referenceFastaDict File referenceFastaFai - File? intervals String outputIntervalList = "bins.interval_list" Int binLength = if defined(intervals) then 0 else 1000 Int padding = if defined(intervals) then 250 else 0 String intervalMergingRule = "OVERLAPPING_ONLY" - String memory = "4G" + File? 
intervals + String javaXmx = "3G" + String memory = "4G" Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 6) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1411,41 +1378,42 @@ task PreprocessIntervals { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - referenceFasta: {description: "The reference fasta file..", category: "required"} + # inputs + referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - intervals: {description: "Bed files describing the regiosn to operate on.", category: "common"} outputIntervalList: {description: "The location the output should be written to.", category: "advanced"} binLength: {description: "The size of the bins to be created. Should be 0 for targeted/exome sequencing.", category: "advanced"} padding: {description: "The padding to be added to the bins. Should be 0 if contiguos binning is used, eg with WGS.", category: "advanced"} intervalMergingRule: {description: "Equivalent to gatk PreprocessIntervals' `--interval-merging-rule` option.", category: "advanced"} + intervals: {description: "Bed files describing the regiosn to operate on.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } task SelectVariants { input { + File inputVcf + File inputVcfIndex File referenceFasta File referenceFastaDict File referenceFastaFai - File inputVcf - File inputVcfIndex String outputPath = "output.vcf.gz" - String? selectTypeToInclude Array[File] intervals = [] - String memory = "5G" + + String? selectTypeToInclude + String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1468,29 +1436,25 @@ task SelectVariants { } runtime { - docker: dockerImage - time_minute: timeMinutes memory: memory + time_minute: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputVcf: {description: "The VCF input file.", category: "required"} inputVcfIndex: {description: "The input VCF file's index.", category: "required"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - selectTypeToInclude: {description: "Select only 
a certain type of variants from the input file", category: "common"} outputPath: {description: "The location the output VCF file should be written.", category: "advanced"} intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"} - + selectTypeToInclude: {description: "Select only a certain type of variants from the input file.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1504,8 +1468,8 @@ task SplitNCigarReads { String outputBam Array[File] intervals = [] - String memory = "5G" String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1527,28 +1491,24 @@ task SplitNCigarReads { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBam: {description: "The BAM file for which spliced reads should be split.", category: "required"} inputBamIndex: {description: "The input BAM file's index.", category: "required"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} outputBam: {description: "The location the output BAM file should be written.", category: "required"} intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -1558,11 +1518,6 @@ task VariantEval { Array[File] evalVcfsIndex Array[File] comparisonVcfs = [] Array[File] comparisonVcfsIndex = [] - File? referenceFasta - File? referenceFastaDict - File? referenceFastaFai - File? dbsnpVCF - File? dbsnpVCFIndex Array[File] intervals = [] String outputPath = "eval.table" Boolean doNotUseAllStandardModules = false @@ -1572,8 +1527,14 @@ task VariantEval { Array[String] samples = [] Boolean mergeEvals = false - String memory = "5G" + File? referenceFasta + File? referenceFastaDict + File? referenceFastaFai + File? dbsnpVCF + File? dbsnpVCFIndex + String javaXmx = "4G" + String memory = "5G" # TODO: Refine estimate. For now 4 minutes per GB of input. Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs, select_all([referenceFasta, dbsnpVCF])]), "G") * 20) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" @@ -1604,35 +1565,37 @@ task VariantEval { runtime { cpu: 1 - docker: dockerImage memory: memory time_minutes: timeMinutes + docker: dockerImage } + parameter_meta { + # inputs evalVcfs: {description: "Variant sets to evaluate.", category: "required"} evalVcfsIndex: {description: "Indexes for the variant sets.", category: "required"} comparisonVcfs: {description: "Compare set vcfs.", category: "advanced"} comparisonVcfsIndex: {description: "Indexes for the compare sets.", category: "advanced"} - evalModules: {description: "One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless doNotUseAllStandardModules=true)", category: "common"} - stratificationModules: {description: "One or more specific stratification modules to apply to the eval track(s) (in addition to 
the standard stratifications, unless doNotUseAllStandardStratifications=true)", category: "common"} - samples: {description: "Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context." , category: "advanced"} # Advanced because this description is impossible to understand... - mergeEvals: {description: "If provided, all evalVcf tracks will be merged into a single eval track", category: "common"} + intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"} + outputPath: {description: "The location the output table should be written.", category: "advanced"} doNotUseAllStandardModules: {description: "Do not use the standard modules by default (instead, only those that are specified with the evalModules option).", category: "common"} doNotUseAllStandardStratifications: {description: "Do not use the standard stratification modules by default (instead, only those that are specified with the stratificationModules option).", category: "common"} + evalModules: {description: "One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless doNotUseAllStandardModules=true).", category: "common"} + stratificationModules: {description: "One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless doNotUseAllStandardStratifications=true).", category: "common"} + samples: {description: "Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context." 
, category: "advanced"} + mergeEvals: {description: "If provided, all evalVcf tracks will be merged into a single eval track.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "common"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "common"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "common"} dbsnpVCF: {description: "A dbSNP VCF.", category: "common"} dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"} - outputPath: {description: "The location the output table should be written.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } + task VariantFiltration { input { File inputVcf @@ -1644,8 +1607,8 @@ task VariantFiltration { Array[String]+ filterArguments Array[File] intervals = [] - String memory = "5G" String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1668,29 +1631,24 @@ task VariantFiltration { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputVcf: {description: "The VCF to be filtered.", category: "required"} inputVcfIndex: {description: "The input VCF file's index.", category: "required"} - referenceFasta: {description: "The reference fasta file which was also used for mapping.", - category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} + filterArguments: {description: "Arguments that should be used for the filter. For example: ['--filter-name', 'my_filter', '--filter-expression', 'AB<0.2'].", category: "required"} intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"} - filterArguments: {description: "Arguments that should be used for the filter. 
For example: ['--filter-name', 'my_filter', '--filter-expression', 'AB<0.2']", - category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } - diff --git a/gffcompare.wdl b/gffcompare.wdl index e5f62b5e..5d80f619 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -147,4 +147,4 @@ task GffCompare { exclude: ["noneFile"] } } -} \ No newline at end of file +} From 08d6519a05a9e297decbe81e0e29c633ea07e14f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 30 Oct 2020 17:27:38 +0100 Subject: [PATCH 0667/1208] Try to fix Travis error. --- bam2fastx.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 1b911dbb..0585de23 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -112,13 +112,13 @@ task Bam2Fastq { bamFiles="" for bamFile in ~{sep=" " bam} do - ln ${bamFile} . - bamFiles=${bamFiles}" $(basename ${bamFile})" + ln $bamFile . + bamFiles=$bamFiles" $(basename $bamFile)" done for index in ~{sep=" " bamIndex} do - ln ${index} . + ln $index . 
done bam2fastq \ @@ -126,7 +126,7 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ${bamFiles} + $bamFiles } output { From 9f77348d7a353e93b1f2a57b02942a93107ea634 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 30 Oct 2020 17:30:59 +0100 Subject: [PATCH 0668/1208] Fix second task as well. --- bam2fastx.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 0585de23..2ad08581 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -44,13 +44,13 @@ task Bam2Fasta { bamFiles="" for bamFile in ~{sep=" " bam} do - ln ${bamFile} . - bamFiles=${bamFiles}" $(basename ${bamFile})" + ln $bamFile . + bamFiles=$bamFiles" $(basename $bamFile)" done for index in ~{sep=" " bamIndex} do - ln ${index} . + ln $index . done bam2fasta \ From 840a37d19727ee6edb790287cfd447e9964ce669 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 30 Oct 2020 17:39:16 +0100 Subject: [PATCH 0669/1208] Fix a third Travis error. --- deepvariant.wdl | 2 +- gffcompare.wdl | 44 +++++++++++++++++++++++--------------------- gffread.wdl | 2 +- 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index 20bf8e27..8b08e111 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -81,7 +81,7 @@ task RunDeepVariant { modelType: {description: ". Type of model to use for variant calling. Each model_type has an associated default model, which can be overridden by the --customized_model flag.", category: "required"} outputVcf: {description: "Path where we should write VCF file.", category: "required"} postprocessVariantsExtraArgs: {description: "A comma-separated list of flag_name=flag_value. 'flag_name' has to be valid flags for calpostprocess_variants.py.", category: "advanced"} - customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. 
If not set, the default for each --model_type will be used"., category: "advanced"} + customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. If not set, the default for each --model_type will be used.", category: "advanced"} numShards: {description: "Number of shards for make_examples step.", category: "common"} outputGVcf: {description: "Path where we should write gVCF file.", category: "common"} regions: {description: "List of regions we want to process, in BED/BEDPE format.", category: "advanced"} diff --git a/gffcompare.wdl b/gffcompare.wdl index 5d80f619..8bd53091 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -22,16 +22,11 @@ version 1.0 task GffCompare { input { - File? inputGtfList Array[File] inputGtfFiles File referenceAnnotation - String? outputDir - String outPrefix = "gffcmp" # gffcmp is the default used by the program as well. This - # needs to be defined in order for the output values to be consistent and correct. - File? genomeSequences - Int? maxDistanceFreeEndsTerminalExons - Int? maxDistanceGroupingTranscriptStartSites - String? namePrefix + # gffcmp is the default used by the program as well. This needs to be + # defined in order for the output values to be consistent and correct. + String outPrefix = "gffcmp" Boolean C = false Boolean A = false Boolean X = false @@ -44,15 +39,22 @@ task GffCompare { Boolean verbose = false Boolean debugMode = false + File? inputGtfList + String? outputDir + File? genomeSequences + Int? maxDistanceFreeEndsTerminalExons + Int? maxDistanceGroupingTranscriptStartSites + String? namePrefix + Int timeMinutes = 1 + ceil(size(inputGtfFiles, "G") * 30) String dockerImage = "quay.io/biocontainers/gffcompare:0.10.6--h2d50403_0" # This workaround only works in the input section. - # Issue addressed at https://github.com/openwdl/wdl/pull/263 + # Issue addressed at https://github.com/openwdl/wdl/pull/263. File? noneFile # This is a wdl workaround. Please do not assign! 
} - # This allows for the creation of output directories + # This allows for the creation of output directories. String dirPrefix = if defined(outputDir) then select_first([outputDir]) + "/" else "" @@ -93,22 +95,22 @@ task GffCompare { then "annotated" else "combined" - # Check if a redundant .gtf will be created + # Check if a redundant .gtf will be created. Boolean createRedundant = C || A || X output { + # noneFile is not stable. Please replace this as soon as wdl spec allows. File annotated = totalPrefix + "." + annotatedName + ".gtf" File loci = totalPrefix + ".loci" File stats = totalPrefix + ".stats" File tracking = totalPrefix + ".tracking" - # noneFile is not stable. Please replace this as soon as wdl spec allows + Array[File] allFiles = select_all([annotated, loci, stats, tracking, redundant, missedIntrons]) File? redundant = if createRedundant then totalPrefix + ".redundant.gtf" else noneFile File? missedIntrons = if debugMode then totalPrefix + ".missed_introns.gtf" else noneFile - Array[File] allFiles = select_all([annotated, loci, stats, tracking, redundant, missedIntrons]) } runtime { @@ -117,15 +119,10 @@ task GffCompare { } parameter_meta { - inputGtfList: {description: "Equivalent to gffcompare's `-i` option.", category: "advanced"} + # inputs inputGtfFiles: {description: "The input GTF files.", category: "required"} referenceAnnotation: {description: "The GTF file to compare with.", category: "required"} - outputDir: {description: "The location the output should be written.", category: "common"} outPrefix: {description: "The prefix for the output.", category: "advanced"} - genomeSequences: {description: "Equivalent to gffcompare's `-s` option.", category: "advanced"} - maxDistanceFreeEndsTerminalExons: {description: "Equivalent to gffcompare's `-e` option.", category: "advanced"} - maxDistanceGroupingTranscriptStartSites: {description: "Equivalent to gffcompare's `-d` option.", category: "advanced"} - namePrefix: {description: "Equivalent to 
gffcompare's `-p` option.", category: "advanced"} C: {description: "Equivalent to gffcompare's `-C` flag.", category: "advanced"} A: {description: "Equivalent to gffcompare's `-A` flag.", category: "advanced"} X: {description: "Equivalent to gffcompare's `-X` flag.", category: "advanced"} @@ -137,9 +134,14 @@ task GffCompare { noTmap: {description: "Equivalent to gffcompare's `-T` flag.", category: "advanced"} verbose: {description: "Equivalent to gffcompare's `-V` flag.", category: "advanced"} debugMode: {description: "Equivalent to gffcompare's `-D` flag.", category: "advanced"} + inputGtfList: {description: "Equivalent to gffcompare's `-i` option.", category: "advanced"} + outputDir: {description: "The location the output should be written.", category: "common"} + genomeSequences: {description: "Equivalent to gffcompare's `-s` option.", category: "advanced"} + maxDistanceFreeEndsTerminalExons: {description: "Equivalent to gffcompare's `-e` option.", category: "advanced"} + maxDistanceGroupingTranscriptStartSites: {description: "Equivalent to gffcompare's `-d` option.", category: "advanced"} + namePrefix: {description: "Equivalent to gffcompare's `-p` option.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } meta { diff --git a/gffread.wdl b/gffread.wdl index d83e4d76..76ee20d1 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -79,4 +79,4 @@ task GffRead { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} From ca4fe2d92f42b2c32b42197deeef204cec07762f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 2 Nov 2020 08:54:32 +0100 Subject: [PATCH 0670/1208] Add another batch of updates. --- CHANGELOG.md | 1 + gatk.wdl | 1 + gffread.wdl | 16 +++++++------ gridss.wdl | 15 +++++++------ hisat2.wdl | 32 +++++++++++++------------- htseq.wdl | 13 ++++++----- isoseq3.wdl | 18 +++++++-------- lima.wdl | 10 ++++----- macs2.wdl | 2 +- manta.wdl | 19 +++++++++------- minimap2.wdl | 27 +++++++++++----------- multiqc.wdl | 63 ++++++++++++++++++++++++++-------------------------- nanopack.wdl | 10 ++++----- 13 files changed, 119 insertions(+), 108 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c04b582..028c7400 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ isoseq3: `cores` input has been renamed to `threads` to match tool naming. + CCS: `cores` input has been renamed to `threads` to match tool naming. + CCS: Update CCS to version 5. + deepvariant: Add task for DeepVariant. diff --git a/gatk.wdl b/gatk.wdl index cc5d1de5..7aa2915c 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -64,6 +64,7 @@ task AnnotateIntervals { } parameter_meta { + # inputs referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} diff --git a/gffread.wdl b/gffread.wdl index 76ee20d1..343011e9 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -24,19 +24,21 @@ task GffRead { input { File inputGff File genomicSequence + Boolean outputGtfFormat = false + File? genomicIndex # Optional. 
GFFRead can create this by itself. String? exonsFastaPath String? CDSFastaPath String? proteinFastaPath String? filteredGffPath - Boolean outputGtfFormat = false + Int timeMinutes = 1 + ceil(size(inputGff) * 10) String dockerImage = "quay.io/biocontainers/gffread:0.9.12--0" } # The mkdirs below are hackish. It should be - # ~{"mkir -p $(dirname " + somePath + ")"} - # but this goes wrong. Cromwell will always use ')' even if somepath is not defined. + # ~{"mkir -p $(dirname " + somePath + ")"} but this goes wrong. + # Cromwell will always use ')' even if somepath is not defined. # Which leads to crashing. command { set -e @@ -62,21 +64,21 @@ task GffRead { } runtime { - docker: dockerImage time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputGff: {description: "The input GFF file.", category: "required"} genomicSequence: {description: "The genome.", category: "required"} + outputGtfFormat: {description: "Equivalent to gffread's `-T` flag.", category: "advanced"} genomicIndex: {description: "The genome's index.", category: "advanced"} exonsFastaPath: {description: "The location the exons fasta should be written to.", category: "advanced"} CDSFastaPath: {description: "The location the CDS fasta should be written to.", category: "advanced"} proteinFastaPath: {description: "The location the protein fasta should be written to.", category: "advanced"} filteredGffPath: {description: "The location the filtered GFF should be written to.", category: "advanced"} - outputGtfFormat: {description: "Equivalent to gffread's `-T` flag.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/gridss.wdl b/gridss.wdl index 44b9e9f1..9499be5e 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -27,11 +27,12 @@ task GRIDSS { File tumorBam File tumorBai String tumorLabel + BwaIndex reference + String outputPrefix = "gridss" + File? normalBam File? normalBai String? normalLabel - BwaIndex reference - String outputPrefix = "gridss" Int jvmHeapSizeGb = 30 Int threads = 1 @@ -68,17 +69,17 @@ task GRIDSS { } parameter_meta { + # inputs tumorBam: {description: "The input BAM file. This should be the tumor/case sample in case of a paired analysis.", category: "required"} tumorBai: {description: "The index for tumorBam.", category: "required"} tumorLabel: {description: "The name of the (tumor) sample.", category: "required"} + reference: {description: "A BWA index, this should also include the fasta index file (.fai).", category: "required"} + outputPrefix: {description: "The prefix for the output files. This may include parent directories.", category: "common"} normalBam: {description: "The BAM file for the normal/control sample.", category: "advanced"} normalBai: {description: "The index for normalBam.", category: "advanced"} normalLabel: {description: "The name of the normal sample.", category: "advanced"} - reference: {description: "A BWA index, this should also include the fasta index file (.fai).", category: "required"} - outputPrefix: {description: "The prefix for the output files. This may include parent directories.", category: "common"} - + jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling.",category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} - jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} diff --git a/hisat2.wdl b/hisat2.wdl index f9a4bc59..b52bf70f 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -22,9 +22,9 @@ version 1.0 task Hisat2 { input { - Array[File]+ indexFiles File inputR1 File? inputR2 + Array[File]+ indexFiles String outputBam String sample String library @@ -32,22 +32,22 @@ task Hisat2 { String platform = "illumina" Boolean downstreamTranscriptomeAssembly = true String summaryFilePath = basename(outputBam, ".bam") + ".summary.txt" - - Int threads = 4 - Int? sortThreads Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 + + Int? sortThreads + + Int threads = 4 Int? memoryGb Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / threads) # quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1 - # is a combination of hisat2 and samtools - # hisat2=2.2.0, samtools=1.10 + # is a combination of hisat2 and samtools hisat2=2.2.0 & samtools=1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:2880dd9d8ad0a7b221d4eacda9a818e92983128d-0" } - # Samtools sort may block the pipe while it is writing data to disk. + # Samtools sort may block the pipe while it is writing data to disk. # This can lead to cpu underutilization. - # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. + # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads. 
Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) Int estimatedMemoryGb = 1 + ceil(size(indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads @@ -81,16 +81,17 @@ task Hisat2 { } runtime { - memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" cpu: threads + memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" time_minutes: timeMinutes docker: dockerImage } parameter_meta { - indexFiles: {description: "The hisat2 index files.", category: "required"} + # inputs inputR1: {description: "The first-/single-end FastQ file.", category: "required"} inputR2: {description: "The second-end FastQ file.", category: "common"} + indexFiles: {description: "The hisat2 index files.", category: "required"} outputBam: {description: "The location the output BAM file should be written to.", category: "required"} sample: {description: "The sample id.", category: "required"} library: {description: "The library id.", category: "required"} @@ -98,13 +99,12 @@ task Hisat2 { platform: {description: "The platform used for sequencing.", category: "advanced"} downstreamTranscriptomeAssembly: {description: "Equivalent to hisat2's `--dta` flag.", category: "advanced"} summaryFilePath: {description: "Where the summary file should be written.", category: "advanced"} + sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: 
{description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} - sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} - compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} diff --git a/htseq.wdl b/htseq.wdl index cbd8e2ac..cf527535 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -27,9 +27,10 @@ task HTSeqCount { String outputTable = "output.tsv" String order = "pos" String stranded = "no" + Array[String] additionalAttributes = [] + String? featureType String? idattr - Array[String] additionalAttributes = [] Int nprocesses = 1 String memory = "8G" @@ -58,24 +59,24 @@ task HTSeqCount { runtime { cpu: nprocesses - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes docker: dockerImage } parameter_meta { + # inputs inputBams: {description: "The input BAM files.", category: "required"} gtfFile: {description: "A GTF/GFF file containing the features of interest.", category: "required"} outputTable: {description: "The path to which the output table should be written.", category: "common"} - nprocesses: {description: "Number of processes to run htseq with.", category: "advanced"} order: {description: "Equivalent to the -r option of htseq-count.", category: "advanced"} stranded: {description: "Equivalent to the -s option of htseq-count.", category: "common"} + additionalAttributes: {description: "Equivalent to the --additional-attr option of htseq-count.", category: "advanced"} featureType: {description: "Equivalent to the --type option of htseq-count.", 
category: "advanced"} idattr: {description: "Equivalent to the --idattr option of htseq-count.", category: "advanced"} - additionalAttributes: {description: "Equivalent to the --additional-attr option of htseq-count.", category: "advanced"} + nprocesses: {description: "Number of processes to run htseq with.", category: "advanced"} memory: {description: "The amount of memory the job requires in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/isoseq3.wdl b/isoseq3.wdl index 5060f0e7..c1c4397c 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -31,7 +31,7 @@ task Refine { String outputDir String outputNamePrefix - Int cores = 2 + Int threads = 2 String memory = "2G" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" @@ -44,7 +44,7 @@ task Refine { --min-polya-length ~{minPolyALength} \ ~{true="--require-polya" false="" requirePolyA} \ --log-level ~{logLevel} \ - --num-threads ~{cores} \ + --num-threads ~{threads} \ --log-file "~{outputDir}/~{outputNamePrefix}.stderr.log" \ ~{inputBamFile} \ ~{primerFile} \ @@ -61,7 +61,7 @@ task Refine { } runtime { - cpu: cores + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -77,7 +77,7 @@ task Refine { primerFile: {description: "Barcode/primer fasta file.", category: "required"} outputDir: {description: "Output directory path.", category: "required"} outputNamePrefix: {description: "Basename of the output files.", category: "required"} - cores: {description: "The number of cores to be used.", category: "advanced"} + threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/lima.wdl b/lima.wdl index 7ef9d4ab..1da4ef5e 100644 --- a/lima.wdl +++ b/lima.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE diff --git a/macs2.wdl b/macs2.wdl index fad3cb00..757eaf67 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -54,4 +54,4 @@ task PeakCalling { memory: memory docker: dockerImage } -} \ No newline at end of file +} diff --git a/manta.wdl b/manta.wdl index 5382d2a5..a7b7cf38 100644 --- a/manta.wdl +++ b/manta.wdl @@ -27,9 +27,10 @@ task Germline { File referenceFasta File referenceFastaFai String runDir = "./manta_run" + Boolean exome = false + File? callRegions File? 
callRegionsIndex - Boolean exome = false Int cores = 1 Int memoryGb = 4 @@ -71,9 +72,9 @@ task Germline { referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" } runDir: {description: "The directory to use as run/output directory.", category: "common"} + exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} - exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} cores: {description: "The the number of cores required to run a program", category: "required"} memoryGb: {description: "The memory required to run the manta", category: "required"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -85,14 +86,15 @@ task Somatic { input { File tumorBam File tumorBamIndex - File? normalBam - File? normalBamIndex File referenceFasta File referenceFastaFai String runDir = "./manta_run" + Boolean exome = false + + File? normalBam + File? normalBamIndex File? callRegions File? 
callRegionsIndex - Boolean exome = false Int cores = 1 Int memoryGb = 4 @@ -138,16 +140,17 @@ task Somatic { } parameter_meta { + # inputs tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"} tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} - normalBam: {description: "The normal/control sample's BAM file.", category: "common"} - normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} runDir: {description: "The directory to use as run/output directory.", category: "common"} + exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} + normalBam: {description: "The normal/control sample's BAM file.", category: "common"} + normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "common"} callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} - exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} cores: {description: "The number of cores to use.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} diff --git a/minimap2.wdl b/minimap2.wdl index fb31fb7f..1b719da6 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2019 Leiden University Medical 
Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -61,7 +61,7 @@ task Indexing { } parameter_meta { - # input + # inputs useHomopolymerCompressedKmer: {description: "Use homopolymer-compressed k-mer (preferrable for pacbio).", category: "advanced"} kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"} minimizerWindowSize: {description: "Minimizer window size.", category: "advanced"} @@ -73,7 +73,7 @@ task Indexing { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - # output + # outputs indexFile: {description: "Indexed reference file."} } } @@ -137,27 +137,28 @@ task Mapping { } parameter_meta { + # inputs presetOption: {description: "This option applies multiple options at the same time.", category: "common"} kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"} + skipSelfAndDualMappings: {description: "Skip self and dual mappings (for the all-vs-all mode).", category: "advanced"} outputSam: {description: "Output in the sam format.", category: "common"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + addMDTagToSam: {description: "Adds a MD tag to the sam output file.", category: "common"} + secondaryAlignment: {description: "Whether to output secondary alignments.", category: "advanced"} + referenceFile: {description: "Reference fasta file.", category: "required"} + queryFile: {description: "Input fasta file.", category: "required"} maxIntronLength: {description: "Max intron length (effective with -xsplice; changing -r).", category: "advanced"} maxFragmentLength: {description: "Max fragment length (effective with -xsr or in the fragment mode).", category: "advanced"} - skipSelfAndDualMappings: {description: "Skip self and dual mappings (for the all-vs-all mode).", category: "advanced"} retainMaxSecondaryAlignments: {description: "Retain at most N secondary alignments.", category: "advanced"} matchingScore: {description: "Matching score.", category: "advanced"} mismatchPenalty: {description: "Mismatch penalty.", category: "advanced"} howToFindGTAG: {description: "How to find GT-AG. 
f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} - addMDTagToSam: {description: "Adds a MD tag to the sam output file.", category: "common"} - secondaryAlignment: {description: "Whether to output secondary alignments.", category: "advanced"} - referenceFile: {description: "Reference fasta file.", category: "required"} - queryFile: {description: "Input fasta file.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - # output + # outputs alignmentFile: {description: "Mapping and alignment between collections of dna sequences file."} } } diff --git a/multiqc.wdl b/multiqc.wdl index 7dcf333e..647394e9 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -22,16 +22,28 @@ version 1.0 task MultiQC { input { - # Use a string here so cromwell does not relocate an entire analysis directory + # Use a string here so cromwell does not relocate an entire + # analysis directory. Array[File] reports Boolean force = false Boolean dirs = false - Int? dirsDepth Boolean fullNames = false + String outDir = "." + Boolean dataDir = false + Boolean zipDataDir = true + Boolean export = false + Boolean flat = false + Boolean interactive = true + Boolean lint = false + Boolean pdf = false + # This must be actively enabled in my opinion. + # The tools default is to upload. + Boolean megaQCUpload = false + + Int? dirsDepth String? title String? comment String? fileName - String outDir = "." String? template String? tag String? ignore @@ -40,21 +52,15 @@ task MultiQC { File? fileList Array[String]+? exclude Array[String]+? 
module - Boolean dataDir = false String? dataFormat - Boolean zipDataDir = true - Boolean export = false - Boolean flat = false - Boolean interactive = true - Boolean lint = false - Boolean pdf = false - Boolean megaQCUpload = false # This must be actively enabled in my opinion. The tools default is to upload. File? config # A directory String? clConfig + String? memory Int timeMinutes = 2 + ceil(size(reports, "G") * 8) String dockerImage = "quay.io/biocontainers/multiqc:1.7--py_1" } + Int memoryGb = 2 + ceil(size(reports, "G")) # This is where the reports end up. It does not need to be changed by the @@ -69,8 +75,9 @@ task MultiQC { # By hashing the parent path we make sure there are no file colissions as # files from the same directory end up in the same directory, while files # from other directories get their own directory. Cromwell also uses this - # strategy. Using python's builtin hash is unique enough for these purposes. - + # strategy. Using python's builtin hash is unique enough + # for these purposes. + command { python3 < Date: Mon, 2 Nov 2020 09:17:33 +0100 Subject: [PATCH 0671/1208] Address travis error. --- CHANGELOG.md | 3 +++ fastqc.wdl | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 028c7400..c331112c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Bwa & bwa-mem2: Add parameter_meta for `outputHla`. ++ Multiqc: Removed WDL_AID excludes of "finished" & "dependencies" inputs. ++ Bam2fastx: Add localisation of input files to Bam2Fasta task. + isoseq3: `cores` input has been renamed to `threads` to match tool naming. + CCS: `cores` input has been renamed to `threads` to match tool naming. + CCS: Update CCS to version 5. 
diff --git a/fastqc.wdl b/fastqc.wdl index dd3dfc2e..feeeaae5 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -47,7 +47,7 @@ task Fastqc { Int threads = 1 String memory = "2G" Int timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 - String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0 + String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" Array[File]? noneArray File? noneFile From 163290340ff4f5ed0488c69d2c194dbb3428a423 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 2 Nov 2020 11:57:06 +0100 Subject: [PATCH 0672/1208] Add another batch of updated tasks. --- CHANGELOG.md | 4 + centrifuge.wdl | 4 +- ncbi.wdl | 51 ++++---- pbbam.wdl | 10 +- pbmm2.wdl | 13 ++- picard.wdl | 312 +++++++++++++++++++++++-------------------------- rtg.wdl | 79 ++++++------- sambamba.wdl | 57 +++++---- 8 files changed, 257 insertions(+), 273 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c331112c..f0dfaf1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Picard: Add parameter_meta to `SortSam`. ++ pbmm2: Add parameter_meta for `sample`. ++ Centrifuge: Rename output in task `KReport` to `KrakenReport` to resolve + name collision with task name. + Bwa & bwa-mem2: Add parameter_meta for `outputHla`. + Multiqc: Removed WDL_AID excludes of "finished" & "dependencies" inputs. + Bam2fastx: Add localisation of input files to Bam2Fasta task. diff --git a/centrifuge.wdl b/centrifuge.wdl index 1637abdd..07dc7f85 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -270,7 +270,7 @@ task KReport { >>> output { - File KReport = outputPrefix + "_kreport.tsv" + File KrakenReport = outputPrefix + "_kreport.tsv" } runtime { @@ -294,7 +294,7 @@ task KReport { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - KReport: {description: "File with kraken style report."} + KrakenReport: {description: "File with kraken style report."} } } diff --git a/ncbi.wdl b/ncbi.wdl index d157d902..da753bac 100644 --- a/ncbi.wdl +++ b/ncbi.wdl @@ -23,6 +23,10 @@ version 1.0 task GenomeDownload { input { String outputPath + Boolean verbose = true + Boolean debug = false + String executable = "ncbi-genome-download" + String? section = "refseq" String? format = "all" String? assemblyLevel = "all" @@ -32,11 +36,7 @@ task GenomeDownload { String? ncbiBaseUri Int? parallel Int? retries - Boolean verbose = true - Boolean debug = false String? domain = "all" - - String executable = "ncbi-genome-download" String? preCommand } @@ -58,22 +58,22 @@ task GenomeDownload { ~{true="--debug" false ="" debug } \ ~{domain} - # Check md5sums for all downloaded files + # Check md5sums for all downloaded files. for folder in $(realpath ~{outputPath})/*/*/* - do - ( - md5sums="$( - cd $folder - for file in * - do - if [[ ! $file == "MD5SUMS" ]] - then - grep $file MD5SUMS - fi - done - )" - cd $folder; echo $md5sums | md5sum -c) - done + do + ( + md5sums="$( + cd $folder + for file in * + do + if [[ ! $file == "MD5SUMS" ]] + then + grep $file MD5SUMS + fi + done + )" + cd $folder; echo $md5sums | md5sum -c) + done } output { @@ -106,7 +106,7 @@ task DownloadNtFasta{ mkdir -p ~{ntDir} rsync -av --partial rsync://ftp.ncbi.nih.gov/blast/db/FASTA/nt.gz* ~{ntDir} (cd ~{ntDir} && md5sum -c nt.gz.md5) - # Only unzip when necessary + # Only unzip when necessary. 
if ~{true='true' false='false' unzip} then zcat ~{ntDir}/nt.gz > ~{ntFilePath} @@ -132,15 +132,16 @@ task DownloadAccessionToTaxId { command { set -e -o pipefail mkdir -p ~{downloadDir} - rsync -av \ - --partial \ - rsync://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/nucl_*.accession2taxid.gz* \ - ~{downloadDir} + rsync \ + -av \ + --partial \ + rsync://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/nucl_*.accession2taxid.gz* \ + ~{downloadDir} (cd ~{downloadDir} && md5sum -c *.md5) for file in ~{downloadDir}/nucl_*.accession2taxid.gz do zcat $file | tail -n +2 | cut -f 2,3 ~{true="| gzip" false='' gzip} > \ - $file.seqtaxmap~{true='.gz' false='' gzip} + $file.seqtaxmap~{true='.gz' false='' gzip} done } diff --git a/pbbam.wdl b/pbbam.wdl index 52737a00..d271a11a 100644 --- a/pbbam.wdl +++ b/pbbam.wdl @@ -18,12 +18,14 @@ version 1.0 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. task Index { input { File bamFile + String? outputBamPath - + String memory = "2G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) String dockerImage = "quay.io/biocontainers/pbbam:1.6.0--h5b7e6e0_0" @@ -60,11 +62,9 @@ task Index { parameter_meta { # inputs bamFile: {description: "The BAM file for which an index should be made.", category: "required"} - outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", - category: "common"} + outputBamPath: {description: "The location where the BAM file should be written to. 
The index will appear alongside this link to the BAM file.", category: "common"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/pbmm2.wdl b/pbmm2.wdl index 31d4c667..5fda1c87 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -58,9 +58,10 @@ task Mapping { } parameter_meta { + # inputs presetOption: {description: "This option applies multiple options at the same time.", category: "required"} sort: {description: "Sort the output bam file.", category: "advanced"} - sample: {description: "Name of the sample"} + sample: {description: "Name of the sample.", category: "required"} referenceMMI: {description: "MMI file for the reference.", category: "required"} queryFile: {description: "BAM file with reads to align against the reference.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} @@ -68,7 +69,7 @@ task Mapping { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - # output + # outputs outputAlignmentFile: {description: "Mapped bam file."} outputIndexFile: {description: "Bam index file."} } diff --git a/picard.wdl b/picard.wdl index 49db8b8b..f1876f7b 100644 --- a/picard.wdl +++ b/picard.wdl @@ -26,8 +26,8 @@ task BedToIntervalList { File dict String outputPath = "regions.interval_list" - String memory = "4G" String javaXmx = "3G" + String memory = "4G" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -47,9 +47,9 @@ task BedToIntervalList { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -57,12 +57,10 @@ task BedToIntervalList { bedFile: {description: "A bed file.", category: "required"} dict: {description: "A sequence dict file.", category: "required"} outputPath: {description: "The location the output interval list should be written to.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -74,17 +72,19 @@ task CollectHsMetrics { File referenceFastaDict File referenceFastaFai File targets - File? baits String basename + File? baits + # Use the targets file as baits as a fallback, since often the baits # for a certain capture kit are not available. File baitsFile = select_first([baits, targets]) File targetsFile = targets - Int memoryMb = javaXmxMb + 512 Int javaXmxMb = 3072 - # Additional * 2 because picard multiple metrics reads the reference fasta twice. + Int memoryMb = javaXmxMb + 512 + # Additional * 2 because picard multiple metrics reads the + # reference fasta twice. 
Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -106,9 +106,9 @@ task CollectHsMetrics { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -116,18 +116,15 @@ task CollectHsMetrics { inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} targets: {description: "Picard interval file of the capture targets.", category: "required"} - baits: {description: "Picard interval file of the capture bait set.", category: "advanced"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} + baits: {description: "Picard interval file of the capture bait set.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -139,7 +136,6 @@ task CollectMultipleMetrics { File referenceFastaDict File referenceFastaFai String basename - Boolean collectAlignmentSummaryMetrics = true Boolean collectInsertSizeMetrics = true Boolean qualityScoreDistribution = true @@ -150,14 +146,13 @@ task CollectMultipleMetrics { Boolean collectSequencingArtifactMetrics = true Boolean collectQualityYieldMetrics = true - Int memoryMb = javaXmxMb + 512 Int javaXmxMb = 3072 + Int memoryMb = javaXmxMb + 512 # Additional * 2 because picard multiple metrics reads the reference fasta twice. 
Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } - command { set -e mkdir -p "$(dirname ~{basename})" @@ -173,8 +168,7 @@ task CollectMultipleMetrics { ~{true="PROGRAM=MeanQualityByCycle" false="" meanQualityByCycle} \ ~{true="PROGRAM=CollectBaseDistributionByCycle" false="" collectBaseDistributionByCycle} \ ~{true="PROGRAM=CollectGcBiasMetrics" false="" collectGcBiasMetrics} \ - ~{true="PROGRAM=CollectSequencingArtifactMetrics" false="" - collectSequencingArtifactMetrics} \ + ~{true="PROGRAM=CollectSequencingArtifactMetrics" false="" collectSequencingArtifactMetrics} \ ~{true="PROGRAM=CollectQualityYieldMetrics" false="" collectQualityYieldMetrics} } @@ -221,9 +215,9 @@ task CollectMultipleMetrics { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -231,30 +225,21 @@ task CollectMultipleMetrics { inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} - collectAlignmentSummaryMetrics: {description: "Equivalent to the `PROGRAM=CollectAlignmentSummaryMetrics` argument.", - category: "advanced"} - collectInsertSizeMetrics: {description: "Equivalent 
to the `PROGRAM=CollectInsertSizeMetrics` argument.", - category: "advanced"} - qualityScoreDistribution: {description: "Equivalent to the `PROGRAM=QualityScoreDistribution` argument.", - category: "advanced"} + collectAlignmentSummaryMetrics: {description: "Equivalent to the `PROGRAM=CollectAlignmentSummaryMetrics` argument.", category: "advanced"} + collectInsertSizeMetrics: {description: "Equivalent to the `PROGRAM=CollectInsertSizeMetrics` argument.", category: "advanced"} + qualityScoreDistribution: {description: "Equivalent to the `PROGRAM=QualityScoreDistribution` argument.", category: "advanced"} meanQualityByCycle: {description: "Equivalent to the `PROGRAM=MeanQualityByCycle` argument.", category: "advanced"} - collectBaseDistributionByCycle: {description: "Equivalent to the `PROGRAM=CollectBaseDistributionByCycle` argument.", - category: "advanced"} + collectBaseDistributionByCycle: {description: "Equivalent to the `PROGRAM=CollectBaseDistributionByCycle` argument.", category: "advanced"} collectGcBiasMetrics: {description: "Equivalent to the `PROGRAM=CollectGcBiasMetrics` argument.", category: "advanced"} - collectSequencingArtifactMetrics: {description: "Equivalent to the `PROGRAM=CollectSequencingArtifactMetrics` argument.", - category: "advanced"} - collectQualityYieldMetrics: {description: "Equivalent to the `PROGRAM=CollectQualityYieldMetrics` argument.", - category: "advanced"} + collectSequencingArtifactMetrics: {description: "Equivalent to the `PROGRAM=CollectSequencingArtifactMetrics` argument.", category: "advanced"} + collectQualityYieldMetrics: {description: "Equivalent to the `PROGRAM=CollectQualityYieldMetrics` argument.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -266,9 +251,9 @@ task CollectRnaSeqMetrics { String basename String strandSpecificity = "NONE" - String memory = "9G" String javaXmx = "8G" - # With 6 minutes per G there were several timeouts. + String memory = "9G" + # With 6 minutes per G there were several timeouts. Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -286,14 +271,14 @@ task CollectRnaSeqMetrics { } output { - File? chart = basename + ".RNA_Metrics.pdf" File metrics = basename + ".RNA_Metrics" + File? 
chart = basename + ".RNA_Metrics.pdf" } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -302,15 +287,11 @@ task CollectRnaSeqMetrics { inputBamIndex: {description: "The index of the input BAM file.", category: "required"} refRefflat: {description: "A refflat file containing gene annotations.", catehory: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} - strandSpecificity: {description: "Equivalent to the `STRAND_SPECIFICITY` option of picard's CollectRnaSeqMetrics.", - category: "common"} - + strandSpecificity: {description: "Equivalent to the `STRAND_SPECIFICITY` option of picard's CollectRnaSeqMetrics.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -325,8 +306,8 @@ task CollectTargetedPcrMetrics { Array[File]+ targetIntervals String basename - String memory = "4G" String javaXmx = "3G" + String memory = "4G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -352,9 +333,9 @@ task CollectTargetedPcrMetrics { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -362,21 +343,15 @@ task CollectTargetedPcrMetrics { inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} inputBamIndex: {description: "The index of the input BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - ampliconIntervals: {description: "An interval list describinig the coordinates of the amplicons sequenced.", - category: "required"} - targetIntervals: {description: "An interval list describing the coordinates of the targets sequenced.", - category: "required"} + ampliconIntervals: {description: "An interval list describinig the coordinates of the amplicons sequenced.", category: "required"} + targetIntervals: {description: "An interval list describing the coordinates of the targets sequenced.", category: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} - + javaXmx: {description: "The maximum memory 
available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -388,8 +363,8 @@ task CollectVariantCallingMetrics { File inputVCFIndex String basename - String memory = "9G" String javaXmx = "8G" + String memory = "9G" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -410,24 +385,22 @@ task CollectVariantCallingMetrics { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { # inputs dbsnp: {description: "DBSNP vcf file to use with CollectVariantCallingMetrics.", category: "required"} dbsnpIndex: {description: "Index file for the DBSNP VCF.", category: "required"} - inputVCF: {description: "Input VCF file", category: "required"} + inputVCF: {description: "Input VCF file.", category: "required"} inputVCFIndex: {description: "Index file for the input VCF.", category: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -436,8 +409,8 @@ task CreateSequenceDictionary { File inputFile String outputDir - String memory = "3G" String javaXmx = "2G" + String memory = "3G" String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -464,8 +437,8 @@ task CreateSequenceDictionary { # inputs inputFile: {description: "The input fasta file.", category: "required"} outputDir: {description: "Output directory path.", category: "required"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs @@ -473,17 +446,19 @@ task CreateSequenceDictionary { } } -# Combine multiple recalibrated BAM files from scattered ApplyRecalibration runs +# Combine multiple recalibrated BAM files from scattered +# ApplyRecalibration runs. 
task GatherBamFiles { input { Array[File]+ inputBams Array[File]+ inputBamsIndex String outputBamPath + Boolean createMd5File = false - Int memoryMb = javaXmxMb + 512 - Int javaXmxMb = 1024 Int? compressionLevel - Boolean createMd5File = false + + Int javaXmxMb = 1024 + Int memoryMb = javaXmxMb + 512 # One minute per input gigabyte. Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" @@ -508,9 +483,9 @@ task GatherBamFiles { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -518,14 +493,12 @@ task GatherBamFiles { inputBams: {description: "The BAM files to be merged together.", category: "required"} inputBamsIndex: {description: "The indexes of the input BAM files.", category: "required"} outputBamPath: {description: "The path where the merged BAM file will be written.", caregory: "required"} - compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} createMd5File: {decription: "Whether to create an md5 file of the output BAM.", category: "advanced"} + compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -535,8 +508,8 @@ task GatherVcfs { Array[File]+ inputVcfIndexes String outputVcfPath = "out.vcf.gz" - String memory = "5G" String javaXmx = "4G" + String memory = "5G" Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -555,9 +528,9 @@ task GatherVcfs { } runtime { - docker: dockerImage memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -565,17 +538,14 @@ task GatherVcfs { inputVcfs: {description: "The VCF files to be merged together.", category: "required"} inputVcfIndexes: {description: "The indexes of the input VCF files.", category: "required"} outputVcfPath: {description: "The path where the merged VCF file will be written.", caregory: "required"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -# Mark duplicate reads to avoid counting non-independent observations +# Mark duplicate reads to avoid counting non-independent observations. task MarkDuplicates { input { Array[File]+ inputBams @@ -583,31 +553,32 @@ task MarkDuplicates { String metricsPath Int compressionLevel = 1 Boolean createMd5File = false - Boolean useJdkInflater = true # Slightly faster than the intel one. - # Better results for compression level 1 (much smaller). Higher compression levels similar to intel deflater. - # NOTE: this might change in the future when the intel deflater is updated! + Boolean useJdkInflater = true # Slightly faster than the intel one. + # Better results for compression level 1 (much smaller). + # Higher compression levels similar to intel deflater. + # NOTE: this might change in the future when the intel + # deflater is updated! Boolean useJdkDeflater = true - # In GATK Best practices pipeline MarkDuplicates is given a 7G VM. + # The program default for READ_NAME_REGEX is appropriate in nearly every case. + # Sometimes we wish to supply "null" in order to turn off optical duplicate detection. + # This can be desirable if you don't mind the estimated library size + # being wrong and optical duplicate detection is taking >7 days and failing. + String? read_name_regex + + # In GATK Best practices pipeline MarkDuplicates is given a 7G VM. # https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L1040 Int javaXmxMb = 6656 # 6.5G String memoryMb = javaXmxMb + 512 Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" - - # The program default for READ_NAME_REGEX is appropriate in nearly every case. 
- # Sometimes we wish to supply "null" in order to turn off optical duplicate detection - # This can be desirable if you don't mind the estimated library size being wrong and - # optical duplicate detection is taking >7 days and failing - String? read_name_regex } # Task is assuming query-sorted input so that the Secondary and Supplementary reads get # marked correctly. This works because the output of BWA is query-grouped and therefore, # so is the output of MergeBamAlignment. While query-grouped isn't actually query-sorted, - # it's good enough for MarkDuplicates with ASSUME_SORT_ORDER="queryname" - + # it's good enough for MarkDuplicates with ASSUME_SORT_ORDER="queryname". command { set -e mkdir -p "$(dirname ~{outputBamPath})" @@ -625,7 +596,7 @@ task MarkDuplicates { ADD_PG_TAG_TO_READS=false \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} \ USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ - USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} } output { @@ -636,9 +607,9 @@ task MarkDuplicates { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: "~{memoryMb}M" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -646,42 +617,39 @@ task MarkDuplicates { inputBams: {description: "The BAM files for which the duplicate reads should be marked.", category: "required"} outputBamPath: {description: "The location where the ouptut BAM file should be written.", category: "required"} metricsPath: {description: "The location where the output metrics file should be written.", category: "required"} - read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"} 
useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} - compressionLevel: {description: "The compression level at which the BAM files are written", category: "advanced"} + read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"} + javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} - javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } -# Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs +# Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs. task MergeVCFs { input { Array[File]+ inputVCFs Array[File]+ inputVCFsIndexes String outputVcfPath - - String memory = "5G" - String javaXmx = "4G" - Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" Int compressionLevel = 1 - Boolean useJdkInflater = true # Slightly faster than the intel one. - # Better results for compression level 1 (much smaller). 
Higher compression levels similar to intel deflater. + Boolean useJdkInflater = true # Slightly faster than the intel one. + # Better results for compression level 1 (much smaller). + # Higher compression levels similar to intel deflater. # NOTE: this might change in the future when the intel deflater is updated! Boolean useJdkDeflater = true + String javaXmx = "4G" + String memory = "5G" + Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } - # Using MergeVcfs instead of GatherVcfs so we can create indices - # See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket - + # Using MergeVcfs instead of GatherVcfs so we can create indices. + # See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket. command { set -e mkdir -p "$(dirname ~{outputVcfPath})" @@ -691,7 +659,7 @@ task MergeVCFs { OUTPUT=~{outputVcfPath} \ COMPRESSION_LEVEL=~{compressionLevel} \ USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ - USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} } output { @@ -700,9 +668,9 @@ task MergeVCFs { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -710,16 +678,13 @@ task MergeVCFs { inputVCFs: {description: "The VCF files to be merged.", category: "required"} inputVCFsIndexes: {description: "The indexes of the VCF files.", category: "required"} outputVcfPath: {description: "The location the output VCF file should be written to.", category: "required"} - - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} - compressionLevel: {description: "The compression level at which the BAM files are written", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -729,10 +694,12 @@ task SamToFastq { File inputBamIndex Boolean paired = true - String memory = "17G" String javaXmx = "16G" # High memory default to avoid crashes. + String memory = "17G" + Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" - File? NONE + + File? noneFile } String outputRead1 = basename(inputBam, "\.[bs]am") + "_R1.fastq.gz" @@ -751,13 +718,20 @@ task SamToFastq { output { File read1 = outputRead1 - File? read2 = if paired then outputRead2 else NONE - File? unpairedRead = if paired then outputUnpaired else NONE + File? 
read2 = if paired then outputRead2 else noneFile + File? unpairedRead = if paired then outputUnpaired else noneFile } runtime { - docker: dockerImage memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + meta { + WDL_AID: { + exclude: ["noneFile"] + } } } @@ -766,8 +740,8 @@ task ScatterIntervalList { File interval_list Int scatter_count - String memory = "4G" String javaXmx = "3G" + String memory = "4G" String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -790,8 +764,8 @@ task ScatterIntervalList { } runtime { - docker: dockerImage memory: memory + docker: dockerImage } } @@ -804,7 +778,7 @@ task SortSam { Int maxRecordsInRam = 500000 Int compressionLevel = 1 - # Default ram of 4 GB. Using 125001.0 to prevent an answer of + # Default ram of 4 GB. Using 125001.0 to prevent an answer of # 4.000000001 which gets rounded to 5. # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) @@ -840,13 +814,16 @@ task SortSam { } parameter_meta { - inputBam: {description: "The unsorted input BAM file", category: "required"} + # inputs + inputBam: {description: "The unsorted input BAM file.", category: "required"} outputPath: {description: "The location the output BAM file should be written to.", category: "required"} - XmxGb: {description: "The maximum memory available to picard SortSam. 
Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", - category: "advanced"} + sortByName: {description: "Sort the output file by name, default is position.", category: "advanced"} + createMd5File: {description: "Whether to create an MD5 digest for any BAM or FASTQ files created.", category: "advanced"} + maxRecordsInRam: {description: "This will specify the number of records stored in RAM before spilling to disk.", category: "advanced"} + compressionLevel: {description: "Compression level for all compressed files created.", category: "advanced"} + XmxGb: {description: "The maximum memory available to picard SortSam. Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -854,10 +831,11 @@ task SortVcf { input { Array[File]+ vcfFiles String outputVcfPath + File? 
dict - String memory = "9G" String javaXmx = "8G" + String memory = "9G" Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" } @@ -879,9 +857,9 @@ task SortVcf { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -889,13 +867,10 @@ task SortVcf { vcfFiles: {description: "The VCF files to merge and sort.", category: "required"} outputVcfPath: {description: "The location the sorted VCF files should be written to.", category: "required"} dict: {description: "A sequence dictionary matching the VCF files.", category: "advanced"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -904,8 +879,9 @@ task RenameSample { File inputVcf String outputPath = "./picard/renamed.vcf" String newSampleName - String memory = "9G" + String javaXmx = "8G" + String memory = "9G" Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) String dockerImage = "quay.io/biocontainers/picard:2.19.0--0" } @@ -925,9 +901,9 @@ task RenameSample { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -935,8 +911,8 @@ task RenameSample { inputVcf: {description: "The VCF file to process.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} newSampleName: {description: "A string to replace the old sample name.", category: "required"} - memory: {description: "The memory required to run the programs", category: "advanced"} - javaXmx: {description: "The max. memory allocated for JAVA", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/rtg.wdl b/rtg.wdl index 104a5ef9..bfd32957 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -22,13 +22,14 @@ version 1.0 task Format { input { + Array[File]+ inputFiles String format = "fasta" String outputPath = "seq_data.sdf" - Array[File]+ inputFiles - String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" + String rtgMem = "8G" String memory = "9G" Int timeMinutes = 1 + ceil(size(inputFiles) * 2) + String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" } command { @@ -44,21 +45,20 @@ task Format { } runtime { - docker: dockerImage memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - format: {description: "Format of input. Allowed values are [fasta, fastq, fastq-interleaved, sam-se, sam-pe].", - category: "advanced"} - outputPath: {description: "Where the output should be placed.", category: "advanced"} + # inputs inputFiles: {description: "input sequence files. May be specified 1 or more times.", category: "required"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + format: {description: "Format of input. 
Allowed values are [fasta, fastq, fastq-interleaved, sam-se, sam-pe].", category: "advanced"} + outputPath: {description: "Where the output should be placed.", category: "advanced"} + rtgMem: {description: "The amount of memory rtg will allocate to the JVM.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - rtgMem: {description: "The amount of memory rtg will allocate to the JVM", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -68,18 +68,20 @@ task VcfEval { File baselineIndex File calls File callsIndex - File? evaluationRegions - File? bedRegions + Boolean squashPloidy = false + String outputMode = "split" String outputDir = "output/" File template Boolean allRecords = false Boolean decompose = false Boolean refOverlap = false + + File? evaluationRegions + File? bedRegions String? sample - Boolean squashPloidy = false - String outputMode = "split" - Int threads = 1 # tool default is number of cores in the system 😱 + String rtgMem = "8G" + Int threads = 1 # Tool default is number of cores in the system 😱. 
String memory = "9G" Int timeMinutes = 1 + ceil(size([baseline, calls], "G") * 5) String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" @@ -132,39 +134,32 @@ task VcfEval { } runtime { - docker: dockerImage cpu: threads memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - baseline: {description: "VCF file containing baseline variants", category: "required"} - baselineIndex: {description: "The baseline's VCF index", category: "required"} - calls: {description: "VCF file containing called variants", category: "required"} - callsIndex: {description: "The call's VCF index", category: "required"} - outputDir: {description: "Directory for output", category: "advanced"} - bedRegions: {description: "if set, only read VCF records that overlap the ranges contained in the specified BED file", category: "advanced"} - evaluationRegions: {description: "if set, evaluate within regions contained in the supplied BED file, allowing transborder matches. To be used for truth-set high-confidence regions or other regions of interest where region boundary effects should be minimized", - category: "advanced"} - template: {description: "SDF of the reference genome the variants are called against", category: "required"} - allRecords: {description: "use all records regardless of FILTER status (Default is to only process records where FILTER is \".\" or \"PASS\")", - category: "common"} - decompose: {description: "decompose complex variants into smaller constituents to allow partial credit", category: "common"} - refOverlap: {description: "allow alleles to overlap where bases of either allele are same-as-ref (Default is to only allow VCF anchor base overlap)", - category: "common"} - sample: {description: "the name of the sample to select. Use , to select different sample names for baseline and calls. 
(Required when using multi-sample VCF files)", - category: "common"} - squashPloidy: {description: "treat heterozygous genotypes as homozygous ALT in both baseline and calls, to allow matches that ignore zygosity differences", - category: "common"} - outputMode: {description: "output reporting mode. Allowed values are [split, annotate, combine, ga4gh, roc-only] (Default is split)", - category: "advanced"} - threads: {description: "Number of threads. Default is 1", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - rtgMem: {description: "The amount of memory rtg will allocate to the JVM", category: "advanced"} + # inputs + baseline: {description: "VCF file containing baseline variants.", category: "required"} + baselineIndex: {description: "The baseline's VCF index.", category: "required"} + calls: {description: "VCF file containing called variants.", category: "required"} + callsIndex: {description: "The call's VCF index.", category: "required"} + squashPloidy: {description: "treat heterozygous genotypes as homozygous ALT in both baseline and calls, to allow matches that ignore zygosity differences.", category: "common"} + outputMode: {description: "output reporting mode. 
Allowed values are [split, annotate, combine, ga4gh, roc-only] (Default is split).", category: "advanced"} + outputDir: {description: "Directory for output.", category: "advanced"} + template: {description: "SDF of the reference genome the variants are called against.", category: "required"} + allRecords: {description: "use all records regardless of FILTER status (Default is to only process records where FILTER is \".\" or \"PASS\").", category: "common"} + decompose: {description: "decompose complex variants into smaller constituents to allow partial credit.", category: "common"} + refOverlap: {description: "allow alleles to overlap where bases of either allele are same-as-ref (Default is to only allow VCF anchor base overlap).", category: "common"} + sample: {description: "the name of the sample to select. Use , to select different sample names for baseline and calls. (Required when using multi-sample VCF files).", category: "common"} + bedRegions: {description: "if set, only read VCF records that overlap the ranges contained in the specified BED file.", category: "advanced"} + evaluationRegions: {description: "if set, evaluate within regions contained in the supplied BED file, allowing transborder matches. To be used for truth-set high-confidence regions or other regions of interest where region boundary effects should be minimized.", category: "advanced"} + rtgMem: {description: "The amount of memory rtg will allocate to the JVM.", category: "advanced"} + threads: {description: "Number of threads. Default is 1.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } - diff --git a/sambamba.wdl b/sambamba.wdl index cd8da21e..df5ab4d1 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -20,29 +20,31 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - task Markdup { input { Array[File] inputBams String outputPath - # Sambamba scales like this: 1 thread is fully utilized (1). 2 threads 1.8 utilized. 3 -> 2.4, 4-> 2.7. - # 2 threads reduces wall clock time by more than 40%. - Int threads = 2 Int compressionLevel = 1 - Int? hashTableSize - Int? overFlowListSize - # sortBufferSize and ioBufferSize taken from markdup defaults as of sambamba 0.7.1 + # sortBufferSize and ioBufferSize taken from markdup defaults as of sambamba 0.7.1. Int sortBufferSize = 2048 Int ioBufferSize = 128 - Boolean removeDuplicates = false + Boolean removeDuplicates = false + Int? hashTableSize + Int? overFlowListSize + + # Sambamba scales like this: 1 thread is fully utilized (1). + # 2 threads 1.8 utilized. 3 -> 2.4, 4-> 2.7. + # 2 threads reduces wall clock time by more than 40%. + Int threads = 2 # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size. # Added 1024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize - String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" # Time minute calculation does not work well for higher number of threads. 
Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) / threads + String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } + String bamIndexPath = sub(outputPath, "\.bam$", ".bai") command { @@ -57,7 +59,7 @@ task Markdup { ~{"--sort-buffer-size " + sortBufferSize} \ ~{"--io-buffer-size " + ioBufferSize} \ ~{sep=' ' inputBams} ~{outputPath} - # sambamba creates an index for us + # sambamba creates an index for us. mv ~{outputPath}.bai ~{bamIndexPath} } @@ -67,8 +69,8 @@ task Markdup { } runtime { - memory: "~{memoryMb}M" cpu: threads + memory: "~{memoryMb}M" time_minutes: timeMinutes docker: dockerImage } @@ -78,17 +80,19 @@ task Markdup { inputBams: {description: "The input BAM files.", category: "required"} outputPath: {description: "Output directory path + output file.", category: "required"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} - memoryMb: {description: "The amount of memory available to the job in megabytes.", category: "advanced"} - removeDuplicates: {description: "Whether to remove the duplicates (instead of only marking them).", category: "advanced"} - hashTableSize: {description: "Sets sambamba's hash table size", category: "advanced"} - overFlowListSize: {description: "Sets sambamba's overflow list size", category: "advanced"} - sortBufferSize: {description: "The amount of mb allocated to the sort buffer", category: "advanced"} + sortBufferSize: {description: "The amount of mb allocated to the sort buffer.", category: "advanced"} ioBufferSize: {description: "The amount of mb allocated to each IO buffer. Sambamba uses two IO buffers.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + removeDuplicates: {description: "Whether to remove the duplicates (instead of only marking them).", category: "advanced"} + hashTableSize: {description: "Sets sambamba's hash table size.", category: "advanced"} + overFlowListSize: {description: "Sets sambamba's overflow list size.", category: "advanced"} threads: {description: "The number of threads that will be used for this task.", category: "advanced"} + memoryMb: {description: "The amount of memory available to the job in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + # outputs outputBam: {description: "Sorted BAM file."} + outputBamIndex: {description: "Sorted BAM file index."} } } @@ -98,14 +102,15 @@ task Sort { String outputPath = basename(inputBam, "\.bam") + ".sorted.bam" Boolean sortByName = false Int compressionLevel = 1 - Int threads = 1 + Int memoryPerThreadGb = 4 + Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb - String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) + String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } - # Select first needed as outputPath is optional input. (bug in cromwell) + # Select first needed as outputPath is optional input (bug in cromwell). String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") command { @@ -118,7 +123,7 @@ task Sort { -m ~{memoryPerThreadGb}G \ -o ~{outputPath} \ ~{inputBam} - # sambamba creates an index for us + # sambamba creates an index for us. 
mv ~{outputPath}.bai ~{bamIndexPath} } @@ -140,12 +145,14 @@ task Sort { outputPath: {description: "Output directory path + output file.", category: "required"} sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} - memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} - memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes.", category: "advanced"} threads: {description: "The number of threads that will be used for this task.", category: "advanced"} + memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + # outputs outputBam: {description: "Sorted BAM file."} + outputBamIndex: {description: "Sorted BAM file index."} } -} \ No newline at end of file +} From f81a99e864af4a567a33e0850dfd1f0672d60a96 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 2 Nov 2020 12:16:45 +0100 Subject: [PATCH 0673/1208] Update layout samtools.wdl. 
--- samtools.wdl | 92 +++++++++++++++++++++++++++------------------------- 1 file changed, 48 insertions(+), 44 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 9e415b0e..496cf233 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -55,8 +55,7 @@ task BgzipAndIndex { outputDir: {description: "The directory in which the output will be placed.", category: "required"} type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -104,11 +103,12 @@ task Fastq { String outputRead1 String? outputRead2 String? outputRead0 + Boolean appendReadNumber = false + Boolean outputQuality = false + Int? includeFilter Int? excludeFilter Int? excludeSpecificFilter - Boolean appendReadNumber = false - Boolean outputQuality = false Int? compressionLevel Int threads = 1 @@ -151,16 +151,16 @@ task Fastq { outputRead1: {description: "The location the reads (first reads for pairs, in case of paired-end sequencing) should be written to.", category: "required"} outputRead2: {description: "The location the second reads from pairs should be written to.", category: "common"} outputRead0: {description: "The location the unpaired reads should be written to (in case of paired-end sequenicng).", category: "advanced"} - includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`", category: "advanced"} - excludeFilter: {description: "Exclude reads with ONE OR MORE of these flags. 
Corresponds to `-F`", category: "advanced"} - excludeSpecificFilter: {description: "Exclude reads with ALL of these flags. Corresponds to `-G`", category: "advanced"} - appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. Corresponds to `-n/N`", category: "advanced"} + appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. Corresponds to `-n/N`.", category: "advanced"} outputQuality: {description: "Equivalent to samtools fastq's `-O` flag.", category: "advanced"} + includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`.", category: "advanced"} + excludeFilter: {description: "Exclude reads with ONE OR MORE of these flags. Corresponds to `-F`.", category: "advanced"} + excludeSpecificFilter: {description: "Exclude reads with ALL of these flags. Corresponds to `-G`.", category: "advanced"} + compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -168,6 +168,7 @@ task FilterShortReadsBam { input { File bamFile String outputPathBam + String memory = "1G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" @@ -196,6 +197,7 @@ task FilterShortReadsBam { } parameter_meta { + # inputs bamFile: {description: "The bam file to process.", category: "required"} outputPathBam: {description: "The filtered bam file.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} @@ -236,15 +238,16 @@ task Flagstat { outputPath: {description: "The location the ouput should be written to.", category: "required"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } task Index { input { File bamFile + String? outputBamPath + String memory = "2G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" @@ -281,12 +284,10 @@ task Index { parameter_meta { # inputs bamFile: {description: "The BAM file for which an index should be made.", category: "required"} - outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", - category: "common"} + outputBamPath: {description: "The location where the BAM file should be written to. 
The index will appear alongside this link to the BAM file.", category: "common"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -319,8 +320,7 @@ task Markdup { inputBam: {description: "The BAM file to be processed.", category: "required"} outputBamPath: {description: "The location of the output BAM file.", category: "required"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -329,12 +329,13 @@ task Merge { Array[File]+ bamFiles String outputBamPath = "merged.bam" Boolean force = true - Int threads = 1 - Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) + Int threads = 1 String memory = "4G" + Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } + String indexPath = sub(outputBamPath, "\.bam$",".bai") # Samtools uses additional threads for merge. 
@@ -355,21 +356,20 @@ task Merge { runtime { cpu: threads - docker: dockerImage memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { # inputs bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} - threads: {description: "Number of threads to use.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + threads: {description: "Number of threads to use.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -379,14 +379,15 @@ task Sort { String outputPath = basename(inputBam, "\.bam") + ".sorted.bam" Boolean sortByName = false Int compressionLevel = 1 - Int threads = 1 + Int memoryPerThreadGb = 4 + Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) + String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } - # Select first needed as outputPath is optional input. (bug in cromwell) + # Select first needed as outputPath is optional input (bug in cromwell). 
String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") command { @@ -410,10 +411,10 @@ task Sort { } runtime { - cpu: 1 + cpu: threads memory: "~{memoryGb}G" - docker: dockerImage time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -422,14 +423,15 @@ task Sort { outputPath: {description: "Output directory path + output file.", category: "required"} sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} - memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} - memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes.", category: "advanced"} threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} + memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs outputBam: {description: "Sorted BAM file."} + outputBamIndex: {description "Sorted BAM file index."} } } @@ -438,10 +440,13 @@ task Tabix { File inputFile String outputFilePath = "indexed.vcf.gz" String type = "vcf" + Int timeMinutes = 1 + ceil(size(inputFile, "G") * 2) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } - # FIXME: It is better to do the indexing on VCF creation. Not in a separate task. With file localization this gets hairy fast. + + # FIXME: It is better to do the indexing on VCF creation. + # Not in a separate task. With file localization this gets hairy fast. command { set -e mkdir -p "$(dirname ~{outputFilePath})" @@ -459,27 +464,26 @@ task Tabix { runtime { time_minutes: timeMinutes - docker: dockerImage + docker: dockerImage } parameter_meta { # inputs inputFile: {description: "The file to be indexed.", category: "required"} - outputFilePath: {description: "The location where the file should be written to. The index will appear alongside this link to the file.", - category: "common"} + outputFilePath: {description: "The location where the file should be written to. The index will appear alongside this link to the file.", category: "common"} type: {description: "The type of file (eg. vcf or bed) to be indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } task View { input { File inFile - File? 
referenceFasta String outputFileName = "view.bam" Boolean uncompressedBamOutput = false + + File? referenceFasta Int? includeFilter Int? excludeFilter Int? excludeSpecificFilter @@ -490,9 +494,10 @@ task View { Int timeMinutes = 1 + ceil(size(inFile, "G") * 5) String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" } + String outputIndexPath = basename(outputFileName) + ".bai" - # Always output to bam and output header + # Always output to bam and output header. command { set -e mkdir -p "$(dirname ~{outputFileName})" @@ -524,9 +529,9 @@ task View { parameter_meta { # inputs inFile: {description: "A BAM, SAM or CRAM file.", category: "required"} - referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} outputFileName: {description: "The location the output BAM file should be written.", category: "common"} uncompressedBamOutput: {description: "Equivalent to samtools view's `-u` flag.", category: "advanced"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} includeFilter: {description: "Equivalent to samtools view's `-f` option.", category: "advanced"} excludeFilter: {description: "Equivalent to samtools view's `-F` option.", category: "advanced"} excludeSpecificFilter: {description: "Equivalent to samtools view's `-G` option.", category: "advanced"} @@ -534,7 +539,6 @@ task View { threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From d101e77cf3211079a7b7ca50c0203ffea811919b Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 2 Nov 2020 15:38:52 +0100 Subject: [PATCH 0674/1208] Add last set of updates. --- .github/PULL_REQUEST_TEMPLATE.md | 3 +- CHANGELOG.md | 2 + pacbio.wdl | 89 +++++++++++++++++++++++++ samtools.wdl | 2 +- seqtk.wdl | 9 +-- smoove.wdl | 9 +-- somaticseq.wdl | 89 ++++++++++++------------- spades.wdl | 12 ++-- star.wdl | 20 +++--- strelka.wdl | 34 +++++----- stringtie.wdl | 18 ++--- survivor.wdl | 19 +++--- talon.wdl | 10 +-- transcriptclean.wdl | 21 +++--- umi-tools.wdl | 41 ++++++------ unicycler.wdl | 5 +- vardict.wdl | 41 ++++++------ vt.wdl | 20 +++--- whatshap.wdl | 111 ++++++++++++++++--------------- wisestork.wdl | 44 ++++++------ 20 files changed, 351 insertions(+), 248 deletions(-) create mode 100644 pacbio.wdl diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 199344f5..1d52f502 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,4 +1,3 @@ - ### Checklist -- [ ] Pull request details were added to CHANGELOG.md +- [ ] Pull request details were added to CHANGELOG.md. - [ ] `parameter_meta` for each task is up to date. diff --git a/CHANGELOG.md b/CHANGELOG.md index f0dfaf1f..e7d7fed7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Samtools: `Sort` task now has `threads` in runtime instead of `1`. + Picard: Add parameter_meta to `SortSam`. + pbmm2: Add parameter_meta for `sample`. + Centrifuge: Rename output in task `KReport` to `KrakenReport` to resolve @@ -20,6 +21,7 @@ version 5.0.0-dev + Bam2fastx: Add localisation of input files to Bam2Fasta task. + isoseq3: `cores` input has been renamed to `threads` to match tool naming. 
+ CCS: `cores` input has been renamed to `threads` to match tool naming. ++ Add PacBio preprocessing specific tasks `mergePacBio` & `ccsChunks`. + CCS: Update CCS to version 5. + deepvariant: Add task for DeepVariant. + gatk: Make intervals optional for GenotypeGVCFs. diff --git a/pacbio.wdl b/pacbio.wdl new file mode 100644 index 00000000..01f6d4fd --- /dev/null +++ b/pacbio.wdl @@ -0,0 +1,89 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task mergePacBio { + input { + Array[File]+ reports + String mergedReport + + String memory = "4G" + String dockerImage = "lumc/pacbio-merge:0.2" + } + + command { + set -e + mkdir -p $(dirname ~{mergedReport}) + pacbio_merge \ + --reports ~{sep=" " reports} \ + --json-output ~{mergedReport} + } + + runtime { + memory: memory + docker: dockerImage + } + + output { + File MergedReport = mergedReport + } + + parameter_meta { + # inputs + reports: {description: "The PacBio report files to merge.", category: "required"} + mergedReport: {description: "The location the merged PacBio report file should be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + +task ccsChunks { + input { + Int chunkCount + + String memory = "4G" + String dockerImage = "python:3.7-slim" + } + + command { + set -e + python <' "modified_strelka.vcf" > ~{outputVCFName} } @@ -425,10 +424,10 @@ task ModifyStrelka { } parameter_meta { + # inputs strelkaVCF: {description: "A vcf file as produced by strelka.", category: "required"} outputVCFName: {description: "The location the output VCF file should be written to.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/spades.wdl b/spades.wdl index 204dbfea..7cc16d21 100644 --- a/spades.wdl +++ b/spades.wdl @@ -22,10 +22,11 @@ version 1.0 task Spades { input { - String outputDir - String? preCommand File read1 File? read2 + String outputDir + + String? preCommand File? interlacedReads File? sangerReads File? pacbioReads @@ -44,12 +45,13 @@ task Spades { Boolean? disableGzipOutput Boolean? disableRepeatResolution File? dataset - Int threads = 1 - Int memoryGb = 16 File? tmpDir String? k Float? covCutoff Int? phredOffset + + Int threads = 1 + Int memoryGb = 16 } command { @@ -100,4 +102,4 @@ task Spades { cpu: threads memory: "~{memoryGb}G" } -} \ No newline at end of file +} diff --git a/star.wdl b/star.wdl index 3d0e2eb0..68193fcd 100644 --- a/star.wdl +++ b/star.wdl @@ -24,6 +24,7 @@ task GenomeGenerate { input { String genomeDir = "STAR_index" File referenceFasta + File? referenceGtf Int? sjdbOverhang @@ -61,8 +62,10 @@ task GenomeGenerate { File? sjdbListFromGtfOut = "~{genomeDir}/sjdbList.fromGTF.out.tab" File? sjdbListOut = "~{genomeDir}/sjdbList.out.tab" File? 
transcriptInfo = "~{genomeDir}/transcriptInfo.tab" - Array[File] starIndex = select_all([chrLength, chrNameLength, chrName, chrStart, genome, genomeParameters, - sa, saIndex, exonGeTrInfo, exonInfo, geneInfo, sjdbInfo, sjdbListFromGtfOut, + Array[File] starIndex = select_all([chrLength, chrNameLength, chrName, + chrStart, genome, genomeParameters, + sa, saIndex, exonGeTrInfo, exonInfo, + geneInfo, sjdbInfo, sjdbListFromGtfOut, sjdbListOut, transcriptInfo]) } @@ -74,16 +77,15 @@ task GenomeGenerate { } parameter_meta { + # inputs genomeDir: {description:"The directory the STAR index should be written to.", categroy: "common"} referenceFasta: {description: "The reference Fasta file.", category: "required"} referenceGtf: {description: "The reference GTF file.", category: "common"} sjdbOverhang: {description: "Equivalent to STAR's `--sjdbOverhang` option.", category: "advanced"} - threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -95,6 +97,8 @@ task Star { String outFileNamePrefix String outSAMtype = "BAM SortedByCoordinate" String readFilesCommand = "zcat" + Int outBAMcompression = 1 + Int? outFilterScoreMin Float? outFilterScoreMinOverLread Int? outFilterMatchNmin @@ -103,7 +107,6 @@ task Star { String? twopassMode = "Basic" Array[String]? outSAMattrRGline String? outSAMunmapped = "Within KeepPairs" - Int outBAMcompression = 1 Int? 
limitBAMsortRAM Int runThreadN = 4 @@ -119,7 +122,7 @@ task Star { # So we solve it with an optional memory string and using select_first # in the runtime section. - #TODO Could be extended for all possible output extensions + #TODO: Could be extended for all possible output extensions. Map[String, String] samOutputNames = {"BAM SortedByCoordinate": "sortedByCoord.out.bam"} command { @@ -157,12 +160,14 @@ task Star { } parameter_meta { + # inputs inputR1: {description: "The first-/single-end FastQ files.", category: "required"} inputR2: {description: "The second-end FastQ files (in the same order as the first-end files).", category: "common"} indexFiles: {description: "The star index files.", category: "required"} outFileNamePrefix: {description: "The prefix for the output files. May include directories.", category: "required"} outSAMtype: {description: "The type of alignment file to be produced. Currently only `BAM SortedByCoordinate` is supported.", category: "advanced"} readFilesCommand: {description: "Equivalent to star's `--readFilesCommand` option.", category: "advanced"} + outBAMcompression: {description: "The compression level of the output BAM.", category: "advanced"} outFilterScoreMin: {description: "Equivalent to star's `--outFilterScoreMin` option.", category: "advanced"} outFilterScoreMinOverLread: {description: "Equivalent to star's `--outFilterScoreMinOverLread` option.", category: "advanced"} outFilterMatchNmin: {description: "Equivalent to star's `--outFilterMatchNmin` option.", category: "advanced"} @@ -174,7 +179,6 @@ task Star { limitBAMsortRAM: {description: "Equivalent to star's `--limitBAMsortRAM` option.", category: "advanced"} runThreadN: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - outBAMcompression: {description: "The compression level of the output BAM.", category: "advanced"} timeMinutes: {description: "The maximum amount of 
time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/strelka.wdl b/strelka.wdl index 50c38b55..f4b9888b 100644 --- a/strelka.wdl +++ b/strelka.wdl @@ -29,11 +29,12 @@ task Germline { Array[File]+ indexes File referenceFasta File referenceFastaFai - File? callRegions - File? callRegionsIndex Boolean exome = false Boolean rna = false + File? callRegions + File? callRegionsIndex + Int cores = 1 Int memoryGb = 4 Int timeMinutes = 90 @@ -61,28 +62,27 @@ task Germline { } runtime { - docker: dockerImage cpu: cores - time_minutes: timeMinutes memory: "~{memoryGb}G" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs runDir: {description: "The directory to use as run/output directory.", category: "common"} bams: {description: "The input BAM files.", category: "required"} indexes: {description: "The indexes for the input BAM files.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} - callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} rna: {description: "Whether or not the data is from RNA sequencing.", category: "common"} - + callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} + callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"} cores: {description: "The number of cores to use.", 
category: "advanced"} memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -95,11 +95,12 @@ task Somatic { File tumorBamIndex File referenceFasta File referenceFastaFai + Boolean exome = false + File? callRegions File? callRegionsIndex File? indelCandidatesVcf File? indelCandidatesVcfIndex - Boolean exome = false Int cores = 1 Int memoryGb = 4 @@ -133,13 +134,14 @@ task Somatic { } runtime { - docker: dockerImage cpu: cores - time_minutes: timeMinutes memory: "~{memoryGb}G" + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs runDir: {description: "The directory to use as run/output directory.", category: "common"} normalBam: {description: "The normal/control sample's BAM file.", category: "required"} normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "required"} @@ -147,17 +149,15 @@ task Somatic { tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"} callRegionsIndex: {description: "The index of the bed file which indicates the regions 
to operate on.", category: "common"} indelCandidatesVcf: {description: "An indel candidates VCF file from manta.", category: "advanced"} indelCandidatesVcfIndex: {description: "The index for the indel candidates VCF file.", category: "advanced"} - exome: {description: "Whether or not the data is from exome sequencing.", category: "common"} - cores: {description: "The number of cores to use.", category: "advanced"} memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } meta { @@ -165,4 +165,4 @@ task Somatic { exclude: ["doNotDefineThis"] } } -} \ No newline at end of file +} diff --git a/stringtie.wdl b/stringtie.wdl index 5ed62dea..fff4140c 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -24,9 +24,10 @@ task Stringtie { input { File bam File bamIndex - File? referenceGtf Boolean skipNovelTranscripts = false String assembledTranscriptsFile + + File? referenceGtf Boolean? firstStranded Boolean? secondStranded String? 
geneAbundanceFile @@ -64,19 +65,19 @@ task Stringtie { } parameter_meta { + # inputs bam: {description: "The input BAM file.", category: "required"} bamIndex: {description: "The input BAM file's index.", category: "required"} - referenceGtf: {description: "A reference GTF file to be used as guide.", category: "common"} skipNovelTranscripts: {description: "Whether new transcripts should be assembled or not.", category: "common"} assembledTranscriptsFile: {description: "Where the output of the assembly should be written.", category: "required"} + referenceGtf: {description: "A reference GTF file to be used as guide.", category: "common"} firstStranded: {description: "Equivalent to the --rf flag of stringtie.", category: "required"} secondStranded: {description: "Equivalent to the --fr flag of stringtie.", category: "required"} geneAbundanceFile: {description: "Where the abundance file should be written.", category: "common"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -84,13 +85,14 @@ task Merge { input { Array[File]+ gtfFiles String outputGtfPath + Boolean keepMergedTranscriptsWithRetainedIntrons = false + File? guideGtf Int? minimumLength Float? minimumCoverage Float? minimumFPKM Float? minimumTPM Float? minimumIsoformFraction - Boolean keepMergedTranscriptsWithRetainedIntrons = false String? 
label String memory = "10G" @@ -125,19 +127,19 @@ task Merge { } parameter_meta { + # inputs gtfFiles: {description: "The GTF files produced by stringtie.", category: "required"} outputGtfPath: {description: "Where the output should be written.", category: "required"} + keepMergedTranscriptsWithRetainedIntrons: {description: "Equivalent to the -i flag of 'stringtie --merge'.", category: "advanced"} guideGtf: {description: "Equivalent to the -G option of 'stringtie --merge'.", category: "advanced"} minimumLength: {description: "Equivalent to the -m option of 'stringtie --merge'.", category: "advanced"} minimumCoverage: {description: "Equivalent to the -c option of 'stringtie --merge'.", category: "advanced"} minimumFPKM: {description: "Equivalent to the -F option of 'stringtie --merge'.", category: "advanced"} minimumTPM: {description: "Equivalent to the -T option of 'stringtie --merge'.", category: "advanced"} minimumIsoformFraction: {description: "Equivalent to the -f option of 'stringtie --merge'.", category: "advanced"} - keepMergedTranscriptsWithRetainedIntrons: {description: "Equivalent to the -i flag of 'stringtie --merge'.", category: "advanced"} label: {description: "Equivalent to the -l option of 'stringtie --merge'.", category: "advanced"} memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/survivor.wdl b/survivor.wdl index b9583009..c7b31058 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2018 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -32,6 +30,7 @@ task Merge { Boolean distanceBySvSize = false Int minSize = 30 String outputPath = "./survivor/merged.vcf" + String memory = "24G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" @@ -64,15 +63,15 @@ task Merge { parameter_meta { # inputs - filePaths: {description: "An array of VCF files (predictions) to be merged by SURVIVOR", category: "required"} - breakpointDistance: {description: "The distance between pairwise breakpoints between SVs", category: "advanced"} - suppVecs: {description: "The minimum number of SV callers to support the merging", category: "advanced"} - svType: {description: "A boolean to include the type SV to be merged", category: "advanced"} - strandType: {description: "A boolean to include strand type of an SV to be merged", category: "advanced"} - distanceBySvSize: {description: "A boolean to predict the pairwise distance between the SVs based on their size", category: "advanced"} - minSize: {description: "The mimimum size of SV to be merged", category: "advanced"} + filePaths: {description: "An array of VCF files (predictions) to be merged by SURVIVOR.", category: "required"} + breakpointDistance: {description: "The distance between pairwise breakpoints between SVs.", category: "advanced"} + suppVecs: {description: "The minimum number of SV callers to support the merging.", category: "advanced"} + svType: {description: "A boolean to include the type SV to be merged.", category: "advanced"} + strandType: {description: "A boolean to include strand type of an SV to be merged.", category: 
"advanced"} + distanceBySvSize: {description: "A boolean to predict the pairwise distance between the SVs based on their size.", category: "advanced"} + minSize: {description: "The mimimum size of SV to be merged.", category: "advanced"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - memory: {description: "The memory required to run the programs", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } diff --git a/talon.wdl b/talon.wdl index c11ab9e0..61f5eb4a 100644 --- a/talon.wdl +++ b/talon.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2019 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE diff --git a/transcriptclean.wdl b/transcriptclean.wdl index 79661307..efdd95f4 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2019 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE @@ -54,10 +54,10 @@ task GetSJsFromGtf { parameter_meta { # inputs - gtfFile: {description: "Input gtf file", category: "required"} - genomeFile: {description: "Reference genome", category: "required"} - minIntronSize: {description: "Minimum size of intron to consider a junction.", category: "advanced"} + gtfFile: {description: "Input gtf file.", category: "required"} + genomeFile: {description: "Reference genome.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + minIntronSize: {description: "Minimum size of intron to consider a junction.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -97,7 +97,7 @@ task GetTranscriptCleanStats { parameter_meta { # inputs - inputSam: {description: "Output sam file from transcriptclean", category: "required"} + inputSam: {description: "Output sam file from transcriptclean.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -189,8 +189,7 @@ task TranscriptClean { cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs fastaFile: {description: "Fasta file containing corrected reads."} diff --git a/umi-tools.wdl b/umi-tools.wdl index c5f3b145..7b0a3991 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2017 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2017 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -26,9 +26,10 @@ task Extract { File? read2 String bcPattern String? bcPattern2 - Boolean threePrime = false String read1Output = "umi_extracted_R1.fastq.gz" String? 
read2Output = "umi_extracted_R2.fastq.gz" + Boolean threePrime = false + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" } @@ -50,21 +51,21 @@ task Extract { } runtime { - docker: dockerImage time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs read1: {description: "The first/single-end fastq file.", category: "required"} read2: {description: "The second-end fastq file.", category: "common"} bcPattern: {description: "The pattern to be used for UMI extraction. See the umi_tools docs for more information.", category: "required"} bcPattern2: {description: "The pattern to be used for UMI extraction in the second-end reads. See the umi_tools docs for more information.", category: "advanced"} - threePrime: {description: "Whether or not the UMI's are at the reads' 3' end. If false the UMIs are extracted from the 5' end.", category: "advanced"} read1Output: {description: "The location to write the first/single-end output fastq file to.", category: "advanced"} read2Output: {description: "The location to write the second-end output fastq file to.", category: "advanced"} + threePrime: {description: "Whether or not the UMI's are at the reads' 3' end. If false the UMIs are extracted from the 5' end.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } @@ -72,15 +73,15 @@ task Dedup { input { File inputBam File inputBamIndex - String? umiSeparator String outputBamPath - String? 
statsPrefix Boolean paired = true + String? umiSeparator + String? statsPrefix + String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) - - # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9) + # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9). String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" } @@ -107,21 +108,21 @@ task Dedup { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs inputBam: {description: "The input BAM file.", categrory: "required"} inputBamIndex: {description: "The index for the ipnut BAM file.", cateogry: "required"} outputBamPath: {description: "The location to write the output BAM file to.", category: "required"} - statsPrefix: {description: "The prefix for the stats files.", category: "advanced"} - umiSeparator: {description: "Seperator used for UMIs in the read names.", category: "advanced"} paired: {description: "Whether or not the data is paired.", category: "common"} + umiSeparator: {description: "Seperator used for UMIs in the read names.", category: "advanced"} + statsPrefix: {description: "The prefix for the stats files.", category: "advanced"} memory: {description: "The amount of memory required for the task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/unicycler.wdl b/unicycler.wdl index fc393603..938d0c7e 100644 --- a/unicycler.wdl +++ b/unicycler.wdl @@ -22,12 +22,13 @@ version 1.0 task Unicycler { input { + String out + String? preCommand File? short1 File? short2 File? unpaired File? long - String out Int? verbosity Int? minFastaLength Int? keep @@ -125,4 +126,4 @@ task Unicycler { cpu: threads memory: memory } -} \ No newline at end of file +} diff --git a/vardict.wdl b/vardict.wdl index 92beb32e..fc37c9ef 100644 --- a/vardict.wdl +++ b/vardict.wdl @@ -27,29 +27,28 @@ task VarDict { String tumorSampleName File tumorBam File tumorBamIndex - String? normalSampleName - File? normalBam - File? normalBamIndex File referenceFasta File referenceFastaFai File bedFile String outputVcf - - Int chromosomeColumn = 1 - Int startColumn = 2 - Int endColumn = 3 - Int geneColumn = 4 - Boolean outputCandidateSomaticOnly = true Boolean outputAllVariantsAtSamePosition = true Float mappingQuality = 20 Int minimumTotalDepth = 8 Int minimumVariantDepth = 4 Float minimumAlleleFrequency = 0.02 + Int chromosomeColumn = 1 + Int startColumn = 2 + Int endColumn = 3 + Int geneColumn = 4 + + String? normalSampleName + File? normalBam + File? 
normalBamIndex + String javaXmx = "16G" Int threads = 1 String memory = "18G" - String javaXmx = "16G" Int timeMinutes = 300 String dockerImage = "quay.io/biocontainers/vardict-java:1.5.8--1" } @@ -93,33 +92,31 @@ task VarDict { } parameter_meta { + # inputs tumorSampleName: {description: "The name of the tumor/case sample.", category: "required"} tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"} tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"} - normalSampleName: {description: "The name of the normal/control sample.", category: "common"} - normalBam: {description: "The normal/control sample's BAM file.", category: "common"} - normalBamIndex: {description: "The normal/control sample's BAM file.", category: "common"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} bedFile: {description: "A bed file describing the regions to operate on. 
These regions must be below 1e6 bases in size.", category: "required"} outputVcf: {description: "The location to write the output VCF file to.", category: "required"} - chromosomeColumn: {description: "Equivalent to vardict-java's `-c` option.", category: "advanced"} - startColumn: {description: "Equivalent to vardict-java's `-S` option.", category: "advanced"} - endColumn: {description: "Equivalent to vardict-java's `-E` option.", category: "advanced"} - geneColumn: {description: "Equivalent to vardict-java's `-g` option.", category: "advanced"} outputCandidateSomaticOnly: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-M` flag.", category: "advanced"} outputAllVariantsAtSamePosition: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-A` flag.", category: "advanced"} mappingQuality: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-Q` option.", category: "advanced"} minimumTotalDepth: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-d` option.", category: "advanced"} minimumVariantDepth: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-v` option.", category: "advanced"} minimumAlleleFrequency: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-f` option.", category: "advanced"} - + chromosomeColumn: {description: "Equivalent to vardict-java's `-c` option.", category: "advanced"} + startColumn: {description: "Equivalent to vardict-java's `-S` option.", category: "advanced"} + endColumn: {description: "Equivalent to vardict-java's `-E` option.", category: "advanced"} + geneColumn: {description: "Equivalent to vardict-java's `-g` option.", category: "advanced"} + normalSampleName: {description: "The name of the normal/control sample.", category: "common"} + normalBam: {description: "The normal/control sample's BAM file.", category: "common"} + normalBamIndex: {description: "The normal/control sample's BAM file.", category: "common"} + 
javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/vt.wdl b/vt.wdl index 99cc1318..95585ff2 100644 --- a/vt.wdl +++ b/vt.wdl @@ -1,6 +1,6 @@ version 1.0 -# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center +# Copyright (c) 2020 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -8,10 +8,10 @@ version 1.0 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -28,9 +28,10 @@ task Normalize { File referenceFastaFai Boolean ignoreMaskedRef = false String outputPath = "./vt/normalized_decomposed.vcf" - String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" + String memory = "4G" Int timeMinutes = 30 + String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" } command { @@ -56,13 +57,12 @@ task Normalize { # inputs inputVCF: {description: "The VCF file to process.", category: "required"} inputVCFIndex: {description: "The index of the VCF file to be processed.", category: "required"} - outputPath: {description: "The location the output VCF file should be written.", category: "common"} referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs", category: "advanced"} - memory: {description: "The memory required to run the programs", category: "advanced"} + ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs.", category: "advanced"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } - diff --git a/whatshap.wdl b/whatshap.wdl index 93624590..5c69400a 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -20,10 +20,14 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. - task Phase { input { String outputVCF + File vcf + File vcfIndex + File phaseInput + File phaseInputIndex + File? reference File? referenceIndex String? tag @@ -33,20 +37,15 @@ task Phase { String? chromosome String? threshold String? ped - File vcf - File vcfIndex - File phaseInput - File phaseInputIndex String memory = "4G" Int timeMinutes = 120 - # Whatshap 1.0, tabix 0.2.5 + # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" } command { set -e - whatshap phase \ ~{vcf} \ ~{phaseInput} \ @@ -69,24 +68,27 @@ task Phase { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs outputVCF: {description: "Output VCF file. Add .gz to the file name to get compressed output. If omitted, use standard output.", category: "common"} - reference: {description: "Reference file. Provide this to detect alleles through re-alignment. 
If no index (.fai) exists, it will be created", category: "common"} - tag: {description: "Store phasing information with PS tag (standardized) or HP tag (used by GATK ReadBackedPhasing) (default: {description: PS)", category: "common"} - algorithm: {description: "Phasing algorithm to use (default: {description: whatshap)", category: "advanced"} - indels: {description: "Also phase indels (default: {description: do not phase indels)", category: "common"} + vcf: {description: "VCF or BCF file with variants to be phased (can be gzip-compressed).", category: "required"} + vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased.", category: "required"} + phaseInput: {description: "BAM, CRAM, VCF or BCF file(s) with phase information, either through sequencing reads (BAM, CRAM) or through phased blocks (VCF, BCF).", category: "required"} + phaseInputIndex: {description: "Index of BAM, CRAM, VCF or BCF file(s) with phase information.", category: "required"} + reference: {description: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created.", category: "common"} + referenceIndex: {description: "Index of reference file.", category: "common"} + tag: {description: "Store phasing information with PS tag (standardized) or HP tag (used by GATK ReadBackedPhasing) (default: {description: PS).", category: "common"} + algorithm: {description: "Phasing algorithm to use (default: {description: whatshap).", category: "advanced"} + indels: {description: "Also phase indels (default: {description: do not phase indels).", category: "common"} sample: {description: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times.", category: "common"} chromosome: {description: "Name of chromosome to phase. If not given, all chromosomes in the input VCF are phased. 
Can be used multiple times.", category: "common"} threshold: {description: "The threshold of the ratio between the probabilities that a pair of reads come from the same haplotype and different haplotypes in the read merging model (default: {description: 1000000).", category: "advanced"} ped: {description: "Use pedigree information in PED file to improve phasing (switches to PedMEC algorithm). Columns 2, 3, 4 must refer to child, mother, and father sample names as used in the VCF and BAM/CRAM. Other columns are ignored.", category: "advanced"} - vcf: {description: "VCF or BCF file with variants to be phased (can be gzip-compressed)", category: "required"} - vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased", category: "required"} - phaseInput: {description: "BAM, CRAM, VCF or BCF file(s) with phase information, either through sequencing reads (BAM, CRAM) or through phased blocks (VCF, BCF)", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -95,16 +97,17 @@ task Phase { task Stats { input { + File vcf + String? gtf String? sample String? tsv String? blockList String? chromosome - File vcf String memory = "4G" Int timeMinutes = 120 - # Whatshap 1.0, tabix 0.2.5 + # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" } @@ -125,18 +128,19 @@ task Stats { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - gtf: "Write phased blocks to GTF file." - sample: "Name of the sample to process. 
If not given, use first sample found in VCF." - tsv: "Filename to write statistics to (tab-separated)." - blockList: "Filename to write list of all blocks to (one block per line)." - chromosome: "Name of chromosome to process. If not given, all chromosomes in the input VCF are considered." - vcf: "Phased VCF file" + # inputs + vcf: {description: "Phased VCF file.", category: "required"} + gtf: {description: "Write phased blocks to GTF file.", category: "common"} + sample: {description: "Name of the sample to process. If not given, use first sample found in VCF.", category: "common"} + tsv: {description: "Filename to write statistics to (tab-separated).", category: "common"} + blockList: {description: "Filename to write list of all blocks to (one block per line).", category: "advanced"} + chromosome: {description: "Name of chromosome to process. If not given, all chromosomes in the input VCF are considered.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -145,57 +149,58 @@ task Stats { task Haplotag { input { + File vcf + File vcfIndex + File alignments + File alignmentsIndex String outputFile + File? reference File? referenceFastaIndex String? regions String? sample - File vcf - File vcfIndex - File alignments - File alignmentsIndex String memory = "4G" Int timeMinutes = 120 - # Whatshap 1.0, tabix 0.2.5 + # Whatshap 1.0, tabix 0.2.5. 
String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" } command { set -e - whatshap haplotag \ - ~{vcf} \ - ~{alignments} \ - ~{if defined(outputFile) then ("--output " + '"' + outputFile+ '"') else ""} \ - ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ - ~{if defined(regions) then ("--regions " + '"' + regions + '"') else ""} \ - ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} - - python3 -c "import pysam; pysam.index('~{outputFile}')" + ~{vcf} \ + ~{alignments} \ + ~{if defined(outputFile) then ("--output " + '"' + outputFile+ '"') else ""} \ + ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \ + ~{if defined(regions) then ("--regions " + '"' + regions + '"') else ""} \ + ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} + + python3 -c "import pysam; pysam.index('~{outputFile}')" } output { - File bam = outputFile - File bamIndex = outputFile + ".bai" + File bam = outputFile + File bamIndex = outputFile + ".bai" } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { - outputFile: "Output file. If omitted, use standard output." - reference: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created." - referenceFastaIndex: "Index for the reference file." - regions: "Specify region(s) of interest to limit the tagging to reads/variants overlapping those regions. You can specify a space-separated list of regions in the form of chrom:start-end, chrom (consider entire chromosome), or chrom:start (consider region from this start to end of chromosome)." - sample: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times." 
- vcf: "VCF file with phased variants (must be gzip-compressed and indexed)." - vcfIndex: "Index for the VCF or BCF file with variants to be phased." - alignments: "File (BAM/CRAM) with read alignments to be tagged by haplotype." - alignmentsIndex: "Index for the alignment file." + # inputs + vcf: {description: "VCF file with phased variants (must be gzip-compressed and indexed).", category: "required"} + vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased.", category: "required"} + alignments: {description: "File (BAM/CRAM) with read alignments to be tagged by haplotype.", category: "required"} + alignmentsIndex: {description: "Index for the alignment file.", category: "required"} + outputFile: {description: "Output file. If omitted, use standard output.", category: "required"} + reference: {description: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created.", category: "common"} + referenceFastaIndex: {description: "Index for the reference file.", category: "common"} + regions: {description: "Specify region(s) of interest to limit the tagging to reads/variants overlapping those regions. You can specify a space-separated list of regions in the form of chrom:start-end, chrom (consider entire chromosome), or chrom:start (consider region from this start to end of chromosome).", category: "advanced"} + sample: {description: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/wisestork.wdl b/wisestork.wdl index 0fd812b1..6be32168 100644 --- a/wisestork.wdl +++ b/wisestork.wdl @@ -22,13 +22,15 @@ version 1.0 task Count { input { - Int? binSize - File reference - File referenceIndex - File? binFile File inputBam File inputBamIndex + File reference + File referenceIndex String outputBed = "output.bed" + + Int? binSize + File? binFile + String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -54,15 +56,17 @@ task Count { task GcCorrect { input { - Int? binSize File reference File referenceIndex - File? binFile File inputBed String outputBed = "output.bed" + + Int? binSize + File? binFile Float? fracN Int? iter Float? fracLowess + String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -91,13 +95,16 @@ task GcCorrect { task Newref { input { - Int? binSize File reference File referenceIndex - File? binFile Array[File]+ inputBeds String outputBed = "output.bed" + + Int? binSize + File? binFile Int? nBins + + Int memory = 2 + ceil(length(inputBeds) * 0.15) String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -106,36 +113,36 @@ task Newref { mkdir -p $(dirname ~{outputBed}) wisestork newref \ ~{"--binsize " + binSize} \ - --reference ~{reference} \ - ~{"--bin-file " + binFile} \ - --output ~{outputBed} \ - -I ~{sep=" -I " inputBeds} \ - ~{"--n-bins " + nBins} + --reference ~{reference} \ + ~{"--bin-file " + binFile} \ + --output ~{outputBed} \ + -I ~{sep=" -I " inputBeds} \ + ~{"--n-bins " + nBins} } output { File bedFile = outputBed } - Int memory = 2 + ceil(length(inputBeds) * 0.15) - runtime { - docker: dockerImage memory: "~{memory}G" + docker: dockerImage } } task Zscore { input { - Int? binSize File reference File referenceIndex - File? 
binFile File inputBed File inputBedIndex File dictionaryFile File dictionaryFileIndex String outputBed = "output.bed" + + Int? binSize + File? binFile + String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -159,4 +166,3 @@ task Zscore { docker: dockerImage } } - From f34613058333fbc3a523ef513fdc6026cdd87378 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 2 Nov 2020 17:21:20 +0100 Subject: [PATCH 0675/1208] Update lima to match isoseq3 and ccs changes. --- CHANGELOG.md | 1 + lima.wdl | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e7d7fed7..9bff5f3c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ version 5.0.0-dev + Bwa & bwa-mem2: Add parameter_meta for `outputHla`. + Multiqc: Removed WDL_AID excludes of "finished" & "dependencies" inputs. + Bam2fastx: Add localisation of input files to Bam2Fasta task. ++ Lima: `cores` input has been renamed to `threads` to match tool naming. + isoseq3: `cores` input has been renamed to `threads` to match tool naming. + CCS: `cores` input has been renamed to `threads` to match tool naming. + Add PacBio preprocessing specific tasks `mergePacBio` & `ccsChunks`. 
diff --git a/lima.wdl b/lima.wdl index 1da4ef5e..33b2328b 100644 --- a/lima.wdl +++ b/lima.wdl @@ -48,7 +48,7 @@ task Lima { File barcodeFile String outputPrefix - Int cores = 2 + Int threads = 2 String memory = "2G" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/lima:1.11.0--0" @@ -82,7 +82,7 @@ task Lima { --guess-min-count ~{guessMinCount} \ ~{true="--peek-guess" false="" peekGuess} \ --log-level ~{logLevel} \ - --num-threads ~{cores} \ + --num-threads ~{threads} \ ~{"--log-file " + outputPrefix + ".stderr.log"} \ ~{inputBamFile} \ ~{barcodeFile} \ @@ -110,7 +110,7 @@ task Lima { } runtime { - cpu: cores + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -143,7 +143,7 @@ task Lima { inputBamFile: {description: "Bam input file.", category: "required"} barcodeFile: {description: "Barcode/primer fasta file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} - cores: {description: "The number of cores to be used.", category: "advanced"} + threads: {description: "The number of threads to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From b96ec320ded2fec077f358460376bba1582337ac Mon Sep 17 00:00:00 2001 From: Jasper Date: Tue, 3 Nov 2020 17:21:20 +0100 Subject: [PATCH 0676/1208] Update bwa.wdl. Co-authored-by: Davy Cats --- bwa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index 0f09f7a9..ee01957e 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -33,8 +33,8 @@ task Mem { String? readgroup Int? sortThreads + Int? 
memoryGb - Int threads = 4 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10 From adf58a85569ca3335874b5cf55bf86933aacbb8e Mon Sep 17 00:00:00 2001 From: Jasper Date: Tue, 3 Nov 2020 17:22:19 +0100 Subject: [PATCH 0677/1208] Update bwa-mem2.wdl. Co-authored-by: Davy Cats --- bwa-mem2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index 34cd38a6..89a48fbd 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -33,8 +33,8 @@ task Mem { String? readgroup Int? sortThreads + Int? memoryGb - Int threads = 4 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10 From a5aa0fef74bbadb4ea1562ebf65e860975dc3fbe Mon Sep 17 00:00:00 2001 From: Jasper Date: Tue, 3 Nov 2020 17:22:56 +0100 Subject: [PATCH 0678/1208] Update bwa-mem2.wdl --- bwa-mem2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index 89a48fbd..b4ca877a 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -37,7 +37,7 @@ task Mem { Int? memoryGb Int threads = 4 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) - # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10 + # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-6a15c99309c82b345497d24489bee67bbb76c2f6:1c9c3227b9bf825a8dc9726a25701aa23c0b1f12-0" } From d35543c91eba6179b6738f9ac3eb412ded0f60a6 Mon Sep 17 00:00:00 2001 From: Jasper Date: Tue, 3 Nov 2020 17:23:12 +0100 Subject: [PATCH 0679/1208] Update bwa.wdl --- bwa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index ee01957e..f4061729 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -37,7 +37,7 @@ task Mem { Int? 
memoryGb Int threads = 4 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) - # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10 + # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } From 38514b1db4049d1127f03e4c888027c99c2b9bcf Mon Sep 17 00:00:00 2001 From: Jasper Date: Tue, 3 Nov 2020 17:25:29 +0100 Subject: [PATCH 0680/1208] Update bam2fastx.wdl --- bam2fastx.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 2ad08581..2ae22a57 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -58,7 +58,7 @@ task Bam2Fasta { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{sep=" " bam} + $bamFiles } output { From 8a02fc35c76674d7b2b7e1d4b9addaaaea58e9ff Mon Sep 17 00:00:00 2001 From: Jasper Date: Wed, 4 Nov 2020 08:49:03 +0100 Subject: [PATCH 0681/1208] Update bwa-mem2.wdl --- bwa-mem2.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index b4ca877a..4566e68c 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -34,8 +34,8 @@ task Mem { String? readgroup Int? sortThreads - Int? memoryGb Int threads = 4 + Int? memoryGb Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10. 
String dockerImage = "quay.io/biocontainers/mulled-v2-6a15c99309c82b345497d24489bee67bbb76c2f6:1c9c3227b9bf825a8dc9726a25701aa23c0b1f12-0" @@ -101,8 +101,8 @@ task Mem { compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} readgroup: {description: "A readgroup identifier.", category: "common"} sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} - memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} threads: {description: "The number of threads to use for alignment.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From c8e043006f744f23155d0fba00ebec962bf5c910 Mon Sep 17 00:00:00 2001 From: Jasper Date: Wed, 4 Nov 2020 08:49:50 +0100 Subject: [PATCH 0682/1208] Update bwa.wdl --- bwa.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index f4061729..e87fd82a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -33,9 +33,9 @@ task Mem { String? readgroup Int? sortThreads - - Int? memoryGb + Int threads = 4 + Int? memoryGb Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. 
String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" @@ -96,8 +96,8 @@ task Mem { compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} readgroup: {description: "A readgroup identifier.", category: "common"} sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"} - memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} threads: {description: "The number of threads to use for alignment.", category: "advanced"} + memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 4a2f3366cb5f0cd57bfab8da01369c29c6a35063 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 5 Nov 2020 16:19:31 +0100 Subject: [PATCH 0683/1208] add tasks for amber and cobalt, group tasks from hmftools in one file --- gripss.wdl | 122 ----------------------------------------------------- sage.wdl | 100 ------------------------------------------- 2 files changed, 222 deletions(-) delete mode 100644 gripss.wdl delete mode 100644 sage.wdl diff --git a/gripss.wdl b/gripss.wdl deleted file mode 100644 index c9a8f27d..00000000 --- a/gripss.wdl +++ /dev/null @@ -1,122 +0,0 @@ -version 1.0 - -# Copyright (c) 2020 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of 
the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -task ApplicationKt { - input { - File inputVcf - String outputPath = "gripss.vcf.gz" - File referenceFasta - File referenceFastaFai - File referenceFastaDict - File breakpointHotspot - File breakendPon - File breakpointPon - - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" - } - - command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ - com.hartwig.hmftools.gripss.GripssApplicationKt \ - -ref_genome ~{referenceFasta} \ - -breakpoint_hotspot ~{breakpointHotspot} \ - -breakend_pon ~{breakendPon} \ - -breakpoint_pon ~{breakpointPon} \ - -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - } - - runtime { - memory: memory - time_minutes: timeMinutes # !UnknownRuntimeKey - docker: dockerImage - } - - parameter_meta { - inputVcf: {description: "The input VCF.", category: "required"} - outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: 
"The sequence dictionary associated with the reference fasta file.", - category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} - breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} - breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} - -task HardFilterApplicationKt { - input { - File inputVcf - String outputPath = "gripss_hard_filter.vcf.gz" - - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" - } - - command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ - com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ - -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - } - - runtime { - memory: memory - time_minutes: timeMinutes # !UnknownRuntimeKey - docker: dockerImage - } - - parameter_meta { - inputVcf: {description: "The input VCF.", category: "required"} - outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} - memory: {description: "The amount of memory this job will use.", category: 
"advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} \ No newline at end of file diff --git a/sage.wdl b/sage.wdl deleted file mode 100644 index ab42bee8..00000000 --- a/sage.wdl +++ /dev/null @@ -1,100 +0,0 @@ -version 1.0 - -# Copyright (c) 2020 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -task Sage { - input { - String tumorName - File tumorBam - File tumorBamIndex - String? normalName - File? normalBam - File? 
normalBamIndex - File referenceFasta - File referenceFastaDict - File referenceFastaFai - File hotspots - File panelBed - File highConfidenceBed - Boolean hg38 = false - String outputPath = "./sage.vcf.gz" - - Int threads = 2 - String javaXmx = "32G" - String memory = "33G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--2" - } - - command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-sage-2.2-2/sage.jar \ - com.hartwig.hmftools.sage.SageApplication \ - -tumor ~{tumorName} \ - -tumor_bam ~{tumorBam} \ - ~{"-reference " + normalName} \ - ~{"-reference_bam " + normalBam} \ - -ref_genome ~{referenceFasta} \ - -hotspots ~{hotspots} \ - -panel_bed ~{panelBed} \ - -high_confidence_bed ~{highConfidenceBed} \ - -assembly ~{true="hg38" false="hg19" hg38} \ - -threads ~{threads} \ - -out ~{outputPath} - } - - output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" - # There is some plots as well, but in the current container the labels in the plots are just series of `□`s. - # This seems to be a systemic issue with R generated plots in biocontainers... 
- } - - runtime { - time_minutes: timeMinutes # !UnknownRuntimeKey - cpu: threads - docker: dockerImage - memory: memory - } - - parameter_meta { - tumorName: {description: "The name of the tumor sample.", category: "required"} - tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} - tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} - normalName: {description: "The name of the normal/reference sample.", category: "common"} - normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} - panelBed: {description: "A bed file describing coding regions to search for in frame indels.", category: "required"} - highConfidenceBed: {description: "A bed files describing high confidence mapping regions.", category: "required"} - - memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} - } -} From 333f052f344b331591797bccbe45028c6882b770 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 6 Nov 2020 11:44:09 +0100 Subject: [PATCH 0684/1208] Update first set of parameter_meta. --- CHANGELOG.md | 1 + TO-DO.md | 13 +++++++ bcftools.wdl | 15 ++++++++ bedtools.wdl | 32 ++++++++++++++++ biopet/bamstats.wdl | 11 +++--- biopet/biopet.wdl | 85 ++++++++++++++++++++++------------------- biopet/sampleconfig.wdl | 21 +++++----- biopet/seqstat.wdl | 9 +++-- biowdl.wdl | 3 ++ bowtie.wdl | 4 ++ chunked-scatter.wdl | 6 +++ 11 files changed, 143 insertions(+), 57 deletions(-) create mode 100644 TO-DO.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bff5f3c..b7a8741f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Complete `parameter_meta` for tasks missing the outputs. + Samtools: `Sort` task now has `threads` in runtime instead of `1`. + Picard: Add parameter_meta to `SortSam`. + pbmm2: Add parameter_meta for `sample`. diff --git a/TO-DO.md b/TO-DO.md new file mode 100644 index 00000000..cc76a5d6 --- /dev/null +++ b/TO-DO.md @@ -0,0 +1,13 @@ +#TO DO +## Requires parameter_meta: +* biopet.wdl: `ExtractAdaptersFastqc`. + +## Duplicate tasks: +* + +## Out of date with new cluster & parameter_meta: +* bamstats.wdl: `Generate`. +* biopet.wdl: `BaseCounter`, `FastqSplitter`, `FastqSync`, + `ValidateAnnotation`, `ValidateFastq`, `ValidateVcf`, `VcfStats`. +* sampleconfig.wdl: `SampleConfig`, `SampleConfigCromwellArrays`, `CaseControl`. +* seqstat.wdl: `Generate`. 
diff --git a/bcftools.wdl b/bcftools.wdl index 41825747..28380dea 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -118,6 +118,10 @@ task Annotate { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Annotated VCF file."} + outputVcfIndex: {description: "Index of the annotated VCF file."} } } @@ -165,6 +169,10 @@ task Sort { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Sorted VCF file."} + outputVcfIndex: {description: "Index of sorted VCF file."} } } @@ -272,6 +280,9 @@ task Stats { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + stats: {description: "Text file stats which is suitable for machine processing and can be plotted using plot-vcfstats."} } } @@ -316,5 +327,9 @@ task View { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "VCF file."} + outputVcfIndex: {description: "Index of VCF file."} } } diff --git a/bedtools.wdl b/bedtools.wdl index b7a03c17..3dbf93cb 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -60,6 +60,9 @@ task Complement { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + complementBed: {description: "All intervals in a genome that are not covered by at least one interval in the input file."} } } @@ -95,6 +98,9 @@ task Merge { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + mergedBed: {description: "Merged bed file."} } } @@ -132,6 +138,9 @@ task MergeBedFiles { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + mergedBed: {description: "Merged bed file."} } } @@ -179,6 +188,26 @@ task Sort { time_minutes: timeMinutes docker: dockerImage } + + parameter_meta { + # inputs + inputBed: {description: "The bed to sort.", category: "required"} + sizeA: {description: "Sort by feature size in ascending order.", category: "common"} + sizeD: {description: "Sort by feature size in descending order.", category: "common"} + chrThenSizeA: {description: "Sort by chromosome (asc), then by feature size (asc).", category: "common"} + chrThenSizeD: {description: "Sort by chromosome (asc), then by feature size (desc).", category: "common"} + chrThenScoreA: {description: "Sort by chromosome (asc), then by score (asc).", category: "common"} + chrThenScoreD: {description: "Sort by chromosome (asc), then by score (desc).", category: "common"} + outputBed: {description: "The path to write the output to.", category: "advanced"} + genome: {description: "Define sort order by order of tab-delimited file with chromosome names in the first column.", category: "advanced"} + faidx: {description: "Define sort order by order of tab-delimited file with chromosome names in the first column. 
Sort by specified chromosome order.", category: "advanced"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + sortedBed: {description: "The sorted bed file."} + } } task Intersect { @@ -226,5 +255,8 @@ task Intersect { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + intersectedBed: {description: "The intersected bed file."} } } diff --git a/biopet/bamstats.wdl b/biopet/bamstats.wdl index d71355d3..d01bc10c 100644 --- a/biopet/bamstats.wdl +++ b/biopet/bamstats.wdl @@ -24,18 +24,19 @@ import "../common.wdl" as common task Generate { input { - String? preCommand - File? toolJar IndexedBamFile bam - File? bedFile Boolean scatterMode = false Boolean onlyUnmapped = false Boolean tsvOutputs = false String outputDir + + String? preCommand + File? toolJar + File? bedFile Reference? reference - String memory = "9G" String javaXmx = "8G" + String memory = "9G" } File referenceFasta = if defined(reference) then select_first([reference]).fasta else "" @@ -66,4 +67,4 @@ task Generate { runtime { memory: memory } -} \ No newline at end of file +} diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index 89319409..07f51e67 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -24,15 +24,16 @@ import "../common.wdl" task BaseCounter { input { - String? preCommand - File? 
toolJar IndexedBamFile bam File refFlat String outputDir String prefix - String memory = "5G" + String? preCommand + File? toolJar + String javaXmx = "4G" + String memory = "5G" } String toolCommand = if defined(toolJar) @@ -98,16 +99,17 @@ task ExtractAdaptersFastqc { String outputDir String adapterOutputFilePath = outputDir + "/adapter.list" String contamsOutputFilePath = outputDir + "/contaminations.list" + Boolean? skipContams File? knownContamFile File? knownAdapterFile Float? adapterCutoff Boolean? outputAsFasta - String memory = "9G" String javaXmx = "8G" - String dockerImage = "quay.io/biocontainers/biopet-extractadaptersfastqc:0.2--1" + String memory = "9G" Int timeMinutes = 5 + String dockerImage = "quay.io/biocontainers/biopet-extractadaptersfastqc:0.2--1" } command { @@ -133,20 +135,21 @@ task ExtractAdaptersFastqc { runtime { memory: memory - docker: dockerImage time_minutes: timeMinutes + docker: dockerImage } } task FastqSplitter { input { - String? preCommand File inputFastq Array[String]+ outputPaths + + String? preCommand File? toolJar - String memory = "5G" String javaXmx = "4G" + String memory = "5G" String dockerImage = "quay.io/biocontainers/biopet-fastqsplitter:0.1--2" } @@ -170,15 +173,16 @@ task FastqSplitter { task FastqSync { input { - String? preCommand FastqPair refFastq FastqPair inputFastq String out1path String out2path + + String? preCommand File? toolJar - String memory = "5G" String javaXmx = "4G" + String memory = "5G" } String toolCommand = if defined(toolJar) @@ -200,8 +204,8 @@ task FastqSync { output { FastqPair out1 = object { - R1: out1path, - R2: out2path + R1: out1path, + R2: out2path } } @@ -215,14 +219,15 @@ task ScatterRegions { File referenceFasta File referenceFastaDict Int scatterSizeMillions = 1000 + Boolean notSplitContigs = false + Int? scatterSize File? regions - Boolean notSplitContigs = false File? bamFile File? 
bamIndex - String memory = "1G" String javaXmx = "500M" + String memory = "1G" Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/biopet-scatterregions:0.2--0" } @@ -264,41 +269,40 @@ task ScatterRegions { } runtime { - docker: dockerImage - time_minutes: timeMinutes memory: memory + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", - category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} + notSplitContigs: {description: "Equivalent to biopet scatterregions' `--notSplitContigs` flag.", category: "advanced"} scatterSize: {description: "Overrides scatterSizeMillions with a smaller value if set.", category: "advanced"} regions: {description: "The regions to be scattered.", category: "advanced"} - notSplitContigs: {description: "Equivalent to biopet scatterregions' `--notSplitContigs` flag.", - category: "advanced"} - bamFile: {description: "Equivalent to biopet scatterregions' `--bamfile` option.", - category: "advanced"} + bamFile: {description: "Equivalent to biopet scatterregions' `--bamfile` option.", category: "advanced"} bamIndex: {description: "The index for the bamfile given through bamFile.", category: "advanced"} - + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + scatters: {description: "Smaller scatter regions of equal size."} } } task ValidateAnnotation { input { + Reference reference + File? refRefflat File? gtfFile - Reference reference - String memory = "4G" String javaXmx = "3G" + String memory = "4G" String dockerImage = "quay.io/biocontainers/biopet-validateannotation:0.1--0" } @@ -323,8 +327,9 @@ task ValidateFastq { input { File read1 File? read2 - String memory = "4G" + String javaXmx = "3G" + String memory = "4G" String dockerImage = "quay.io/biocontainers/biopet-validatefastq:0.1.1--1" } @@ -348,8 +353,9 @@ task ValidateVcf { input { IndexedVcfFile vcf Reference reference - String memory = "4G" + String javaXmx = "3G" + String memory = "4G" String dockerImage = "quay.io/biocontainers/biopet-validatevcf:0.1--0" } @@ -374,12 +380,6 @@ task VcfStats { IndexedVcfFile vcf Reference reference String outputDir - File? intervals - Array[String]+? infoTags - Array[String]+? genotypeTags - Int? sampleToSampleMinDepth - Int? binSize - Int? maxContigsInSingleJob Boolean writeBinStats = false Int localThreads = 1 Boolean notWriteContigStats = false @@ -387,13 +387,20 @@ task VcfStats { Boolean skipGenotype = false Boolean skipSampleDistributions = false Boolean skipSampleCompare = false + + File? intervals + Array[String]+? infoTags + Array[String]+? genotypeTags + Int? sampleToSampleMinDepth + Int? binSize + Int? maxContigsInSingleJob String? sparkMaster Int? sparkExecutorMemory Array[String]+? 
sparkConfigValues - String dockerImage = "quay.io/biocontainers/biopet-vcfstats:1.2--0" - String memory = "5G" String javaXmx = "4G" + String memory = "5G" + String dockerImage = "quay.io/biocontainers/biopet-vcfstats:1.2--0" } command { diff --git a/biopet/sampleconfig.wdl b/biopet/sampleconfig.wdl index 2b36952b..f3955658 100644 --- a/biopet/sampleconfig.wdl +++ b/biopet/sampleconfig.wdl @@ -24,18 +24,19 @@ import "../common.wdl" as common task SampleConfig { input { - File? toolJar - String? preCommand Array[File]+ inputFiles String keyFilePath + + File? toolJar + String? preCommand String? sample String? library String? readgroup String? jsonOutputPath String? tsvOutputPath - String memory = "17G" String javaXmx = "16G" + String memory = "17G" } String toolCommand = if defined(toolJar) @@ -69,13 +70,14 @@ task SampleConfig { task SampleConfigCromwellArrays { input { - File? toolJar - String? preCommand Array[File]+ inputFiles String outputPath - String memory = "5G" + File? toolJar + String? preCommand + String javaXmx = "4G" + String memory = "5G" } String toolCommand = if defined(toolJar) @@ -102,16 +104,17 @@ task SampleConfigCromwellArrays { task CaseControl { input { - File? toolJar - String? preCommand Array[File]+ inputFiles Array[File]+ inputIndexFiles Array[File]+ sampleConfigs String outputPath String controlTag = "control" - String memory = "5G" + File? toolJar + String? preCommand + String javaXmx = "4G" + String memory = "5G" } String toolCommand = if defined(toolJar) diff --git a/biopet/seqstat.wdl b/biopet/seqstat.wdl index e3a55ec3..c2eb5866 100644 --- a/biopet/seqstat.wdl +++ b/biopet/seqstat.wdl @@ -24,16 +24,17 @@ import "../common.wdl" as common task Generate { input { - String? preCommand - File? toolJar FastqPair fastq String outputFile String sample String library String readgroup - String memory = "5G" + String? preCommand + File? 
toolJar + String javaXmx = "4G" + String memory = "5G" } String toolCommand = if defined(toolJar) @@ -60,4 +61,4 @@ task Generate { runtime { memory: memory } -} \ No newline at end of file +} diff --git a/biowdl.wdl b/biowdl.wdl index 8a1f9dfd..06b1d756 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -68,5 +68,8 @@ task InputConverter { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + json: {description: "JSON file version of the input sample sheet."} } } diff --git a/bowtie.wdl b/bowtie.wdl index 7fb1b614..87210dcd 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -100,5 +100,9 @@ task Bowtie { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "Output alignment file."} + outputBamIndex: {description: "Index of output alignment file."} } } diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 844d6990..fba1af5a 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -68,6 +68,9 @@ task ChunkedScatter { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + scatters: {description: "Overlapping chunks of a given size in new bed files."} } } @@ -118,5 +121,8 @@ task ScatterRegions { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + scatters: {description: "Bed file where the contigs add up approximately to the given scatter size."} } } From 9d68eb40b045b859cb2619b0f1cec1d2f7437f2e Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 6 Nov 2020 12:08:06 +0100 Subject: [PATCH 0685/1208] Update more tasks. --- .github/PULL_REQUEST_TEMPLATE.md | 3 ++- TO-DO.md | 19 ++++++++++++++----- clever.wdl | 5 ++++- collect-columns.wdl | 3 +++ common.wdl | 6 ++++++ 5 files changed, 29 insertions(+), 7 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 1d52f502..3b4ec9ac 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,3 +1,4 @@ ### Checklist - [ ] Pull request details were added to CHANGELOG.md. -- [ ] `parameter_meta` for each task is up to date. +- [ ] Documentation was updated (if required). +- [ ] `parameter_meta` was added/updated (if required). diff --git a/TO-DO.md b/TO-DO.md index cc76a5d6..7a18bb33 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -1,13 +1,22 @@ #TO DO ## Requires parameter_meta: -* biopet.wdl: `ExtractAdaptersFastqc`. +* biopet.wdl: `ExtractAdaptersFastqc` ## Duplicate tasks: * ## Out of date with new cluster & parameter_meta: -* bamstats.wdl: `Generate`. 
+* bamstats.wdl: `Generate` * biopet.wdl: `BaseCounter`, `FastqSplitter`, `FastqSync`, - `ValidateAnnotation`, `ValidateFastq`, `ValidateVcf`, `VcfStats`. -* sampleconfig.wdl: `SampleConfig`, `SampleConfigCromwellArrays`, `CaseControl`. -* seqstat.wdl: `Generate`. + `ValidateAnnotation`, `ValidateFastq`, `ValidateVcf`, `VcfStats` +* sampleconfig.wdl: `SampleConfig`, `SampleConfigCromwellArrays`, `CaseControl` +* seqstat.wdl: `Generate` +* common.wdl: `AppendToStringArray`, `CheckFileMD5`, `ConcatenateTextFiles`, + `Copy`, `CreateLink`, `MapMd5`, `StringArrayMd5` + +## Imports other tasks: +* bamstats.wdl +* biopet.wdl +* sampleconfig.wdl +* seqstat.wdl +* clever.wdl diff --git a/clever.wdl b/clever.wdl index 75e889b3..186be514 100644 --- a/clever.wdl +++ b/clever.wdl @@ -80,6 +80,9 @@ task Mateclever { memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + matecleverVcf: {description: "VCF with additional mateclever results."} } } @@ -132,6 +135,6 @@ task Prediction { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - predictions: {description: "The predicted deletions (VCF) from clever.", category: "advanced"} + predictions: {description: "The predicted deletions (VCF) from clever."} } } diff --git a/collect-columns.wdl b/collect-columns.wdl index 67db6179..3d65c7e7 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -83,5 +83,8 @@ task CollectColumns { memoryGb: {description: "The maximum amount of memory the job will need in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputTable: {description: "All input columns combined into one table."} } } diff --git a/common.wdl b/common.wdl index b3878bb6..66bdb99c 100644 --- a/common.wdl +++ b/common.wdl @@ -207,6 +207,9 @@ task TextToFile { outputFile: {description: "The name of the output file.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + out: {description: "File containing input text."} } } @@ -251,6 +254,9 @@ task YamlToJson { memory: {description: "The maximum amount of memory the job will need.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + json: {description: "JSON file version of input YAML."} } } From 3123947972f8a4cb288f96e539e143b40e3e136a Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 6 Nov 2020 13:04:35 +0100 Subject: [PATCH 0686/1208] Update even more tasks. --- CPAT.wdl | 3 +++ TO-DO.md | 12 ++++++------ biopet/biopet.wdl | 23 +++++++++++++++++++++++ cutadapt.wdl | 14 ++++++++++++++ 4 files changed, 46 insertions(+), 6 deletions(-) diff --git a/CPAT.wdl b/CPAT.wdl index d97031dc..afb67853 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -76,6 +76,9 @@ task CPAT { stopCodons: {description: "Equivalent to CPAT's `--stop` option.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outFile: {description: "CPAT logistic regression model."} } } diff --git a/TO-DO.md b/TO-DO.md index 7a18bb33..be118b70 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -1,10 +1,4 @@ #TO DO -## Requires parameter_meta: -* biopet.wdl: `ExtractAdaptersFastqc` - -## Duplicate tasks: -* - ## Out of date with new cluster & parameter_meta: * bamstats.wdl: `Generate` * biopet.wdl: `BaseCounter`, `FastqSplitter`, `FastqSync`, @@ -20,3 +14,9 @@ * sampleconfig.wdl * seqstat.wdl * clever.wdl + +## Requires parameter_meta: +* + +## Duplicate tasks: +* diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl index 07f51e67..e6619e09 100644 --- a/biopet/biopet.wdl +++ b/biopet/biopet.wdl @@ -138,6 +138,29 @@ task ExtractAdaptersFastqc { time_minutes: timeMinutes docker: dockerImage } + + parameter_meta { + # inputs + inputFile: {description: "Input fastq file.", category: "required"} + outputDir: {description: "The path to which the output should be written.", 
category: "required"} + adapterOutputFilePath: {description: "Output file for adapters, if not supplied output will go to stdout.", category: "common"} + contamsOutputFilePath: {description: "Output file for contaminations, if not supplied output will go to stdout.", category: "common"} + skipContams: {description: "If this is set only the adapters block is used, other wise contaminations is also used.", category: "advanced"} + knownContamFile: {description: "This file should contain the known contaminations from fastqc.", category: ""advanced} + knownAdapterFile: {description: "This file should contain the known adapters from fastqc.", category: "advanced"} + adapterCutoff: {description: "The fraction of the adapters in a read should be above this fraction, default is 0.001.", category: "advanced"} + outputAsFasta: {description: "Output in fasta format, default only sequences.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + adapterOutputFile: {description: "Output file with adapters."} + contamsOutputFile: {description: "Output file with contaminations."} + adapterList: {description: "List of adapters."} + contamsList: {description: "List of contaminations."} + } } task FastqSplitter { diff --git a/cutadapt.wdl b/cutadapt.wdl index 74f57912..8d409c3d 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -235,5 +235,19 @@ task Cutadapt { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + cutRead1: {description: ""} + report: {description: ""} + cutRead2: {description: ""} + tooLongOutput: {description: ""} + tooShortOutput: {description: ""} + untrimmedOutput: {description: ""} + tooLongPairedOutput: {description: ""} + tooShortPairedOutput: {description: ""} + untrimmedPairedOutput: {description: ""} + infoFile: {description: ""} + restFile: {description: ""} + wildcardFile: {description: ""} } } From e76c0c02f410eb8b8f9b09b9cbccb125930d4c35 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 6 Nov 2020 13:50:40 +0100 Subject: [PATCH 0687/1208] Upload more tasks. --- cutadapt.wdl | 24 ++++++++++++------------ deepvariant.wdl | 7 +++++++ delly.wdl | 3 +++ fastqc.wdl | 12 ++++++++++++ 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index 8d409c3d..b2dbdec0 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -237,17 +237,17 @@ task Cutadapt { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - cutRead1: {description: ""} - report: {description: ""} - cutRead2: {description: ""} - tooLongOutput: {description: ""} - tooShortOutput: {description: ""} - untrimmedOutput: {description: ""} - tooLongPairedOutput: {description: ""} - tooShortPairedOutput: {description: ""} - untrimmedPairedOutput: {description: ""} - infoFile: {description: ""} - restFile: {description: ""} - wildcardFile: {description: ""} + cutRead1: {description: "Trimmed read one."} + report: {description: "Per-adapter statistics file."} + cutRead2: {description: "Trimmed read two in pair."} + tooLongOutput: {description: "Reads that are too long according to -M."} + tooShortOutput: {description: "Reads that are too short according to -m."} + untrimmedOutput: {description: "All reads without adapters (instead of the regular output file)."} + tooLongPairedOutput: {description: "Second reads in a pair."} + tooShortPairedOutput: {description: "Second reads in a pair."} + untrimmedPairedOutput: {description: "The second reads in a pair that were not trimmed."} + infoFile: {description: "Detailed information about where adapters were found in each read."} + restFile: {description: "The rest file."} + wildcardFile: {description: "The wildcard file."} } } diff --git a/deepvariant.wdl b/deepvariant.wdl index 8b08e111..f71a1c88 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -90,5 +90,12 @@ task RunDeepVariant { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVCF: {description: "Output VCF file."} + outputVCFIndex: {description: "Index of output VCF file."} + outputVCFStatsReport: {description: "Statistics file."} + outputGVCF: {description: "GVCF version of VCF file(s)."} + outputGVCFIndex: {description: "Index of GVCF file(s)."} } } diff --git a/delly.wdl b/delly.wdl index ffe9023a..bf00ed36 100644 --- a/delly.wdl +++ b/delly.wdl @@ -62,5 +62,8 @@ task CallSV { memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + dellyBcf: {description: "File containing structural variants."} } } diff --git a/fastqc.wdl b/fastqc.wdl index feeeaae5..7ca0baa8 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -125,6 +125,13 @@ task Fastqc { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + htmlReport: {description: ""} + reportZip: {description: ""} + summary: {description: ""} + rawReport: {description: ""} + images: {description: ""} } meta { @@ -167,5 +174,10 @@ task GetConfiguration { # inputs timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + adapterList: {description: ""} + contaminantList: {description: ""} + limits: {description: ""} } } From 091c3e313077aff989a43e13052625330646ad47 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 6 Nov 2020 17:12:40 +0100 Subject: [PATCH 0688/1208] Upload another batch. --- TO-DO.md | 2 ++ fastqc.wdl | 16 ++++++++-------- gatk.wdl | 45 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 55 insertions(+), 8 deletions(-) diff --git a/TO-DO.md b/TO-DO.md index be118b70..8c1723b6 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -7,6 +7,8 @@ * seqstat.wdl: `Generate` * common.wdl: `AppendToStringArray`, `CheckFileMD5`, `ConcatenateTextFiles`, `Copy`, `CreateLink`, `MapMd5`, `StringArrayMd5` +* fastqsplitter.wdl: `Fastqsplitter` +* flash.wdl: `Flash` ## Imports other tasks: * bamstats.wdl diff --git a/fastqc.wdl b/fastqc.wdl index 7ca0baa8..973eeed9 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -127,11 +127,11 @@ task Fastqc { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - htmlReport: {description: ""} - reportZip: {description: ""} - summary: {description: ""} - rawReport: {description: ""} - images: {description: ""} + htmlReport: {description: "HTML report file."} + reportZip: {description: "Source data file."} + summary: {description: "Summary file."} + rawReport: {description: "Raw report file."} + images: {description: "Images in report file."} } meta { @@ -176,8 +176,8 @@ task GetConfiguration { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - adapterList: {description: ""} - contaminantList: {description: ""} - limits: {description: ""} + adapterList: {description: "List of adapters found."} + contaminantList: {description: "List of contaminants found."} + limits: {description: "Limits file."} } } diff --git a/gatk.wdl b/gatk.wdl index 7aa2915c..d13c1175 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -78,6 +78,9 @@ task AnnotateIntervals { javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + annotatedIntervals: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a sequence dictionary, a row specifying the column headers for the contained annotations, and the corresponding entry rows."} } } @@ -145,6 +148,11 @@ task ApplyBQSR { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + recalibratedBam: {description: "A BAM file containing the recalibrated read data."} + recalibratedBamIndex: {description: "Index of recalibrated BAM file."} + recalibratedBamMd5: {description: "MD5 of recalibrated BAM file."} } } @@ -211,6 +219,9 @@ task BaseRecalibrator { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + recalibrationReport: {description: "A GATK Report file with many tables."} } } @@ -255,6 +266,10 @@ task CalculateContamination { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + contaminationTable: {description: "Table with fractions of reads from cross-sample contamination."} + mafTumorSegments: {description: "Tumor segments table."} } } @@ -297,6 +312,10 @@ task CallCopyRatioSegments { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + calledSegments: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a read group sample name, a sequence dictionary, a row specifying the column headers contained in CalledCopyRatioSegmentCollection.CalledCopyRatioSegmentTableColumn, and the corresponding entry rows."} + calledSegmentsIgv: {description: "This is a tab-separated values (TSV) file with CBS-format column headers and the corresponding entry rows that can be plotted using IGV."} } } @@ -353,6 +372,9 @@ task CollectAllelicCounts { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + allelicCounts: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a read group sample name, a sequence dictionary, a row specifying the column headers contained in AllelicCountCollection.AllelicCountTableColumn, and the corresponding entry rows."} } } @@ -410,6 +432,9 @@ task CollectReadCounts { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + counts: {description: "Read counts at specified intervals."} } } @@ -464,6 +489,10 @@ task CombineGVCFs { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "A combined multi-sample gVCF."} + outputVcfIndex: {description: "Index of the output file."} } } @@ -535,6 +564,10 @@ task CombineVariants { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + combinedVcf: {description: "Combined VCF file."} + combinedVcfIndex: {description: "Index of combined VCF file."} } } @@ -580,6 +613,9 @@ task CreateReadCountPanelOfNormals { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + PON: {description: "Panel-of-normals file."} } } @@ -630,6 +666,10 @@ task DenoiseReadCounts { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + standardizedCopyRatios: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a read group sample name, a sequence dictionary, a row specifying the column headers contained in CopyRatioCollection.CopyRatioTableColumn, and the corresponding entry rows."} + denoisedCopyRatios: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a read group sample name, a sequence dictionary, a row specifying the column headers contained in CopyRatioCollection.CopyRatioTableColumn, and the corresponding entry rows."} } } @@ -700,6 +740,11 @@ task FilterMutectCalls { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + filteredVcf: {description: ""} + filteredVcfIndex: {description: ""} + filteringStats: {description: ""} } } From eeff6ce5e37f75aa508fec3bf9ba38bede23dd17 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 9 Nov 2020 11:01:08 +0100 Subject: [PATCH 0689/1208] Update gatk with outputs. 
--- gatk.wdl | 80 +++++++++++++++++++++++++++++++++++++++++++++++--- gffcompare.wdl | 9 ++++++ 2 files changed, 85 insertions(+), 4 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index d13c1175..82244caa 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -581,7 +581,8 @@ task CreateReadCountPanelOfNormals { String javaXmx = "7G" String memory = "8G" Int timeMinutes = 5 - String dockerImage = "broadinstitute/gatk:4.1.8.0" # The biocontainer causes a spark related error for some reason... + # The biocontainer causes a spark related error for some reason. + String dockerImage = "broadinstitute/gatk:4.1.8.0" } command { @@ -742,9 +743,9 @@ task FilterMutectCalls { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - filteredVcf: {description: ""} - filteredVcfIndex: {description: ""} - filteringStats: {description: ""} + filteredVcf: {description: "VCF file with filtered variants from a Mutect2 VCF callset."} + filteredVcfIndex: {description: "Index of output VCF file."} + filteringStats: {description: "The output filtering stats file."} } } @@ -787,6 +788,9 @@ task GatherBqsrReports { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBQSRreport: {description: "Single file with scattered BQSR recalibration reports gathered into one."} } } @@ -840,6 +844,9 @@ task GenomicsDBImport { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + genomicsDbTarArchive: {description: "Imported VCFs to GenomicsDB file."} } } @@ -907,6 +914,10 @@ task GenotypeGVCFs { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVCF: {description: "A final VCF in which all samples have been jointly genotyped. "} + outputVCFIndex: {description: "Index of final VCF file."} } } @@ -959,6 +970,9 @@ task GetPileupSummaries { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + pileups: {description: "Pileup metrics for inferring contamination."} } } @@ -1047,6 +1061,10 @@ task HaplotypeCaller { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVCF: {description: "Raw, unfiltered SNP and indel calls."} + outputVCFIndex: {description: "Index of output VCF."} } } @@ -1085,6 +1103,9 @@ task LearnReadOrientationModel { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + artifactPriorsTable: {description: "Maximum likelihood estimates of artifact prior probabilities in the orientation bias mixture model filter."} } } @@ -1123,6 +1144,9 @@ task MergeStats { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + mergedStats: {description: "Merged stats from scattered Mutect2 runs."} } } @@ -1190,6 +1214,19 @@ task ModelSegments { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + hetrozygousAllelicCounts: {description: "Allelic-counts file containing the counts at sites genotyped as heterozygous in the case sample."} + copyRatioSegments: {description: "It contains the segments from the .modelFinal.seg file converted to a format suitable for input to CallCopyRatioSegments."} + copyRatioCBS: {description: "The posterior medians of the log2 copy ratio."} + alleleFractionCBS: {description: "Minor-allele fraction."} + unsmoothedModeledSegments: {description: "The initial modeled-segments result before segmentation smoothing."} + unsmoothedCopyRatioParameters: {description: "The initial copy-ratio-model global-parameter result before segmentation smoothing"} + unsmoothedAlleleFractionParameters: {description: "The initial allele-fraction-model global-parameter result before segmentation smoothing."} + modeledSegments: {description: "The final modeled-segments result after segmentation smoothing."} + copyRatioParameters: {description: "The final copy-ratio-model global-parameter result after segmentation smoothing"} + alleleFractionParameters: {description: "The final allele-fraction-model global-parameter result after segmentation smoothing."} + normalHetrozygousAllelicCounts: {description: "Allelic-counts file containing the counts at sites genotyped as heterozygous in the matched-normal sample."} } } @@ -1268,6 +1305,12 @@ task 
MuTect2 { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + vcfFile: {description: "Somatic SNVs and indels called via local assembly of haplotypes."} + vcfFileIndex: {description: "Index for Mutect2 VCF."} + f1r2File: {description: "Contains information that can then be passed to LearnReadOrientationModel, which generate an artifact prior table for each tumor sample for FilterMutectCalls to use."} + stats: {description: "Stats file."} } } @@ -1327,6 +1370,14 @@ task PlotDenoisedCopyRatios { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + denoisedCopyRatiosPlot: {description: "Plot showing the entire range of standardized and denoised copy ratios."} + standardizedMedianAbsoluteDeviation: {description: "Standardized median absolute deviation copy ratios."} + denoisedMedianAbsoluteDeviation: {description: "Denoised median absolute deviation copy ratios."} + deltaMedianAbsoluteDeviation: {description: "The change between `standardizedMedianAbsoluteDeviation` & `denoisedMedianAbsoluteDeviation`."} + deltaScaledMedianAbsoluteDeviation: {description: "The change between `standardizedMedianAbsoluteDeviation` & `denoisedMedianAbsoluteDeviation` scaled by standardized MAD."} + denoisedCopyRatiosLimitedPlot: {description: "Plot showing the standardized and denoised copy ratios limited to ratios within [0, 4]."} } } @@ -1384,6 +1435,9 @@ task PlotModeledSegments { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + modeledSegmentsPlot: {description: "This plot shows the input denoised copy ratios and/or alternate-allele fractions as points, as well as box plots for the available posteriors in each segment."} } } @@ -1443,6 +1497,9 @@ task PreprocessIntervals { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + intervalList: {description: "Preprocessed Picard interval-list file."} } } @@ -1501,6 +1558,10 @@ task SelectVariants { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "A new VCF file containing the selected subset of variants."} + outputVcfIndex: {description: "Index of the new output VCF file."} } } @@ -1555,6 +1616,10 @@ task SplitNCigarReads { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + bam: {description: "BAM file with reads split at N CIGAR elements and CIGAR strings updated."} + bamIndex: {description: "Index of output BAM file."} } } @@ -1639,6 +1704,9 @@ task VariantEval { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + table: {description: "Evaluation tables detailing the results of the eval modules which were applied."} } } @@ -1696,5 +1764,9 @@ task VariantFiltration { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + filteredVcf: {description: "A filtered VCF in which passing variants are annotated as PASS and failing variants are annotated with the name(s) of the filter(s) they failed. "} + filteredVcfIndex: {description: "Index of filtered VCF."} } } diff --git a/gffcompare.wdl b/gffcompare.wdl index 8bd53091..f8f0ade0 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -142,6 +142,15 @@ task GffCompare { namePrefix: {description: "Equivalent to gffcompare's `-p` option.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + annotated: {description: ""} + loci: {description: ""} + stats: {description: ""} + tracking: {description: ""} + allFiles: {description: ""} + redundant: {description: ""} + missedIntrons: {description: ""} } meta { From 47aab9c501eb8c3a80250c4d993d0b2e5614ae16 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 9 Nov 2020 11:33:45 +0100 Subject: [PATCH 0690/1208] Upload some more tasks. 
--- gffcompare.wdl | 12 ++++++------ gffread.wdl | 6 ++++++ gridss.wdl | 6 ++++++ 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/gffcompare.wdl b/gffcompare.wdl index f8f0ade0..221c4907 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -144,13 +144,13 @@ task GffCompare { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - annotated: {description: ""} + annotated: {description: "Annotated GTF file."} loci: {description: ""} - stats: {description: ""} - tracking: {description: ""} - allFiles: {description: ""} - redundant: {description: ""} - missedIntrons: {description: ""} + stats: {description: "Various statistics related to the “accuracy” (or a measure of agreement) of the input transcripts when compared to reference annotation data."} + tracking: {description: "File matching transcripts up between samples."} + allFiles: {description: "A collection of all outputs files."} + redundant: {description: "File containing "duplicate"/"redundant" transcripts."} + missedIntrons: {description: "File denoting missed introns."} } meta { diff --git a/gffread.wdl b/gffread.wdl index 343011e9..66230989 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -80,5 +80,11 @@ task GffRead { filteredGffPath: {description: "The location the filtered GFF should be written to.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + exonsFasta: {description: "Fasta file containing exons."} + CDSFasta: {description: "Fasta file containing CDS's."} + proteinFasta: {description: "Fasta file containing proteins."} + filteredGff: {description: "Filtered GFF file."} } } diff --git a/gridss.wdl b/gridss.wdl index 9499be5e..65579fd9 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -81,5 +81,11 @@ task GRIDSS { jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling.",category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + vcf: {description: ""} + vcfIndex: {description: ""} + assembly: {description: ""} + assemblyIndex: {description: ""} } } From 68d98441faddc47b8060d42864e11df7907bc0e6 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 9 Nov 2020 14:28:07 +0100 Subject: [PATCH 0691/1208] Update more tasks. 
--- LICENSE | 6 ++---- README.md | 16 ++++++---------- TO-DO.md | 9 +++++++++ gffcompare.wdl | 2 +- gridss.wdl | 8 ++++---- hisat2.wdl | 4 ++++ htseq.wdl | 3 +++ manta.wdl | 7 +++++++ requirements-test.txt | 11 ++++++++++- 9 files changed, 46 insertions(+), 20 deletions(-) diff --git a/LICENSE b/LICENSE index 37eeade5..b1f2b679 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,3 @@ -MIT License - Copyright (c) 2017 Leiden University Medical Center Permission is hereby granted, free of charge, to any person obtaining a copy @@ -9,8 +7,8 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, diff --git a/README.md b/README.md index 246e3814..4bc58367 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,21 @@ # Tasks - This repository contains the WDL task definitions used in the various [Biowdl](https://github.com/biowdl) workflows and pipelines. - ## Documentation - -Documentation for this workflow can be found +Documentation for this repository can be found [here](https://biowdl.github.io/tasks/). ## About -These tasks are part of [Biowdl](https://github.com/biowdl) -developed by [the SASC team](http://sasc.lumc.nl/). +These workflows are part of [Biowdl](https://github.com/biowdl) +developed by the SASC team at [Leiden University Medical Center](https://www.lumc.nl/). ## Contact -

-For any question related to these tasks, please use the +For any question related to Expression-Quantification, please use the github issue tracker -or contact - the SASC team directly at: +or contact the SASC team directly at: + sasc@lumc.nl.

diff --git a/TO-DO.md b/TO-DO.md index 8c1723b6..69f359bd 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -1,4 +1,12 @@ #TO DO +This file describes WDL files and tasks within those files which need +more specific attention than just adding outputs to the parameter_meta. + +Some tasks have not been updated to match the new SLURM requirements and are +missing a parameter_meta section. + +Some tasks are importing other WDL files. + ## Out of date with new cluster & parameter_meta: * bamstats.wdl: `Generate` * biopet.wdl: `BaseCounter`, `FastqSplitter`, `FastqSync`, @@ -9,6 +17,7 @@ `Copy`, `CreateLink`, `MapMd5`, `StringArrayMd5` * fastqsplitter.wdl: `Fastqsplitter` * flash.wdl: `Flash` +* macs2.wdl: `PeakCalling` ## Imports other tasks: * bamstats.wdl diff --git a/gffcompare.wdl b/gffcompare.wdl index 221c4907..4b0d6d22 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -149,7 +149,7 @@ task GffCompare { stats: {description: "Various statistics related to the “accuracy” (or a measure of agreement) of the input transcripts when compared to reference annotation data."} tracking: {description: "File matching transcripts up between samples."} allFiles: {description: "A collection of all outputs files."} - redundant: {description: "File containing "duplicate"/"redundant" transcripts."} + redundant: {description: "File containing duplicate/redundant transcripts."} missedIntrons: {description: "File denoting missed introns."} } diff --git a/gridss.wdl b/gridss.wdl index 65579fd9..5f48afac 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -83,9 +83,9 @@ task GRIDSS { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - vcf: {description: ""} - vcfIndex: {description: ""} - assembly: {description: ""} - assemblyIndex: {description: ""} + vcf: {description: "VCF file including variant allele fractions."} + vcfIndex: {description: "Index of output VCF."} + assembly: {description: "The GRIDSS assembly BAM."} + assemblyIndex: {description: "Index of output BAM file."} } } diff --git a/hisat2.wdl b/hisat2.wdl index b52bf70f..a2c0777c 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -106,5 +106,9 @@ task Hisat2 { memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + bamFile: {description: "Output BAM file."} + summaryFile: {description: "Alignment summary file."} } } diff --git a/htseq.wdl b/htseq.wdl index cf527535..dfa3fcf2 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -78,5 +78,8 @@ task HTSeqCount { memory: {description: "The amount of memory the job requires in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + counts: {description: "Count table based on input BAM file."} } } diff --git a/manta.wdl b/manta.wdl index a7b7cf38..4b7ea264 100644 --- a/manta.wdl +++ b/manta.wdl @@ -79,6 +79,10 @@ task Germline { memoryGb: {description: "The memory required to run the manta", category: "required"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + mantaVCF: {description: ""} + mantaVCFindex: {description: ""} } } @@ -155,5 +159,8 @@ task Somatic { memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + } } diff --git a/requirements-test.txt b/requirements-test.txt index f074413b..0b01d193 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,2 +1,11 @@ +# These are the programs used for testing these biowdl tasks. +# These requirements can be installed with conda with the bioconda channel +# activated. +# For more information on how to set up conda with bioconda channel see: +# http://bioconda.github.io/#install-conda +# This file can be installed with "conda install --file requirements-test.txt". + cromwell -miniwdl \ No newline at end of file +womtool +miniwdl +wdl-aid From 20ee22a6b9b2063cd900426b54549ba98d9f60d3 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 9 Nov 2020 14:32:17 +0100 Subject: [PATCH 0692/1208] Update README. 
--- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4bc58367..9d682de7 100644 --- a/README.md +++ b/README.md @@ -7,13 +7,13 @@ Documentation for this repository can be found [here](https://biowdl.github.io/tasks/). ## About -These workflows are part of [Biowdl](https://github.com/biowdl) +These tasks are part of [Biowdl](https://github.com/biowdl) developed by the SASC team at [Leiden University Medical Center](https://www.lumc.nl/). ## Contact

-For any question related to Expression-Quantification, please use the +For any question related to these tasks, please use the github issue tracker or contact the SASC team directly at: From c9e62bf9a8b562ddee8ecd2ae12aa8784ed1d4ce Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 9 Nov 2020 17:32:07 +0100 Subject: [PATCH 0693/1208] Update tasks. --- manta.wdl | 13 ++++++++++--- multiqc.wdl | 4 ++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/manta.wdl b/manta.wdl index 4b7ea264..1c949af2 100644 --- a/manta.wdl +++ b/manta.wdl @@ -81,8 +81,8 @@ task Germline { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - mantaVCF: {description: ""} - mantaVCFindex: {description: ""} + mantaVCF: {description: "SVs and indels scored and genotyped under a diploid model for the set of samples in a joint diploid sample analysis or for the normal sample in a tumor/normal subtraction analysis."} + mantaVCFindex: {description: "Index of output mantaVCF."} } } @@ -161,6 +161,13 @@ task Somatic { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - + candidateSmallIndelsVcf: {description: "Subset of the candidateSV.vcf.gz file containing only simple insertion and deletion variants less than the minimum scored variant size."} + candidateSmallIndelsVcfIndex: {description: "Index of output VCF file candidateSmallIndelsVcf."} + candidateSVVcf: {description: "Unscored SV and indel candidates."} + candidatSVVcfIndex: {description: "Index of output VCF file candidateSVVcf."} + tumorSVVcf: {description: "Subset of the candidateSV.vcf.gz file after removing redundant candidates and small indels less than the minimum scored variant size."} + tumorSVVcfIndex: {description: "Index of output VCF file tumorSVVcf."} + diploidSV: {description: "SVs and indels scored and genotyped under a diploid model for the set of samples in a joint diploid sample analysis or for the normal sample in a tumor/normal subtraction analysis."} + diploidSVindex: {description: "Index of output VCF file diploidSV."} } } diff --git a/multiqc.wdl b/multiqc.wdl index 647394e9..1d248dd6 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -177,5 +177,9 @@ task MultiQC { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + multiqcReport: {description: ""} + multiqcDataDirZip: {description: ""} } } From 100d8add0f092f9396be00cce03491cf0fffa654 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 10 Nov 2020 09:52:53 +0100 Subject: [PATCH 0694/1208] Add optional gvcf index input --- deepvariant.wdl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index 8b08e111..6a2b70b3 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -28,11 +28,11 @@ task RunDeepVariant { File inputBamIndex String modelType String outputVcf - String? postprocessVariantsExtraArgs File? customizedModel Int? numShards String? outputGVcf + String? outputGVcfIndex File? regions String? sampleName Boolean? VCFStatsReport = true @@ -44,6 +44,7 @@ task RunDeepVariant { command { set -e + /opt/deepvariant/bin/run_deepvariant \ --ref ~{referenceFasta} \ --reads ~{inputBam} \ @@ -66,10 +67,10 @@ task RunDeepVariant { output { File outputVCF = outputVcf - File outputVCFIndex = outputVCF + ".tbi" + File outputVCFIndex = outputVcf + ".tbi" Array[File] outputVCFStatsReport = glob("*.visual_report.html") File? outputGVCF = outputGVcf - File? outputGVCFIndex = outputGVcf + ".tbi" + File? outputGVCFIndex = outputGVcfIndex } parameter_meta { @@ -84,6 +85,7 @@ task RunDeepVariant { customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. If not set, the default for each --model_type will be used.", category: "advanced"} numShards: {description: "Number of shards for make_examples step.", category: "common"} outputGVcf: {description: "Path where we should write gVCF file.", category: "common"} + outputGVcfIndex: {description: "Path to where the gVCF index file will be written. 
This is needed as a workaround, set it to outputGVcf+.tbi.", category: "common"} regions: {description: "List of regions we want to process, in BED/BEDPE format.", category: "advanced"} sampleName: {description: "Sample name to use instead of the sample name from the input reads BAM (SM tag in the header).", category: "common"} VCFStatsReport: {description: "Output a visual report (HTML) of statistics about the output VCF.", category: "common"} From 4836726ee8677c83d7cc1a1175be85435ab695bc Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 10 Nov 2020 09:53:28 +0100 Subject: [PATCH 0695/1208] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9bff5f3c..48d98036 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ DeepVariant: Add an optional input for the gvcf index. + Samtools: `Sort` task now has `threads` in runtime instead of `1`. + Picard: Add parameter_meta to `SortSam`. + pbmm2: Add parameter_meta for `sample`. From aae72ec8459f36a3beb813c824553b56b1a820f5 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 10 Nov 2020 13:09:12 +0100 Subject: [PATCH 0696/1208] Add more updates. --- TO-DO.md | 2 ++ deepvariant.wdl | 3 +-- multiqc.wdl | 4 ++-- pacbio.wdl | 8 +++++++- pbbam.wdl | 4 ++++ picard.wdl | 9 +++++++++ 6 files changed, 25 insertions(+), 5 deletions(-) diff --git a/TO-DO.md b/TO-DO.md index 69f359bd..7368005a 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -18,6 +18,8 @@ Some tasks are importing other WDL files. 
* fastqsplitter.wdl: `Fastqsplitter` * flash.wdl: `Flash` * macs2.wdl: `PeakCalling` +* ncbi.wdl: `GenomeDownload`, `DownloadNtFasta`, `DownloadAccessionToTaxId` +* ## Imports other tasks: * bamstats.wdl diff --git a/deepvariant.wdl b/deepvariant.wdl index 618200aa..28aee813 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -44,7 +44,6 @@ task RunDeepVariant { command { set -e - /opt/deepvariant/bin/run_deepvariant \ --ref ~{referenceFasta} \ --reads ~{inputBam} \ @@ -85,7 +84,7 @@ task RunDeepVariant { customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. If not set, the default for each --model_type will be used.", category: "advanced"} numShards: {description: "Number of shards for make_examples step.", category: "common"} outputGVcf: {description: "Path where we should write gVCF file.", category: "common"} - outputGVcfIndex: {description: "Path to where the gVCF index file will be written. This is needed as a workaround, set it to outputGVcf+.tbi.", category: "common"} + outputGVcfIndex: {description: "Path to where the gVCF index file will be written. This is needed as a workaround, set it to `outputGVcf + '.tbi.'`", category: "common"} regions: {description: "List of regions we want to process, in BED/BEDPE format.", category: "advanced"} sampleName: {description: "Sample name to use instead of the sample name from the input reads BAM (SM tag in the header).", category: "common"} VCFStatsReport: {description: "Output a visual report (HTML) of statistics about the output VCF.", category: "common"} diff --git a/multiqc.wdl b/multiqc.wdl index 1d248dd6..405c0a0b 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -179,7 +179,7 @@ task MultiQC { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - multiqcReport: {description: ""} - multiqcDataDirZip: {description: ""} + multiqcReport: {description: "Results from bioinformatics analyses across many samples in a single report."} + multiqcDataDirZip: {description: "The parsed data directory compressed with zip."} } } diff --git a/pacbio.wdl b/pacbio.wdl index 01f6d4fd..df0343d9 100644 --- a/pacbio.wdl +++ b/pacbio.wdl @@ -43,7 +43,7 @@ task mergePacBio { } output { - File MergedReport = mergedReport + File outputMergedReport = mergedReport } parameter_meta { @@ -52,6 +52,9 @@ task mergePacBio { mergedReport: {description: "The location the merged PacBio report file should be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputMergedReport: {description: "The PacBio reports merged into one."} } } @@ -85,5 +88,8 @@ task ccsChunks { chunkCount: {description: "The number of chunks to create.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + chunks: {description: "The chunks created based on `chunkCount`."} } } diff --git a/pbbam.wdl b/pbbam.wdl index d271a11a..d893e64d 100644 --- a/pbbam.wdl +++ b/pbbam.wdl @@ -66,5 +66,9 @@ task Index { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + indexedBam: {description: "The original input BAM file."} + index: {description: "The index of the input BAM file."} } } diff --git a/picard.wdl b/picard.wdl index f1876f7b..d288f0e5 100644 --- a/picard.wdl +++ b/picard.wdl @@ -61,6 +61,9 @@ task BedToIntervalList { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + intervalList: {description: "Picard Interval List from a BED file."} } } @@ -125,6 +128,9 @@ task CollectHsMetrics { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + HsMetrics: {description: "Hybrid-selection (HS) metrics for the input BAM file."} } } @@ -240,6 +246,9 @@ task CollectMultipleMetrics { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + alignmentSummary: {description: ""} } } From f111c363b74ec64ee7ba06db7a7ad2b3f3ada05a Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 10 Nov 2020 17:00:31 +0100 Subject: [PATCH 0697/1208] Update style. --- CHANGELOG.md | 13 +++++-------- README.md | 16 ++++++++-------- 2 files changed, 13 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 836af4ac..0b668ab1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,6 @@ Changelog ========== -For any question related to these tasks, please use the -github issue tracker +For any question related to Tasks, please use the +github issue tracker or contact the SASC team directly at: - + sasc@lumc.nl.

From b633bd790ee89de61a1673092b9d98cb4006d91e Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 10 Nov 2020 17:06:03 +0100 Subject: [PATCH 0698/1208] Update more tasks. --- picard.wdl | 31 +++++++++++++++++++++++++++++++ rtg.wdl | 6 ++++++ 2 files changed, 37 insertions(+) diff --git a/picard.wdl b/picard.wdl index d288f0e5..fd278958 100644 --- a/picard.wdl +++ b/picard.wdl @@ -249,6 +249,24 @@ task CollectMultipleMetrics { # outputs alignmentSummary: {description: ""} + baitBiasDetail: {description: ""} + baitBiasSummary: {description: ""} + baseDistributionByCycle: {description: ""} + baseDistributionByCyclePdf: {description: ""} + errorSummary: {description: ""} + gcBiasDetail: {description: ""} + gcBiasPdf: {description: ""} + gcBiasSummary: {description: ""} + insertSizeHistogramPdf: {description: ""} + insertSize: {description: ""} + preAdapterDetail: {description: ""} + preAdapterSummary: {description: ""} + qualityByCycle: {description: ""} + qualityByCyclePdf: {description: ""} + qualityDistribution: {description: ""} + qualityDistributionPdf: {description: ""} + qualityYield: {description: ""} + allStats: {description: ""} } } @@ -301,6 +319,10 @@ task CollectRnaSeqMetrics { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + metrics: {description: ""} + chart: {description: ""} } } @@ -361,6 +383,11 @@ task CollectTargetedPcrMetrics { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + perTargetCoverage: {description: ""} + perBaseCoverage: {description: ""} + metrics: {description: ""} } } @@ -410,6 +437,10 @@ task CollectVariantCallingMetrics { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + details: {description: ""} + summary: {description: ""} } } diff --git a/rtg.wdl b/rtg.wdl index bfd32957..a6f8da52 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -59,6 +59,9 @@ task Format { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + sdf: {description: ""} } } @@ -161,5 +164,8 @@ task VcfEval { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + } } From 8c42b1e1de607623fa00863472c9570158e6e495 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 10 Nov 2020 17:49:13 +0100 Subject: [PATCH 0699/1208] Update more tasks. 
--- TO-DO.md | 2 +- rtg.wdl | 18 +++++++++++++++--- smoove.wdl | 3 +++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/TO-DO.md b/TO-DO.md index 7368005a..e9824dfb 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -19,7 +19,7 @@ Some tasks are importing other WDL files. * flash.wdl: `Flash` * macs2.wdl: `PeakCalling` * ncbi.wdl: `GenomeDownload`, `DownloadNtFasta`, `DownloadAccessionToTaxId` -* +* seqtk.wdl: `Sample` ## Imports other tasks: * bamstats.wdl diff --git a/rtg.wdl b/rtg.wdl index a6f8da52..0e86ce3f 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -52,7 +52,7 @@ task Format { parameter_meta { # inputs - inputFiles: {description: "input sequence files. May be specified 1 or more times.", category: "required"} + inputFiles: {description: "Input sequence files. May be specified 1 or more times.", category: "required"} format: {description: "Format of input. Allowed values are [fasta, fastq, fastq-interleaved, sam-se, sam-pe].", category: "advanced"} outputPath: {description: "Where the output should be placed.", category: "advanced"} rtgMem: {description: "The amount of memory rtg will allocate to the JVM.", category: "advanced"} @@ -61,7 +61,7 @@ task Format { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - sdf: {description: ""} + sdf: {description: "RTGSequence Data File (SDF) format version of the input file(s)."} } } @@ -166,6 +166,18 @@ task VcfEval { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - + falseNegativesVcf: {description: "Variants from thebaselineVCF which were not correctly called."} + falseNegativesVcfIndex: {description: "Index of the output VCF file `falseNegativesVcf`."} + falsePositivesVcf: {description: "Variants from thecallsVCF which do not agree with baseline variants."} + falsePositivesVcfIndex: {description: "Index of the output VCF file `falsePositivesVcf`."} + summary: {description: "Summary statistic file."} + truePositivesBaselineVcf: {description: "Variants from thebaselineVCF which agree with variants in thecalls VCF."} + truePositivesBaselineVcfIndex: {description: "Index of the output VCF file `truePositivesBaselineVcf`."} + truePositivesVcf: {description: "Variants from thecallsVCF which agree with variants in the baseline VCF."} + truePositivesVcfIndex: {description: "Index of the output VCF file `truePositivesVcf`."} + nonSnpRoc: {description: "ROC data derived from those variants which were not represented asSNPs."} + phasing: {description: "Phasing file."} + weightedRoc: {description: "ROC data derived from all analyzed call variants, regardless of their representation."} + allStats: {description: "All output files combined in a array."} } } diff --git a/smoove.wdl b/smoove.wdl index 244c2cac..cafc6b08 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -65,5 +65,8 @@ task Call { memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + smooveVcf: {description: "Calls and genotyping of structural variants in VCF file." 
} } From 3eeef3f777e4863f9da50e51f9ed0699578d28c2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 10:01:21 +0100 Subject: [PATCH 0700/1208] Update 2 new tasks. --- samtools.wdl | 35 +++++++++++++++++++++++++++++++++++ somaticseq.wdl | 6 ++++++ 2 files changed, 41 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index e274cf58..fcd996c7 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -56,6 +56,10 @@ task BgzipAndIndex { type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + compressed: {description: ""} + index: {description: ""} } } @@ -161,6 +165,11 @@ task Fastq { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + read1: {description: ""} + read2: {description: ""} + read0: {description: ""} } } @@ -203,6 +212,10 @@ task FilterShortReadsBam { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + filteredBam: {description: ""} + filteredBamIndex: {description: ""} } } @@ -239,6 +252,9 @@ task Flagstat { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + flagstat: {description: ""} } } @@ -288,6 +304,10 @@ task Index { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + indexedBam: {description: ""} + index: {description: ""} } } @@ -321,6 +341,9 @@ task Markdup { outputBamPath: {description: "The location of the output BAM file.", category: "required"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: ""} } } @@ -370,6 +393,10 @@ task Merge { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: ""} + outputBamIndex: {description: ""} } } @@ -474,6 +501,10 @@ task Tabix { type: {description: "The type of file (eg. vcf or bed) to be indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + indexedFile: {description: ""} + index: {description: ""} } } @@ -540,5 +571,9 @@ task View { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: ""} + outputBamIndex: {description: ""} } } diff --git a/somaticseq.wdl b/somaticseq.wdl index 1c73fc58..0cd944c6 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -123,6 +123,12 @@ task ParallelPaired { threads: {description: "The number of threads to use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + indels: {description: ""} + snvs: {description: ""} + ensembleIndels: {description: ""} + ensembleSNV: {description: ""} } } From e2461ff107d8d070c063ea47a782929e95bebb80 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 10:27:49 +0100 Subject: [PATCH 0701/1208] Fix travis error. 
--- smoove.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index cafc6b08..82079b2f 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -67,6 +67,6 @@ task Call { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - smooveVcf: {description: "Calls and genotyping of structural variants in VCF file." + smooveVcf: {description: "Calls and genotyping of structural variants in VCF file."} } } From 7dff854e906a14db3f69647b5f35596a9687d703 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 13:45:43 +0100 Subject: [PATCH 0702/1208] Update even more outputs. --- TO-DO.md | 11 ++++++----- somaticseq.wdl | 25 +++++++++++++++++++++++++ star.wdl | 22 ++++++++++++++++++++++ strelka.wdl | 10 ++++++++++ stringtie.wdl | 7 +++++++ survivor.wdl | 3 +++ umi-tools.wdl | 11 +++++++++++ vardict.wdl | 3 +++ vt.wdl | 3 +++ 9 files changed, 90 insertions(+), 5 deletions(-) diff --git a/TO-DO.md b/TO-DO.md index e9824dfb..b54d995c 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -20,6 +20,9 @@ Some tasks are importing other WDL files. * macs2.wdl: `PeakCalling` * ncbi.wdl: `GenomeDownload`, `DownloadNtFasta`, `DownloadAccessionToTaxId` * seqtk.wdl: `Sample` +* spades.wdl: `Spades` +* unicycler.wdl: `Unicycler` + ## Imports other tasks: * bamstats.wdl @@ -27,9 +30,7 @@ Some tasks are importing other WDL files. 
* sampleconfig.wdl * seqstat.wdl * clever.wdl +* strelka.wdl -## Requires parameter_meta: -* - -## Duplicate tasks: -* +## Requires input from others: +* somaticseq.wdl diff --git a/somaticseq.wdl b/somaticseq.wdl index 0cd944c6..07103ef9 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -234,6 +234,14 @@ task ParallelPairedTrain { threads: {description: "The number of threads to use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + consensusIndels: {description: ""} + consensusSNV: {description: ""} + ensembleIndels: {description: ""} + ensembleSNV: {description: ""} + ensembleIndelsClassifier: {description: ""} + ensembleSNVClassifier: {description: ""} } } @@ -317,6 +325,12 @@ task ParallelSingle { threads: {description: "The number of threads to use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + indels: {description: ""} + snvs: {description: ""} + ensembleIndels: {description: ""} + ensembleSNV: {description: ""} } } @@ -399,6 +413,14 @@ task ParallelSingleTrain { threads: {description: "The number of threads to use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + consensusIndels: {description: ""} + consensusSNV: {description: ""} + ensembleIndels: {description: ""} + ensembleSNV: {description: ""} + ensembleIndelsClassifier: {description: ""} + ensembleSNVClassifier: {description: ""} } } @@ -435,5 +457,8 @@ task ModifyStrelka { outputVCFName: {description: "The location the output VCF file should be written to.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: ""} } } diff --git a/star.wdl b/star.wdl index 68193fcd..aa1fd608 100644 --- a/star.wdl +++ b/star.wdl @@ -86,6 +86,24 @@ task GenomeGenerate { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + chrLength: {description: "Text chromosome lengths file."} + chrNameLength: {description: "Text chromosome name lengths file."} + chrName: {description: "Text chromosome names file."} + chrStart: {description: "Chromosome start sites file."} + genome: {description: "Binary genome sequence file."} + genomeParameters: {description: "Genome parameters file."} + sa: {description: "Suffix arrays file."} + saIndex: {description: "Index file of suffix arrays."} + exonGeTrInfo: {description: "Exon, gene and transcript information file."} + exonInfo: {description: "Exon information file."} + geneInfo: {description: "Gene information file."} + sjdbInfo: {description: "Splice junctions coordinates file."} + sjdbListFromGtfOut: {description: "Splice junctions from input GTF file."} + sjdbListOut: {description: "Splice junction list file."} + transcriptInfo: {description: "Transcripts information file."} + starIndex: {description: "A collection of all STAR index files."} } } @@ -181,6 +199,10 @@ task Star { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + bamFile: {description: "Alignment file."} + logFinalOut: {description: "Log information file."} } } diff --git a/strelka.wdl b/strelka.wdl index f4b9888b..be08e386 100644 --- a/strelka.wdl +++ b/strelka.wdl @@ -83,6 +83,10 @@ task Germline { memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + variants: {description: "Output VCF file."} + variantsIndex: {description: "Index of output VCF file."} } } @@ -158,6 +162,12 @@ task Somatic { memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + indelsVcf: {description: "VCF containing INDELS."} + indelsIndex: {description: "Index of output `indelsVcf`."} + variants: {description: "VCF containing variants."} + variantsIndex: {description: "Index of output `variants`."} } meta { diff --git a/stringtie.wdl b/stringtie.wdl index fff4140c..05df05c6 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -78,6 +78,10 @@ task Stringtie { memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + assembledTranscripts: {description: "GTF file containing the assembled transcripts."} + geneAbundance: {description: "Gene abundances in tab-delimited format."} } } @@ -141,5 +145,8 @@ task Merge { memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + mergedGtfFile: {description: "A merged GTF file from a set of GTF files."} } } diff --git a/survivor.wdl b/survivor.wdl index c7b31058..8b0360d8 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -74,5 +74,8 @@ task Merge { memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + mergedVcf: {description: "All the vcf files specified in fileList merged."} } } diff --git a/umi-tools.wdl b/umi-tools.wdl index 7b0a3991..6524d656 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -66,6 +66,10 @@ task Extract { threePrime: {description: "Whether or not the UMI's are at the reads' 3' end. If false the UMIs are extracted from the 5' end.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + extractedRead1: {description: "First read with UMI extracted to read name."} + extractedRead2: {description: "Second read with UMI extracted to read name."} } } @@ -124,5 +128,12 @@ task Dedup { memory: {description: "The amount of memory required for the task.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + deduppedBam: {description: "Deduplicated BAM file."} + deduppedBamIndex: {description: "Index of the deduplicated BAM file."} + editDistance: {description: "Report of the (binned) average edit distance between the UMIs at each position."} + umiStats: {description: "UMI-level summary statistics."} + positionStats: {description: "The counts for unique combinations of UMI and position."} } } diff --git a/vardict.wdl b/vardict.wdl index fc37c9ef..1c20e51c 100644 --- a/vardict.wdl +++ b/vardict.wdl @@ -118,5 +118,8 @@ task VarDict { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + vcfFile: {description: "Output VCF file."} } } diff --git a/vt.wdl b/vt.wdl index 95585ff2..94414050 100644 --- a/vt.wdl +++ b/vt.wdl @@ -64,5 +64,8 @@ task Normalize { memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Output VCF file."} } } From 7d76ed6c3e0bfa5ab679deb54ef24da0955d1ed0 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 14:45:04 +0100 Subject: [PATCH 0703/1208] Update TO-DO. --- TO-DO.md | 2 +- whatshap.wdl | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/TO-DO.md b/TO-DO.md index b54d995c..9a7db355 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -22,7 +22,7 @@ Some tasks are importing other WDL files. * seqtk.wdl: `Sample` * spades.wdl: `Spades` * unicycler.wdl: `Unicycler` - +* wisestork.wdl: `Count`, `GcCorrect`, `Newref`, `Zscore` ## Imports other tasks: * bamstats.wdl diff --git a/whatshap.wdl b/whatshap.wdl index 5c69400a..7307ce7c 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -92,6 +92,10 @@ task Phase { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + phasedVCF: {description: "VCF file containing phased variants."} + phasedVCFIndex: {description: "Index of phased VCF file."} } } @@ -144,6 +148,11 @@ task Stats { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + phasedGTF: {description: "Phasing statistics for a single VCF file."} + phasedTSV: {description: "Statistics in a tab-separated value format."} + phasedBlockList: {description: "List of the total number of phase sets/blocks."} } } @@ -204,5 +213,9 @@ task Haplotag { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + bam: {description: "BAM file containing tagged reads for haplotype."} + bamIndex: {description: "Index of the tagged BAM file."} } } From 7dab07f86c611fdc26fe5863ae6eb5d155be430f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 16:46:21 +0100 Subject: [PATCH 0704/1208] Update picard. --- TO-DO.md | 1 + picard.wdl | 54 +++++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/TO-DO.md b/TO-DO.md index 9a7db355..9216bc0c 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -23,6 +23,7 @@ Some tasks are importing other WDL files. 
* spades.wdl: `Spades` * unicycler.wdl: `Unicycler` * wisestork.wdl: `Count`, `GcCorrect`, `Newref`, `Zscore` +* picard.wdl: `ScatterIntervalList` ## Imports other tasks: * bamstats.wdl diff --git a/picard.wdl b/picard.wdl index fd278958..2005fe28 100644 --- a/picard.wdl +++ b/picard.wdl @@ -321,8 +321,8 @@ task CollectRnaSeqMetrics { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - metrics: {description: ""} - chart: {description: ""} + metrics: {description: "Metrics describing the distribution of bases within the transcripts."} + chart: {description: "Plot of normalized position vs. coverage."} } } @@ -385,9 +385,9 @@ task CollectTargetedPcrMetrics { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - perTargetCoverage: {description: ""} - perBaseCoverage: {description: ""} - metrics: {description: ""} + perTargetCoverage: {description: "Per target coverage information."} + perBaseCoverage: {description: "Per base coverage information to."} + metrics: {description: "File containing metrics."} } } @@ -539,6 +539,11 @@ task GatherBamFiles { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: ""} + outputBamIndex: {description: ""} + outputBamMd5: {description: ""} } } @@ -582,6 +587,9 @@ task GatherVcfs { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: ""} } } @@ -666,6 +674,12 @@ task MarkDuplicates { memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: ""} + outputBamIndex: {description: ""} + outputBamMd5: {description: ""} + metricsFile: {description: ""} } } @@ -725,6 +739,10 @@ task MergeVCFs { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Multiple variant files combined into a single variant file."} + outputVcfIndex: {description: "Index of `outputVcf`."} } } @@ -768,6 +786,21 @@ task SamToFastq { docker: dockerImage } + parameter_meta { + # inputs + inputBam: {description: "Input BAM file to extract reads from.", category: "required"} + inputBamIndex: {description: "Input BAM index file.", category: "required"} + paired: {description: "Set to false when input data is single-end.", category: "common"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + read1: {description: "Fastq file containing reads from the first pair."} + read2: {description: "Fastq file containing reads from the second pair."} + unpairedRead: {description: "Fastq file containing unpaired reads."} + meta { WDL_AID: { exclude: ["noneFile"] @@ -864,6 +897,10 @@ task SortSam { XmxGb: {description: "The maximum memory available to picard SortSam. Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "Sorted BAM file."} + outputBamIndex: {description: "Index of sorted BAM file."} } } @@ -911,6 +948,10 @@ task SortVcf { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Sorted VCF file(s)."} + outputVcfIndex: {description: "Index(es) of sort(ed) VCF file(s)."} } } @@ -955,5 +996,8 @@ task RenameSample { memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + renamedVcf: {description: "New VCF with renamed sample."} } } From b22cc9f9ba8e20e4685005bede66fe0dc129ccd8 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 17:02:49 +0100 Subject: [PATCH 0705/1208] Fix travis error. 
--- picard.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/picard.wdl b/picard.wdl index 2005fe28..cc2634f0 100644 --- a/picard.wdl +++ b/picard.wdl @@ -800,6 +800,7 @@ task SamToFastq { read1: {description: "Fastq file containing reads from the first pair."} read2: {description: "Fastq file containing reads from the second pair."} unpairedRead: {description: "Fastq file containing unpaired reads."} + } meta { WDL_AID: { From ccc38727ddf49a3cebb566fadf7145675b0eafa2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 17:19:50 +0100 Subject: [PATCH 0706/1208] Update samtools. --- samtools.wdl | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index fcd996c7..0aecf4ee 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -58,8 +58,8 @@ task BgzipAndIndex { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - compressed: {description: ""} - index: {description: ""} + compressed: {description: "Compressed input file."} + index: {description: "Index of the compressed input file."} } } @@ -167,9 +167,9 @@ task Fastq { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - read1: {description: ""} - read2: {description: ""} - read0: {description: ""} + read1: {description: "Reads with the READ1 FLAG set."} + read2: {description: "Reads with the READ2 FLAG set."} + read0: {description: "Reads with either READ1 FLAG or READ2 flag set."} } } @@ -214,8 +214,8 @@ task FilterShortReadsBam { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - filteredBam: {description: ""} - filteredBamIndex: {description: ""} + filteredBam: {description: "BAM file filtered for short reads."} + filteredBamIndex: {description: "Index of filtered BAM file."} } } @@ -254,7 +254,7 @@ task Flagstat { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - flagstat: {description: ""} + flagstat: {description: "The number of alignments for each FLAG type."} } } @@ -306,8 +306,8 @@ task Index { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - indexedBam: {description: ""} - index: {description: ""} + indexedBam: {description: "BAM file that was indexed."} + index: {description: "Index of the input BAM file."} } } @@ -343,7 +343,7 @@ task Markdup { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputBam: {description: ""} + outputBam: {description: "BAM file with duplicate alignments marked."} } } @@ -395,8 +395,8 @@ task Merge { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputBam: {description: ""} - outputBamIndex: {description: ""} + outputBam: {description: "Multiple BAM files merged into one."} + outputBamIndex: {description: "Index of the merged BAM file."} } } @@ -503,8 +503,8 @@ task Tabix { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - indexedFile: {description: ""} - index: {description: ""} + indexedFile: {description: "Indexed input file."} + index: {description: "Index of the input file."} } } @@ -573,7 +573,7 @@ task View { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputBam: {description: ""} - outputBamIndex: {description: ""} + outputBam: {description: "Processed input file."} + outputBamIndex: {description: "Index of the processed input file."} } } From 4c56f143264390a79319c7c85e3dcca7732fb0f2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Wed, 11 Nov 2020 17:24:47 +0100 Subject: [PATCH 0707/1208] Update picard. --- picard.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/picard.wdl b/picard.wdl index cc2634f0..20fd1f95 100644 --- a/picard.wdl +++ b/picard.wdl @@ -541,9 +541,9 @@ task GatherBamFiles { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputBam: {description: ""} - outputBamIndex: {description: ""} - outputBamMd5: {description: ""} + outputBam: {description: "Concatenated BAM files."} + outputBamIndex: {description: "Index of the output `outputBam`."} + outputBamMd5: {description: "MD5 of the output `outputBam`."} } } @@ -589,7 +589,7 @@ task GatherVcfs { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputVcf: {description: ""} + outputVcf: {description: "Multiple VCF files gathered into one file."} } } From 19610fe328fbfee31e922684663d9a190e631194 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Thu, 12 Nov 2020 12:03:18 +0100 Subject: [PATCH 0708/1208] Update smoove.wdl --- smoove.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index 82079b2f..e5c5348f 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -67,6 +67,6 @@ task Call { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - smooveVcf: {description: "Calls and genotyping of structural variants in VCF file."} + smooveVcf: {description: "Calls of structural variants in VCF file."} } } From 7aea19d5feeab4aa5ff5a035216157d375dad116 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Thu, 12 Nov 2020 12:11:18 +0100 Subject: [PATCH 0709/1208] Update vt.wdl --- vt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vt.wdl b/vt.wdl index 94414050..85077dae 100644 --- a/vt.wdl +++ b/vt.wdl @@ -66,6 +66,6 @@ task Normalize { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputVcf: {description: "Output VCF file."} + outputVcf: {description: "Normalized & decomposed VCF file."} } } From c3255755087999b129670fda036bfbe4fe6771d6 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 12 Nov 2020 12:13:56 +0100 Subject: [PATCH 0710/1208] Update gffcompare. 
--- biopet/bamstats.wdl | 70 ----- biopet/biopet.wdl | 552 ---------------------------------------- biopet/sampleconfig.wdl | 143 ----------- biopet/seqstat.wdl | 64 ----- gffcompare.wdl | 6 +- 5 files changed, 3 insertions(+), 832 deletions(-) delete mode 100644 biopet/bamstats.wdl delete mode 100644 biopet/biopet.wdl delete mode 100644 biopet/sampleconfig.wdl delete mode 100644 biopet/seqstat.wdl diff --git a/biopet/bamstats.wdl b/biopet/bamstats.wdl deleted file mode 100644 index d01bc10c..00000000 --- a/biopet/bamstats.wdl +++ /dev/null @@ -1,70 +0,0 @@ -version 1.0 - -# Copyright (c) 2017 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import "../common.wdl" as common - -task Generate { - input { - IndexedBamFile bam - Boolean scatterMode = false - Boolean onlyUnmapped = false - Boolean tsvOutputs = false - String outputDir - - String? preCommand - File? toolJar - File? bedFile - Reference? 
reference - - String javaXmx = "8G" - String memory = "9G" - } - - File referenceFasta = if defined(reference) then select_first([reference]).fasta else "" - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-bamstats -Xmx~{javaXmx}" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p ~{outputDir} - ~{toolCommand} Generate \ - --bam ~{bam.file} \ - ~{"--bedFile " + bedFile} \ - ~{true="--reference" false="" defined(reference)} ~{referenceFasta} \ - ~{true="--onlyUnmapped" false="" onlyUnmapped} \ - ~{true="--scatterMode" false="" scatterMode} \ - ~{true="--tsvOutputs" false="" tsvOutputs} \ - --outputDir ~{outputDir} - } - - output { - File json = outputDir + "/bamstats.json" - File summaryJson = outputDir + "/bamstats.summary.json" - } - - runtime { - memory: memory - } -} diff --git a/biopet/biopet.wdl b/biopet/biopet.wdl deleted file mode 100644 index e6619e09..00000000 --- a/biopet/biopet.wdl +++ /dev/null @@ -1,552 +0,0 @@ -version 1.0 - -# Copyright (c) 2017 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import "../common.wdl" - -task BaseCounter { - input { - IndexedBamFile bam - File refFlat - String outputDir - String prefix - - String? preCommand - File? toolJar - - String javaXmx = "4G" - String memory = "5G" - } - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-basecounter -Xmx~{javaXmx}" - - command { - set -e -o pipefail - mkdir -p ~{outputDir} - ~{preCommand} - ~{toolCommand} \ - -b ~{bam.file} \ - -r ~{refFlat} \ - -o ~{outputDir} \ - -p ~{prefix} - } - - output { - File exonAntisense = outputDir + "/" + prefix + ".base.exon.antisense.counts" - File exon = outputDir + "/" + prefix + ".base.exon.counts" - File exonMergeAntisense = outputDir + "/" + prefix + ".base.exon.merge.antisense.counts" - File exonMerge = outputDir + "/" + prefix + ".base.exon.merge.counts" - File exonMergeSense = outputDir + "/" + prefix + ".base.exon.merge.sense.counts" - File exonSense = outputDir + "/" + prefix + ".base.exon.sense.counts" - File geneAntisense = outputDir + "/" + prefix + ".base.gene.antisense.counts" - File gene = outputDir + "/" + prefix + ".base.gene.counts" - File geneExonicAntisense = outputDir + "/" + prefix + ".base.gene.exonic.antisense.counts" - File geneExonic = outputDir + "/" + prefix + ".base.gene.exonic.counts" - File geneExonicSense = outputDir + "/" + prefix + ".base.gene.exonic.sense.counts" - File geneIntronicAntisense = outputDir + "/" + prefix + ".base.gene.intronic.antisense.counts" - File geneIntronic = outputDir + "/" + prefix + ".base.gene.intronic.counts" - File geneIntronicSense = outputDir + "/" + prefix + ".base.gene.intronic.sense.counts" - File geneSense = outputDir + "/" + prefix + ".base.gene.sense.counts" - 
File intronAntisense = outputDir + "/" + prefix + ".base.intron.antisense.counts" - File intron = outputDir + "/" + prefix + ".base.intron.counts" - File intronMergeAntisense = outputDir + "/" + prefix + ".base.intron.merge.antisense.counts" - File intronMerge = outputDir + "/" + prefix + ".base.intron.merge.counts" - File intronMergeSense = outputDir + "/" + prefix + ".base.intron.merge.sense.counts" - File intronSense = outputDir + "/" + prefix + ".base.intron.sense.counts" - File metaExonsNonStranded = outputDir + "/" + prefix + ".base.metaexons.non_stranded.counts" - File metaExonsStrandedAntisense = outputDir + "/" + prefix + ".base.metaexons.stranded.antisense.counts" - File metaExonsStranded = outputDir + "/" + prefix + ".base.metaexons.stranded.counts" - File metaExonsStrandedSense = outputDir + "/" + prefix + ".base.metaexons.stranded.sense.counts" - File transcriptAntisense = outputDir + "/" + prefix + ".base.transcript.antisense.counts" - File transcript = outputDir + "/" + prefix + ".base.transcript.counts" - File transcriptExonicAntisense = outputDir + "/" + prefix + ".base.transcript.exonic.antisense.counts" - File transcriptExonic = outputDir + "/" + prefix + ".base.transcript.exonic.counts" - File transcriptExonicSense = outputDir + "/" + prefix + ".base.transcript.exonic.sense.counts" - File transcriptIntronicAntisense = outputDir + "/" + prefix + ".base.transcript.intronic.antisense.counts" - File transcriptIntronic = outputDir + "/" + prefix + ".base.transcript.intronic.counts" - File transcriptIntronicSense = outputDir + "/" + prefix + ".base.transcript.intronic.sense.counts" - File transcriptSense = outputDir + "/" + prefix + ".base.transcript.sense.counts" - } - - runtime { - memory: memory - } -} - -task ExtractAdaptersFastqc { - input { - File inputFile - String outputDir - String adapterOutputFilePath = outputDir + "/adapter.list" - String contamsOutputFilePath = outputDir + "/contaminations.list" - - Boolean? skipContams - File? 
knownContamFile - File? knownAdapterFile - Float? adapterCutoff - Boolean? outputAsFasta - - String javaXmx = "8G" - String memory = "9G" - Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/biopet-extractadaptersfastqc:0.2--1" - } - - command { - set -e - mkdir -p ~{outputDir} - biopet-extractadaptersfastqc -Xmx~{javaXmx} \ - --inputFile ~{inputFile} \ - ~{"--adapterOutputFile " + adapterOutputFilePath } \ - ~{"--contamsOutputFile " + contamsOutputFilePath } \ - ~{"--knownContamFile " + knownContamFile} \ - ~{"--knownAdapterFile " + knownAdapterFile} \ - ~{"--adapterCutoff " + adapterCutoff} \ - ~{true="--skipContams" false="" skipContams} \ - ~{true="--outputAsFasta" false="" outputAsFasta} - } - - output { - File adapterOutputFile = adapterOutputFilePath - File contamsOutputFile = contamsOutputFilePath - Array[String] adapterList = read_lines(adapterOutputFile) - Array[String] contamsList = read_lines(contamsOutputFile) - } - - runtime { - memory: memory - time_minutes: timeMinutes - docker: dockerImage - } - - parameter_meta { - # inputs - inputFile: {description: "Input fastq file.", category: "required"} - outputDir: {description: "The path to which the output should be written.", category: "required"} - adapterOutputFilePath: {description: "Output file for adapters, if not supplied output will go to stdout.", category: "common"} - contamsOutputFilePath: {description: "Output file for contaminations, if not supplied output will go to stdout.", category: "common"} - skipContams: {description: "If this is set only the adapters block is used, other wise contaminations is also used.", category: "advanced"} - knownContamFile: {description: "This file should contain the known contaminations from fastqc.", category: ""advanced} - knownAdapterFile: {description: "This file should contain the known adapters from fastqc.", category: "advanced"} - adapterCutoff: {description: "The fraction of the adapters in a read should be above this fraction, default 
is 0.001.", category: "advanced"} - outputAsFasta: {description: "Output in fasta format, default only sequences.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - - # outputs - adapterOutputFile: {description: "Output file with adapters."} - contamsOutputFile: {description: "Output file with contaminations."} - adapterList: {description: "List of adapters."} - contamsList: {description: "List of contaminations."} - } -} - -task FastqSplitter { - input { - File inputFastq - Array[String]+ outputPaths - - String? preCommand - File? toolJar - - String javaXmx = "4G" - String memory = "5G" - String dockerImage = "quay.io/biocontainers/biopet-fastqsplitter:0.1--2" - } - - command { - set -e - mkdir -p $(dirname ~{sep=') $(dirname ' outputPaths}) - biopet-fastqsplitter -Xmx~{javaXmx} \ - -I ~{inputFastq} \ - -o ~{sep=' -o ' outputPaths} - } - - output { - Array[File] chunks = outputPaths - } - - runtime { - memory: memory - docker: dockerImage - } -} - -task FastqSync { - input { - FastqPair refFastq - FastqPair inputFastq - String out1path - String out2path - - String? preCommand - File? 
toolJar - - String javaXmx = "4G" - String memory = "5G" - } - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-fastqsync -Xmx~{javaXmx}" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p $(dirname ~{out1path}) $(dirname ~{out2path}) - ~{toolCommand} \ - --in1 ~{inputFastq.R1} \ - --in2 ~{inputFastq.R2} \ - --ref1 ~{refFastq.R1} \ - --ref2 ~{refFastq.R2} \ - --out1 ~{out1path} \ - --out2 ~{out2path} - } - - output { - FastqPair out1 = object { - R1: out1path, - R2: out2path - } - } - - runtime { - memory: memory - } -} - -task ScatterRegions { - input { - File referenceFasta - File referenceFastaDict - Int scatterSizeMillions = 1000 - Boolean notSplitContigs = false - - Int? scatterSize - File? regions - File? bamFile - File? bamIndex - - String javaXmx = "500M" - String memory = "1G" - Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/biopet-scatterregions:0.2--0" - } - - # OutDirPath must be defined here because the glob process relies on - # linking. This path must be in the containers filesystem, otherwise the - # linking does not work. - String outputDirPath = "scatters" - String finalSize = if defined(scatterSize) then "~{scatterSize}" else "~{scatterSizeMillions}000000" - - command <<< - set -e -o pipefail - mkdir -p ~{outputDirPath} - biopet-scatterregions -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -R ~{referenceFasta} \ - -o ~{outputDirPath} \ - ~{"-s " + finalSize} \ - ~{"-L " + regions} \ - ~{"--bamFile " + bamFile} \ - ~{true="--notSplitContigs" false="" notSplitContigs} - - # Glob messes with order of scatters (10 comes before 1), which causes - # problems at gatherGvcfs - # Therefore we reorder the scatters with python. 
- python << CODE - import os - scatters = os.listdir("~{outputDirPath}") - splitext = [ x.split(".") for x in scatters] - splitnum = [x.split("-") + [y] for x,y in splitext] - ordered = sorted(splitnum, key=lambda x: int(x[1])) - merged = ["~{outputDirPath}/{}-{}.{}".format(x[0],x[1],x[2]) for x in ordered] - for x in merged: - print(x) - CODE - >>> - - output { - Array[File] scatters = read_lines(stdout()) - } - - runtime { - memory: memory - time_minutes: timeMinutes - docker: dockerImage - } - - parameter_meta { - # inputs - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} - scatterSizeMillions: {description: "Over how many million base pairs should be scattered.", category: "common"} - notSplitContigs: {description: "Equivalent to biopet scatterregions' `--notSplitContigs` flag.", category: "advanced"} - scatterSize: {description: "Overrides scatterSizeMillions with a smaller value if set.", category: "advanced"} - regions: {description: "The regions to be scattered.", category: "advanced"} - bamFile: {description: "Equivalent to biopet scatterregions' `--bamfile` option.", category: "advanced"} - bamIndex: {description: "The index for the bamfile given through bamFile.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - - # outputs - scatters: {description: "Smaller scatter regions of equal size."} - } -} - -task ValidateAnnotation { - input { - Reference reference - - File? refRefflat - File? gtfFile - - String javaXmx = "3G" - String memory = "4G" - String dockerImage = "quay.io/biocontainers/biopet-validateannotation:0.1--0" - } - - command { - biopet-validateannotation -Xmx~{javaXmx} \ - ~{"-r " + refRefflat} \ - ~{"-g " + gtfFile} \ - -R ~{reference.fasta} - } - - output { - File stderr = stderr() - } - - runtime { - memory: memory - docker: dockerImage - } -} - -task ValidateFastq { - input { - File read1 - File? read2 - - String javaXmx = "3G" - String memory = "4G" - String dockerImage = "quay.io/biocontainers/biopet-validatefastq:0.1.1--1" - } - - command { - biopet-validatefastq -Xmx~{javaXmx} \ - --fastq1 ~{read1} \ - ~{"--fastq2 " + read2} - } - - output { - File stderr = stderr() - } - - runtime { - memory: memory - docker: dockerImage - } -} - -task ValidateVcf { - input { - IndexedVcfFile vcf - Reference reference - - String javaXmx = "3G" - String memory = "4G" - String dockerImage = "quay.io/biocontainers/biopet-validatevcf:0.1--0" - } - - command { - biopet-validatevcf -Xmx~{javaXmx} \ - -i ~{vcf.file} \ - -R ~{reference.fasta} - } - - output { - File stderr = stderr() - } - - runtime { - memory: memory - docker: dockerImage - } -} - -task VcfStats { - input { - IndexedVcfFile vcf - Reference reference - String outputDir - Boolean writeBinStats = false - Int localThreads = 1 - Boolean notWriteContigStats = false - Boolean skipGeneral = false - Boolean skipGenotype = false - Boolean skipSampleDistributions = false - Boolean skipSampleCompare = false - - File? intervals - Array[String]+? infoTags - Array[String]+? genotypeTags - Int? sampleToSampleMinDepth - Int? binSize - Int? maxContigsInSingleJob - String? sparkMaster - Int? 
sparkExecutorMemory - Array[String]+? sparkConfigValues - - String javaXmx = "4G" - String memory = "5G" - String dockerImage = "quay.io/biocontainers/biopet-vcfstats:1.2--0" - } - - command { - set -e - mkdir -p ~{outputDir} - biopet-vcfstats -Xmx~{javaXmx} \ - -I ~{vcf.file} \ - -R ~{reference.fasta} \ - -o ~{outputDir} \ - -t ~{localThreads} \ - ~{"--intervals " + intervals} \ - ~{true="--infoTag" false="" defined(infoTags)} ~{sep=" --infoTag " infoTags} \ - ~{true="--genotypeTag" false="" defined(genotypeTags)} ~{sep=" --genotypeTag " - genotypeTags} \ - ~{"--sampleToSampleMinDepth " + sampleToSampleMinDepth} \ - ~{"--binSize " + binSize} \ - ~{"--maxContigsInSingleJob " + maxContigsInSingleJob} \ - ~{true="--writeBinStats" false="" writeBinStats} \ - ~{true="--notWriteContigStats" false="" notWriteContigStats} \ - ~{true="--skipGeneral" false="" skipGeneral} \ - ~{true="--skipGenotype" false="" skipGenotype} \ - ~{true="--skipSampleDistributions" false="" skipSampleDistributions} \ - ~{true="--skipSampleCompare" false="" skipSampleCompare} \ - ~{"--sparkMaster " + sparkMaster} \ - ~{"--sparkExecutorMemory " + sparkExecutorMemory} \ - ~{true="--sparkConfigValue" false="" defined(sparkConfigValues)} ~{ - sep=" --sparkConfigValue" sparkConfigValues} - } - - output { - File? general = outputDir + "/general.tsv" - File? genotype = outputDir + "/genotype.tsv" - File? sampleDistributionAvailableAggregate = outputDir + - "/sample_distributions/Available.aggregate.tsv" - File? sampleDistributionAvailable = outputDir + "/sample_distributions/Available.tsv" - File? sampleDistributionCalledAggregate = outputDir + - "/sample_distributions/Called.aggregate.tsv" - File? sampleDistributionCalled = outputDir + "/sample_distributions/Called.tsv" - File? sampleDistributionFilteredAggregate = outputDir + - "/sample_distributions/Filtered.aggregate.tsv" - File? sampleDistributionFiltered = outputDir + "/sample_distributions/Filtered.tsv" - File? 
sampleDistributionHetAggregate = outputDir + "/sample_distributions/Het.aggregate.tsv" - File? sampleDistributionHetNoNRefAggregate = outputDir + - "/sample_distributions/HetNonRef.aggregate.tsv" - File? sampleDistributionHetNonRef = outputDir + "/sample_distributions/HetNonRef.tsv" - File? sampleDistributionHet = outputDir + "/sample_distributions/Het.tsv" - File? sampleDistributionHomAggregate = outputDir + "/sample_distributions/Hom.aggregate.tsv" - File? sampleDistributionHomRefAggregate = outputDir + - "/sample_distributions/HomRef.aggregate.tsv" - File? sampleDistributionHomRef = outputDir + "/sample_distributions/HomRef.tsv" - File? sampleDistributionHom = outputDir + "/sample_distributions/Hom.tsv" - File? sampleDistributionHomVarAggregate = outputDir + - "/sample_distributions/HomVar.aggregate.tsv" - File? sampleDistributionHomVar = outputDir + "/sample_distributions/HomVar.tsv" - File? sampleDistributionMixedAggregate = outputDir + - "/sample_distributions/Mixed.aggregate.tsv" - File? sampleDistributionMixed = outputDir + "/sample_distributions/Mixed.tsv" - File? sampleDistributionNoCallAggregate = outputDir + - "/sample_distributions/NoCall.aggregate.tsv" - File? sampleDistributionNoCall = outputDir + "/sample_distributions/NoCall.tsv" - File? sampleDistributionNonInformativeAggregate = outputDir + - "/sample_distributions/NonInformative.aggregate.tsv" - File? sampleDistributionNonInformative = outputDir + - "/sample_distributions/NonInformative.tsv" - File? sampleDistributionToalAggregate = outputDir + - "/sample_distributions/Total.aggregate.tsv" - File? sampleDistributionTotal = outputDir + "/sample_distributions/Total.tsv" - File? sampleDistributionVariantAggregate = outputDir + - "/sample_distributions/Variant.aggregate.tsv" - File? sampleDistributionVariant = outputDir + "/sample_distributions/Variant.tsv" - File? sampleCompareAlleleAbs = outputDir + "/sample_compare/allele.abs.tsv" - File? 
sampleCompareAlleleNonRefAbs = outputDir + "/sample_compare/allele.non_ref.abs.tsv" - File? sampleCompareAlleleRefAbs = outputDir + "/sample_compare/allele.ref.abs.tsv" - File? sampleCompareAlleleRel = outputDir + "/sample_compare/allele.rel.tsv" - File? sampleCompareGenotypeAbs = outputDir + "/sample_compare/genotype.abs.tsv" - File? sampleCompareGenotypeNonRefAbs = outputDir + - "/sample_compare/genotype.non_ref.abs.tsv" - File? sampleCompareGenotypeRefAbs = outputDir + "/sample_compare/genotype.ref.abs.tsv" - File? sampleCompareGenotypeRel = outputDir + "/sample_compare/genotype.rel.tsv" - # A glob is easier, but duplicates all the outputs - Array[File] allStats = select_all([ - general, - genotype, - sampleDistributionAvailableAggregate, - sampleDistributionAvailable, - sampleDistributionCalledAggregate, - sampleDistributionCalled, - sampleDistributionFilteredAggregate, - sampleDistributionFiltered, - sampleDistributionHetAggregate, - sampleDistributionHetNoNRefAggregate, - sampleDistributionHetNonRef, - sampleDistributionHet, - sampleDistributionHomAggregate, - sampleDistributionHomRefAggregate, - sampleDistributionHomRef, - sampleDistributionHom, - sampleDistributionHomVarAggregate, - sampleDistributionHomVar, - sampleDistributionMixedAggregate, - sampleDistributionMixed, - sampleDistributionNoCallAggregate, - sampleDistributionNoCall, - sampleDistributionNonInformativeAggregate, - sampleDistributionNonInformative, - sampleDistributionToalAggregate, - sampleDistributionTotal, - sampleDistributionVariantAggregate, - sampleDistributionVariant, - sampleCompareAlleleAbs, - sampleCompareAlleleNonRefAbs, - sampleCompareAlleleRefAbs, - sampleCompareAlleleRel, - sampleCompareGenotypeAbs, - sampleCompareGenotypeNonRefAbs, - sampleCompareGenotypeRefAbs, - sampleCompareGenotypeRel - ]) - } - - runtime { - cpu: localThreads - memory: memory - docker: dockerImage - } -} diff --git a/biopet/sampleconfig.wdl b/biopet/sampleconfig.wdl deleted file mode 100644 index 
f3955658..00000000 --- a/biopet/sampleconfig.wdl +++ /dev/null @@ -1,143 +0,0 @@ -version 1.0 - -# Copyright (c) 2017 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import "../common.wdl" as common - -task SampleConfig { - input { - Array[File]+ inputFiles - String keyFilePath - - File? toolJar - String? preCommand - String? sample - String? library - String? readgroup - String? jsonOutputPath - String? tsvOutputPath - - String javaXmx = "16G" - String memory = "17G" - } - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-sampleconfig -Xmx~{javaXmx}" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p . 
~{"$(dirname " + jsonOutputPath + ")"} ~{"$(dirname " + tsvOutputPath + ")"} - ~{toolCommand} \ - -i ~{sep="-i " inputFiles} \ - ~{"--sample " + sample} \ - ~{"--library " + library} \ - ~{"--readgroup " + readgroup} \ - ~{"--jsonOutput " + jsonOutputPath} \ - ~{"--tsvOutput " + tsvOutputPath} \ - > ~{keyFilePath} - } - - output { - File keysFile = keyFilePath - File? jsonOutput = jsonOutputPath - File? tsvOutput = tsvOutputPath - } - - runtime { - memory: memory - } -} - -task SampleConfigCromwellArrays { - input { - Array[File]+ inputFiles - String outputPath - - File? toolJar - String? preCommand - - String javaXmx = "4G" - String memory = "5G" - } - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-sampleconfig -Xmx~{javaXmx}" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p $(dirname ~{outputPath}) - ~{toolCommand} CromwellArrays \ - -i ~{sep="-i " inputFiles} \ - ~{"-o " + outputPath} - } - - output { - File outputFile = outputPath - } - - runtime { - memory: memory - } -} - -task CaseControl { - input { - Array[File]+ inputFiles - Array[File]+ inputIndexFiles - Array[File]+ sampleConfigs - String outputPath - String controlTag = "control" - - File? toolJar - String? 
preCommand - - String javaXmx = "4G" - String memory = "5G" - } - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-sampleconfig -Xmx~{javaXmx}" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p $(dirname ~{outputPath}) - ~{toolCommand} CaseControl \ - -i ~{sep=" -i " inputFiles} \ - -s ~{sep=" -s " sampleConfigs} \ - ~{"-o " + outputPath} \ - ~{"--controlTag " + controlTag} - } - - output { - File outputFile = outputPath - CaseControls caseControls = read_json(outputFile) - } - - runtime { - memory: memory - } -} diff --git a/biopet/seqstat.wdl b/biopet/seqstat.wdl deleted file mode 100644 index c2eb5866..00000000 --- a/biopet/seqstat.wdl +++ /dev/null @@ -1,64 +0,0 @@ -version 1.0 - -# Copyright (c) 2017 Leiden University Medical Center -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -import "../common.wdl" as common - -task Generate { - input { - FastqPair fastq - String outputFile - String sample - String library - String readgroup - - String? preCommand - File? toolJar - - String javaXmx = "4G" - String memory = "5G" - } - - String toolCommand = if defined(toolJar) - then "java -Xmx~{javaXmx} -jar " + toolJar - else "biopet-seqstat -Xmx~{javaXmx}" - - command { - set -e -o pipefail - ~{preCommand} - mkdir -p $(dirname ~{outputFile}) - ~{toolCommand} Generate \ - --fastqR1 ~{fastq.R1} \ - ~{"--fastqR2 " + fastq.R2} \ - --output ~{outputFile} \ - ~{"--sample " + sample} \ - ~{"--library " + library } \ - ~{"--readgroup " + readgroup } - } - - output { - File json = outputFile - } - - runtime { - memory: memory - } -} diff --git a/gffcompare.wdl b/gffcompare.wdl index 4b0d6d22..50cab8a6 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -145,10 +145,10 @@ task GffCompare { # outputs annotated: {description: "Annotated GTF file."} - loci: {description: ""} + loci: {description: "File describing the processed loci."} stats: {description: "Various statistics related to the “accuracy” (or a measure of agreement) of the input transcripts when compared to reference annotation data."} - tracking: {description: "File matching transcripts up between samples."} - allFiles: {description: "A collection of all outputs files."} + tracking: {description: "File matching up transcripts between samples."} + allFiles: {description: "A collection of all output files."} redundant: {description: "File containing duplicate/redundant transcripts."} missedIntrons: {description: "File denoting missed introns."} } From 33166e5795cc175bf8384d8b34ff2bcb4776fbfb Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 12 Nov 2020 12:57:36 +0100 Subject: [PATCH 0711/1208] Update TO-DO.md. 
--- TO-DO.md | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/TO-DO.md b/TO-DO.md index 9216bc0c..be125abe 100644 --- a/TO-DO.md +++ b/TO-DO.md @@ -1,4 +1,4 @@ -#TO DO +#TO-DO This file describes WDL files and tasks within those files which need more specific attention than just adding outputs to the parameter_meta. @@ -8,11 +8,6 @@ missing a parameter_meta section. Some tasks are importing other WDL files. ## Out of date with new cluster & parameter_meta: -* bamstats.wdl: `Generate` -* biopet.wdl: `BaseCounter`, `FastqSplitter`, `FastqSync`, - `ValidateAnnotation`, `ValidateFastq`, `ValidateVcf`, `VcfStats` -* sampleconfig.wdl: `SampleConfig`, `SampleConfigCromwellArrays`, `CaseControl` -* seqstat.wdl: `Generate` * common.wdl: `AppendToStringArray`, `CheckFileMD5`, `ConcatenateTextFiles`, `Copy`, `CreateLink`, `MapMd5`, `StringArrayMd5` * fastqsplitter.wdl: `Fastqsplitter` @@ -25,13 +20,8 @@ Some tasks are importing other WDL files. * wisestork.wdl: `Count`, `GcCorrect`, `Newref`, `Zscore` * picard.wdl: `ScatterIntervalList` -## Imports other tasks: -* bamstats.wdl -* biopet.wdl -* sampleconfig.wdl -* seqstat.wdl -* clever.wdl -* strelka.wdl - ## Requires input from others: +These tasks below are still missing descriptions `outputs` in +the `parameter_meta`. * somaticseq.wdl +* picard.wdl From 78951778ad81d402d21db421cc6f7284a24c1941 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 12 Nov 2020 14:44:32 +0100 Subject: [PATCH 0712/1208] Fix syntax. 
--- gatk.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gatk.wdl b/gatk.wdl index 82244caa..5cf7c673 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1221,10 +1221,10 @@ task ModelSegments { copyRatioCBS: {description: "The posterior medians of the log2 copy ratio."} alleleFractionCBS: {description: "Minor-allele fraction."} unsmoothedModeledSegments: {description: "The initial modeled-segments result before segmentation smoothing."} - unsmoothedCopyRatioParameters: {description: "The initial copy-ratio-model global-parameter result before segmentation smoothing"} + unsmoothedCopyRatioParameters: {description: "The initial copy-ratio-model global-parameter result before segmentation smoothing."} unsmoothedAlleleFractionParameters: {description: "The initial allele-fraction-model global-parameter result before segmentation smoothing."} modeledSegments: {description: "The final modeled-segments result after segmentation smoothing."} - copyRatioParameters: {description: "The final copy-ratio-model global-parameter result after segmentation smoothing"} + copyRatioParameters: {description: "The final copy-ratio-model global-parameter result after segmentation smoothing."} alleleFractionParameters: {description: "The final allele-fraction-model global-parameter result after segmentation smoothing."} normalHetrozygousAllelicCounts: {description: "Allelic-counts file containing the counts at sites genotyped as heterozygous in the matched-normal sample."} } @@ -1766,7 +1766,7 @@ task VariantFiltration { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - filteredVcf: {description: "A filtered VCF in which passing variants are annotated as PASS and failing variants are annotated with the name(s) of the filter(s) they failed. 
"} + filteredVcf: {description: "A filtered VCF in which passing variants are annotated as PASS and failing variants are annotated with the name(s) of the filter(s) they failed."} filteredVcfIndex: {description: "Index of filtered VCF."} } } From d5863eecf95da8f78d4d06af2bd6b91bc036a4f0 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 17 Nov 2020 14:45:02 +0100 Subject: [PATCH 0713/1208] Add parameter meta --- picard.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/picard.wdl b/picard.wdl index 20fd1f95..029f0899 100644 --- a/picard.wdl +++ b/picard.wdl @@ -122,8 +122,10 @@ task CollectHsMetrics { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} targets: {description: "Picard interval file of the capture targets.", category: "required"} + targetsFile: {description: "Picard interval file of the capture targets, the same as targets.", category: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} baits: {description: "Picard interval file of the capture bait set.", category: "advanced"} + baitsFile: {description: "Picard interval file of the bait set. Uses targets as a fallback when baits is not set", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 8cc4c073e40ac70f3398eda3bd047aa42d801d26 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 17 Nov 2020 14:46:39 +0100 Subject: [PATCH 0714/1208] Add period to end of sentence --- picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index 029f0899..17930e3c 100644 --- a/picard.wdl +++ b/picard.wdl @@ -125,7 +125,7 @@ task CollectHsMetrics { targetsFile: {description: "Picard interval file of the capture targets, the same as targets.", category: "required"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} baits: {description: "Picard interval file of the capture bait set.", category: "advanced"} - baitsFile: {description: "Picard interval file of the bait set. Uses targets as a fallback when baits is not set", category: "advanced"} + baitsFile: {description: "Picard interval file of the bait set. Uses targets as a fallback when baits is not set.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 3b0874c0ed573307c2de1926d6df41c808be149b Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Tue, 17 Nov 2020 15:59:10 +0100 Subject: [PATCH 0715/1208] Update picard.wdl parameter meta Co-authored-by: Davy Cats --- picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index 17930e3c..d5601ad0 100644 --- a/picard.wdl +++ b/picard.wdl @@ -122,7 +122,7 @@ task CollectHsMetrics { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} targets: {description: "Picard interval file of the capture targets.", category: "required"} - targetsFile: {description: "Picard interval file of the capture targets, the same as targets.", category: "required"} + targetsFile: {description: "Picard interval file of the capture targets, the same as targets.", category: "advanced"} basename: {description: "The basename/prefix of the output files (may include directories).", category: "required"} baits: {description: "Picard interval file of the capture bait set.", category: "advanced"} baitsFile: {description: "Picard interval file of the bait set. Uses targets as a fallback when baits is not set.", category: "advanced"} From 67116dfe6c9021a011b97889ee08f99f25d5e7b8 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 17 Nov 2020 16:56:08 +0100 Subject: [PATCH 0716/1208] Update version of tools. 
--- CHANGELOG.md | 2 ++ TO-DO.md | 27 --------------------------- cutadapt.wdl | 2 +- stringtie.wdl | 4 ++-- 4 files changed, 5 insertions(+), 30 deletions(-) delete mode 100644 TO-DO.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b668ab1..27d4aa71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Update CutAdapt to version 3.0.0. ++ Update StringTie to version 2.1.4. + Complete `parameter_meta` for tasks missing the outputs. + DeepVariant: Add an optional input for the gvcf index. + Samtools: `Sort` task now has `threads` in runtime instead of `1`. diff --git a/TO-DO.md b/TO-DO.md deleted file mode 100644 index be125abe..00000000 --- a/TO-DO.md +++ /dev/null @@ -1,27 +0,0 @@ -#TO-DO -This file describes WDL files and tasks within those files which need -more specific attention than just adding outputs to the parameter_meta. - -Some tasks have not been updated to match the new SLURM requirements and are -missing a parameter_meta section. - -Some tasks are importing other WDL files. - -## Out of date with new cluster & parameter_meta: -* common.wdl: `AppendToStringArray`, `CheckFileMD5`, `ConcatenateTextFiles`, - `Copy`, `CreateLink`, `MapMd5`, `StringArrayMd5` -* fastqsplitter.wdl: `Fastqsplitter` -* flash.wdl: `Flash` -* macs2.wdl: `PeakCalling` -* ncbi.wdl: `GenomeDownload`, `DownloadNtFasta`, `DownloadAccessionToTaxId` -* seqtk.wdl: `Sample` -* spades.wdl: `Spades` -* unicycler.wdl: `Unicycler` -* wisestork.wdl: `Count`, `GcCorrect`, `Newref`, `Zscore` -* picard.wdl: `ScatterIntervalList` - -## Requires input from others: -These tasks below are still missing descriptions `outputs` in -the `parameter_meta`. 
-* somaticseq.wdl -* picard.wdl diff --git a/cutadapt.wdl b/cutadapt.wdl index b2dbdec0..b9f5a649 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -85,7 +85,7 @@ task Cutadapt { Int cores = 4 String memory = "~{300 + 100 * cores}M" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) - String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" + String dockerImage = "quay.io/biocontainers/cutadapt:3.0--py37hf01694f_0" } String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) diff --git a/stringtie.wdl b/stringtie.wdl index 05df05c6..81d96132 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -35,7 +35,7 @@ task Stringtie { Int threads = 1 String memory = "2G" Int timeMinutes = 1 + ceil(size(bam, "G") * 60 / threads) - String dockerImage = "quay.io/biocontainers/stringtie:1.3.4--py35_0" + String dockerImage = "quay.io/biocontainers/stringtie:2.1.4--h7e0af3c_0" } command { @@ -101,7 +101,7 @@ task Merge { String memory = "10G" Int timeMinutes = 1 + ceil(size(gtfFiles, "G") * 20) - String dockerImage = "quay.io/biocontainers/stringtie:1.3.4--py35_0" + String dockerImage = "quay.io/biocontainers/stringtie:2.1.4--h7e0af3c_0" } command { From 826cbaf4c0b3eae2b5fb3db8439211c1d9f8fdab Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 17 Nov 2020 17:05:22 +0100 Subject: [PATCH 0717/1208] Update versions. --- CHANGELOG.md | 2 ++ minimap2.wdl | 4 ++-- multiqc.wdl | 2 +- nanopack.wdl | 2 +- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 27d4aa71..3da95305 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Update NanoPlot to version 1.32.1. ++ Update MultiQC to version 1.9. + Update CutAdapt to version 3.0.0. + Update StringTie to version 2.1.4. + Complete `parameter_meta` for tasks missing the outputs. 
diff --git a/minimap2.wdl b/minimap2.wdl index 1b719da6..d2e69905 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -33,7 +33,7 @@ task Indexing { Int cores = 1 String memory = "4G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/minimap2:2.17--h84994c4_0" + String dockerImage = "quay.io/biocontainers/minimap2:2.17--hed695b0_3" } command { @@ -100,7 +100,7 @@ task Mapping { Int cores = 4 String memory = "30G" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) - String dockerImage = "quay.io/biocontainers/minimap2:2.17--h84994c4_0" + String dockerImage = "quay.io/biocontainers/minimap2:2.17--hed695b0_3" } command { diff --git a/multiqc.wdl b/multiqc.wdl index 405c0a0b..2571463a 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -58,7 +58,7 @@ task MultiQC { String? memory Int timeMinutes = 2 + ceil(size(reports, "G") * 8) - String dockerImage = "quay.io/biocontainers/multiqc:1.7--py_1" + String dockerImage = "quay.io/biocontainers/multiqc:1.9--py_1" } Int memoryGb = 2 + ceil(size(reports, "G")) diff --git a/nanopack.wdl b/nanopack.wdl index f238ce7b..f86641b0 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -42,7 +42,7 @@ task NanoPlot { Int threads = 2 String memory = "2G" Int timeMinutes = 15 - String dockerImage = "quay.io/biocontainers/nanoplot:1.32.0--py_0" + String dockerImage = "quay.io/biocontainers/nanoplot:1.32.1--py_0" } Map[String, String] fileTypeOptions = {"fastq": "--fastq ", "fasta": "--fasta ", "fastq_rich": "--fastq_rich ", "fastq_minimal": "--fastq_minimal ", "summary": "--summary ", "bam": "--bam ", "ubam": "--ubam ", "cram": "--cram ", "pickle": "--pickle ", "feather": "--feather "} From 70b3484461c1b887f558bb2a5a327ce98ac4f388 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 19 Nov 2020 12:53:26 +0100 Subject: [PATCH 0718/1208] Update versions. 
--- CHANGELOG.md | 4 ++++ isoseq3.wdl | 2 +- lima.wdl | 2 +- picard.wdl | 32 ++++++++++++++++---------------- samtools.wdl | 18 +++++++++--------- scripts | 2 +- 6 files changed, 32 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3da95305..dae3f185 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,10 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Update Lima to version 2.0.0. ++ Update IsoSeq3 to version 3.4.0. ++ Update samtools to version 1.11. ++ Update Picard to version 2.23.8. + Update NanoPlot to version 1.32.1. + Update MultiQC to version 1.9. + Update CutAdapt to version 3.0.0. diff --git a/isoseq3.wdl b/isoseq3.wdl index c1c4397c..aacbfc60 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -34,7 +34,7 @@ task Refine { Int threads = 2 String memory = "2G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0" + String dockerImage = "quay.io/biocontainers/isoseq3:3.4.0--0" } command { diff --git a/lima.wdl b/lima.wdl index 33b2328b..119db3f4 100644 --- a/lima.wdl +++ b/lima.wdl @@ -51,7 +51,7 @@ task Lima { Int threads = 2 String memory = "2G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/lima:1.11.0--0" + String dockerImage = "quay.io/biocontainers/lima:2.0.0--0" } Map[String, String] libraryDesignOptions = {"same": "--same", "different": "--different", "neighbors": "--neighbors"} diff --git a/picard.wdl b/picard.wdl index d5601ad0..f75fdc32 100644 --- a/picard.wdl +++ b/picard.wdl @@ -29,7 +29,7 @@ task BedToIntervalList { String javaXmx = "3G" String memory = "4G" Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -89,7 +89,7 @@ task CollectHsMetrics { # Additional * 2 because picard multiple metrics reads the # reference fasta twice. 
Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -158,7 +158,7 @@ task CollectMultipleMetrics { Int memoryMb = javaXmxMb + 512 # Additional * 2 because picard multiple metrics reads the reference fasta twice. Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -284,7 +284,7 @@ task CollectRnaSeqMetrics { String memory = "9G" # With 6 minutes per G there were several timeouts. Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -342,7 +342,7 @@ task CollectTargetedPcrMetrics { String javaXmx = "3G" String memory = "4G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -404,7 +404,7 @@ task CollectVariantCallingMetrics { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -453,7 +453,7 @@ task CreateSequenceDictionary { String javaXmx = "2G" String memory = "3G" - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -503,7 +503,7 @@ task GatherBamFiles { Int memoryMb = javaXmxMb + 512 # One minute per input gigabyte. 
Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -558,7 +558,7 @@ task GatherVcfs { String javaXmx = "4G" String memory = "5G" Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -622,7 +622,7 @@ task MarkDuplicates { String memoryMb = javaXmxMb + 512 Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } # Task is assuming query-sorted input so that the Secondary and Supplementary reads get @@ -701,7 +701,7 @@ task MergeVCFs { String javaXmx = "4G" String memory = "5G" Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } # Using MergeVcfs instead of GatherVcfs so we can create indices. @@ -757,7 +757,7 @@ task SamToFastq { String javaXmx = "16G" # High memory default to avoid crashes. String memory = "17G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" File? 
noneFile } @@ -818,7 +818,7 @@ task ScatterIntervalList { String javaXmx = "3G" String memory = "4G" - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -859,7 +859,7 @@ task SortSam { # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) - String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { @@ -917,7 +917,7 @@ task SortVcf { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) - String dockerImage = "quay.io/biocontainers/picard:2.20.5--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } @@ -967,7 +967,7 @@ task RenameSample { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.19.0--0" + String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" } command { diff --git a/samtools.wdl b/samtools.wdl index 0aecf4ee..9042a0df 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -69,7 +69,7 @@ task Faidx { String outputDir String memory = "2G" - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } command { @@ -118,7 +118,7 @@ task Fastq { Int threads = 1 String memory = "1G" Int timeMinutes = 1 + ceil(size(inputBam) * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } command { @@ -180,7 +180,7 @@ task FilterShortReadsBam { String memory = "1G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) - String dockerImage = 
"quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") @@ -226,7 +226,7 @@ task Flagstat { String memory = "256M" # Only 40.5 MiB used for 150G bam file. Int timeMinutes = 1 + ceil(size(inputBam, "G")) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } command { @@ -266,7 +266,7 @@ task Index { String memory = "2G" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } # Select_first is needed, otherwise womtool validate fails. @@ -317,7 +317,7 @@ task Markdup { String outputBamPath Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } command { @@ -356,7 +356,7 @@ task Merge { Int threads = 1 String memory = "4G" Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } String indexPath = sub(outputBamPath, "\.bam$",".bai") @@ -411,7 +411,7 @@ task Sort { Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } # Select first needed as outputPath is optional input (bug in cromwell). 
@@ -523,7 +523,7 @@ task View { Int threads = 1 String memory = "1G" Int timeMinutes = 1 + ceil(size(inFile, "G") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } String outputIndexPath = basename(outputFileName) + ".bai" diff --git a/scripts b/scripts index 0cca0f40..85e2ec54 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 0cca0f40a8e9121e8dcc9e76838f85835a0d8e94 +Subproject commit 85e2ec542b65be5f2a25c22db05c28700fbe6db5 From b5558be1a1706b2ad96f947e61db78985c747cd7 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 19 Nov 2020 13:29:00 +0100 Subject: [PATCH 0719/1208] Revert update CutAdapt. --- .travis.yml | 3 ++- CHANGELOG.md | 1 - VERSION | 2 +- cutadapt.wdl | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 396b998f..3cf0681f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,4 +19,5 @@ before_install: install: - conda install --file requirements-test.txt -script: bash scripts/biowdl_lint.sh +script: + - bash scripts/biowdl_lint.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index dae3f185..a6cc9bff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,6 @@ version 5.0.0-dev + Update Picard to version 2.23.8. + Update NanoPlot to version 1.32.1. + Update MultiQC to version 1.9. -+ Update CutAdapt to version 3.0.0. + Update StringTie to version 2.1.4. + Complete `parameter_meta` for tasks missing the outputs. + DeepVariant: Add an optional input for the gvcf index. 
diff --git a/VERSION b/VERSION index ee74734a..0062ac97 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.1.0 +5.0.0 diff --git a/cutadapt.wdl b/cutadapt.wdl index b9f5a649..b2dbdec0 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -85,7 +85,7 @@ task Cutadapt { Int cores = 4 String memory = "~{300 + 100 * cores}M" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) - String dockerImage = "quay.io/biocontainers/cutadapt:3.0--py37hf01694f_0" + String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" } String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) From 9b8d8a9844ea41ad4f1f630ed6b816be5596f8c9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 14:04:28 +0100 Subject: [PATCH 0720/1208] add hmftools.wdl --- hmftools.wdl | 433 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 433 insertions(+) create mode 100644 hmftools.wdl diff --git a/hmftools.wdl b/hmftools.wdl new file mode 100644 index 00000000..73c3e318 --- /dev/null +++ b/hmftools.wdl @@ -0,0 +1,433 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Amber { + input { + String normalName + File normalBam + File normalBamIndex + String tumorName + File tumorBam + File tumorBamIndex + String outputDir = "./amber" + File loci + File referenceFasta + File referenceFastaFai + File referenceFastaDict + + Int threads = 2 + String memory = = "33G" + String javaXmx = "32G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" + } + + command { + AMBER -Xmx~{javaXmx} \ + -reference ~{normalName} \ + -reference_bam ~{normalBam} \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + -output_dir ~{outputDir} \ + -threads ~{threads} \ + -ref_genome ~{referenceFasta} \ + -loci ~{loci} + } + + output { + File version = "amber.version" + File tumorBafPcf = "~{outputDir}/~{tumorName}.amber.baf.pcf" + File tumorBafTsv = "~{outputDir}/~{tumorName}.amber.baf.tsv" + File tumorBafVcf = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz" + File tumorBafVcfIndex = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz.tbi" + File tumorContaminationVcf = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz" + File tumorContaminationVcfIndex = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz.tbi" + File tumorContaminationTsv = "~{outputDir}/~{tumorName}.amber.contamination.tsv" + File tumorQc = "~{outputDir}/~{tumorName}.amber.qc" + File normalSnpVcf = "~{outputDir}/~{normalName}.amber.snp.vcf.gz" + File normalSnpVcfIndex = "~{outputDir}/~{normalName}.amber.snp.vcf.gz.tbi" + Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex, + tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, + normalSnpVcf, normalSnpVcfIndex] + } + + runtime { + memory: memory + time_minutes: 
timeMinutes # !UnknownRuntimeKey + docker: dockerImage + cpu: threads + } + + parameter_meta { + normalName: {description: "the name of the normal sample.", category: "required"} + normalBam: {description: "The normal BAM file.", category: "required"} + normalBamIndex: {description: "The index for the normal BAM file.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The tumor BAM file.", category: "required"} + tumorBamIndex: {description: "The index for the tumor BAM file.", category: "required"} + outputDir: {description: "The path to the output directory.", category: "common"} + loci: {description: "A VCF file containing likely heterozygous sites.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + threads: {description: "The number of threads the program will use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task Cobalt { + input { + String normalName + File normalBam + File normalBamIndex + String tumorName + File tumorBam + File tumorBamIndex + String outputDir = "./cobalt" + File gcProfile + + Int threads = 1 + String memory = = "9G" + String javaXmx = "8G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.10--0" + } + + command { + COBALT -Xmx~{javaXmx} \ + -reference ~{normalName} \ + -reference_bam ~{normalBam} \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + -output_dir ~{outputDir}\ + -threads ~{threads} \ + -gc_profile ~{gcProfile} + } + + output { + File version = "cobalt.version" + File normalGcMedianTsv = "~{outputDir}/~{normalName}.cobalt.gc.median.tsv" + File normalRationMedianTsv = "~{outputDir}/~{normalName}.cobalt.ratio.median.tsv" + File normalRationPcf = "~{outputDir}/~{normalName}.cobalt.ratio.pcf" + File tumorGcMedianTsv = "~{outputDir}/~{tumorName}.cobalt.gc.median.tsv" + File tumorRatioPcf = "~{outputDir}/~{tumorName}.cobalt.ratio.pcf" + File tumorRatioTsv = "~{outputDir}/~{tumorName}.cobalt.ratio.tsv" + File tumorChrLen = "~{outputDir}/~{tumorName}.chr.len" + Array[File] outputs = [version, normalGcMedianTsv, normalRationMedianTsv, + normalRationPcf, tumorGcMedianTsv, tumorRatioPcf, tumorRatioTsv, tumorChrLen] + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + cpu: threads + } + + parameter_meta { + normalName: {description: "the name of the normal sample.", category: "required"} + normalBam: {description: "The normal BAM file.", category: "required"} + normalBamIndex: {description: "The index for the normal BAM file.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The tumor BAM file.", category: "required"} + tumorBamIndex: {description: "The 
index for the tumor BAM file.", category: "required"} + outputDir: {description: "The path to the output directory.", category: "common"} + gcProfile: {description: "A file describing the GC profile of the reference genome.", category: "required"} + threads: {description: "The number of threads the program will use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task GripssApplicationKt { + input { + File inputVcf + String outputPath = "gripss.vcf.gz" + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File breakpointHotspot + File breakendPon + File breakpointPon + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + com.hartwig.hmftools.gripss.GripssApplicationKt \ + -ref_genome ~{referenceFasta} \ + -breakpoint_hotspot ~{breakpointHotspot} \ + -breakend_pon ~{breakendPon} \ + -breakpoint_pon ~{breakpointPon} \ + -input_vcf ~{inputVcf} \ + -output_vcf ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF.", category: "required"} + outputPath: {description: "The path where th eoutput VCF will be 
written.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} + breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} + breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task GripssHardFilterApplicationKt { + input { + File inputVcf + String outputPath = "gripss_hard_filter.vcf.gz" + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ + -input_vcf ~{inputVcf} \ + -output_vcf ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF.", category: "required"} + outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task Purple { + input { + String normalName + String tumorName + String outputDir = "./purple" + Array[File]+ amberOutput + Array[File]+ cobaltOutput + File gcProfile + File somaticVcf + File filteredSvVcf + File fullSvVcf + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File hotspots + + Int threads = 1 + Int time_minutes = 60 + String memory = "13G" + String javaXmx = "12G" + String docker = "quay.io/biocontainers/hmftools-purple:2.51--1" + } + + command { + PURPLE -Xmx~{javaXmx} \ + -reference ~{normalName} \ + -tumor ~{tumorName} \ + -output_dir ~{outputDir} \ + -amber ~{sub(amberOutput, basename(amberOutput[0]), "")} \ + -cobalt ~{sub(cobaltOutput, basename(cobaltOutput[0]), "")} \ + -gc_profile ~{gcProfile} \ + -somatic_vcf ~{somaticVcf} \ + -structural_vcf ~{filteredSvVcf} \ + -sv_recovery_vcf ~{fullSvVcf} \ + -circos /usr/local/bin/circos \ + -ref_genome ~{referenceFasta} \ + -driver_catalog \ + -hotspots ~{hotspots} \ + -threads ~{threads} + + # TODO if shallow also the following: + #-highly_diploid_percentage 0.88 \ + #-somatic_min_total 100 \ + #-somatic_min_purity_spread 0.1 + } + + output { + #TODO + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + normalName: {description: "the name of the normal sample.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + outputDir: {description: "The path to the output directory.", category: "common"} + amberOutput: {description: "The output files of hmftools amber.", category: "required"} + cobaltOutput: {description: "The output files of hmftools cobalt", category: "required"} + gcProfile: {description: "A file describing the GC profile of the reference genome.", category: "required"} + somaticVcf: {description: "The 
somatic variant calling results.", category: "required"} + filteredSvVcf: {description: "The filtered structural variant calling results.", category: "required"} + fullSvVcf: {description: "The unfiltered structural variant calling results.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + + threads: {description: "The number of threads the program will use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task Sage { + input { + String tumorName + File tumorBam + File tumorBamIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai + File hotspots + File panelBed + File highConfidenceBed + Boolean hg38 = false + String outputPath = "./sage.vcf.gz" + + String? normalName + File? normalBam + File? 
normalBamIndex + + Int threads = 2 + String javaXmx = "32G" + String memory = "33G" + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--2" + } + + command { + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-sage-2.2-2/sage.jar \ + com.hartwig.hmftools.sage.SageApplication \ + -tumor ~{tumorName} \ + -tumor_bam ~{tumorBam} \ + ~{"-reference " + normalName} \ + ~{"-reference_bam " + normalBam} \ + -ref_genome ~{referenceFasta} \ + -hotspots ~{hotspots} \ + -panel_bed ~{panelBed} \ + -high_confidence_bed ~{highConfidenceBed} \ + -assembly ~{true="hg38" false="hg19" hg38} \ + -threads ~{threads} \ + -out ~{outputPath} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + # There is some plots as well, but in the current container the labels in the plots are just series of `□`s. + # This seems to be a systemic issue with R generated plots in biocontainers... 
+ } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + cpu: threads + docker: dockerImage + memory: memory + } + + parameter_meta { + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} + tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} + normalName: {description: "The name of the normal/reference sample.", category: "common"} + normalBam: {description: "The BAM file for the normal sample.", category: "common"} + normalBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + panelBed: {description: "A bed file describing coding regions to search for in frame indels.", category: "required"} + highConfidenceBed: {description: "A bed files describing high confidence mapping regions.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From 90fd344b8f41fb6b1d632a8412ec2b416c5c7715 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 14:11:19 +0100 Subject: [PATCH 0721/1208] fix some typos --- hmftools.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 73c3e318..3757cade 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,7 +35,7 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = = "33G" + String memory = "33G" String javaXmx = "32G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" @@ -112,7 +112,7 @@ task Cobalt { File gcProfile Int threads = 1 - String memory = = "9G" + String memory = "9G" String javaXmx = "8G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.10--0" @@ -286,10 +286,10 @@ task Purple { File hotspots Int threads = 1 - Int time_minutes = 60 + Int timeMinutes = 60 String memory = "13G" String javaXmx = "12G" - String docker = "quay.io/biocontainers/hmftools-purple:2.51--1" + String dockerImage = "quay.io/biocontainers/hmftools-purple:2.51--1" } command { @@ -297,8 +297,8 @@ task Purple { -reference ~{normalName} \ -tumor ~{tumorName} \ -output_dir ~{outputDir} \ - -amber ~{sub(amberOutput, basename(amberOutput[0]), "")} \ - -cobalt ~{sub(cobaltOutput, basename(cobaltOutput[0]), "")} \ + -amber ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ + -cobalt ~{sub(cobaltOutput[0], basename(cobaltOutput[0]), "")} \ -gc_profile ~{gcProfile} \ -somatic_vcf ~{somaticVcf} \ -structural_vcf ~{filteredSvVcf} \ From 764f188c73d8c1b57f0d50b148a30d0e84309c42 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 14:39:52 +0100 Subject: [PATCH 0722/1208] fix outputs amber/cobalt --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 
3757cade..09af79c9 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -54,7 +54,7 @@ task Amber { } output { - File version = "amber.version" + File version = "~{outputDir}/amber.version" File tumorBafPcf = "~{outputDir}/~{tumorName}.amber.baf.pcf" File tumorBafTsv = "~{outputDir}/~{tumorName}.amber.baf.tsv" File tumorBafVcf = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz" @@ -130,7 +130,7 @@ task Cobalt { } output { - File version = "cobalt.version" + File version = "~{outputDir}/cobalt.version" File normalGcMedianTsv = "~{outputDir}/~{normalName}.cobalt.gc.median.tsv" File normalRationMedianTsv = "~{outputDir}/~{normalName}.cobalt.ratio.median.tsv" File normalRationPcf = "~{outputDir}/~{normalName}.cobalt.ratio.pcf" From 857da21ef4b61276d3beb5ddbe56d0895cd96c32 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 14:54:06 +0100 Subject: [PATCH 0723/1208] typo --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 09af79c9..ed2914bf 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -124,7 +124,7 @@ task Cobalt { -reference_bam ~{normalBam} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ - -output_dir ~{outputDir}\ + -output_dir ~{outputDir} \ -threads ~{threads} \ -gc_profile ~{gcProfile} } From 54ac9d0c41f74c578f2418bc76483d1081695369 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 19 Nov 2020 15:18:41 +0100 Subject: [PATCH 0724/1208] add missed argument in purple --- hmftools.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index ed2914bf..fc56ecd9 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -283,6 +283,7 @@ task Purple { File referenceFasta File referenceFastaFai File referenceFastaDict + File driverGenePanel File hotspots Int threads = 1 @@ -306,6 +307,7 @@ task Purple { -circos /usr/local/bin/circos \ -ref_genome ~{referenceFasta} \ -driver_catalog \ + -driver_gene_panel ~{driverGenePanel} \ -hotspots ~{hotspots} \ -threads ~{threads} @@ -340,6 +342,7 
@@ task Purple { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + driverGenePanel: {description: "A bed file describing the driver gene panel.", category: "required"} hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} From 243c1dbfc834d2e52876e826bf2f852fe51cb2fb Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 26 Nov 2020 09:05:09 +0100 Subject: [PATCH 0725/1208] enable genotyping --- smoove.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index e5c5348f..d1011f6c 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -41,11 +41,13 @@ task Call { --outdir ~{outputDir} \ --name ~{sample} \ --fasta ~{referenceFasta} \ + --removepr \ + --genotype \ ~{bamFile} } output { - File smooveVcf = outputDir + "/" + sample + "-smoove.vcf.gz" + File smooveVcf = outputDir + "/" + sample + "-smoove.genotyped.vcf.gz" } runtime { From 86f26caf9fa94c5aa2b2e917bc608e1ef8173966 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 26 Nov 2020 09:31:40 +0100 Subject: [PATCH 0726/1208] Update PacBio tasks. --- lima.wdl | 7 +++++-- pacbio.wdl | 10 +++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/lima.wdl b/lima.wdl index 119db3f4..c06a9a73 100644 --- a/lima.wdl +++ b/lima.wdl @@ -88,9 +88,12 @@ task Lima { ~{barcodeFile} \ ~{outputPrefix + ".bam"} - # copy the files with the default filename to the folder specified in + # Copy the files with the default filename to the folder specified in # outputPrefix. - if [ "~{basename(outputPrefix)}.json" != "~{outputPrefix}.json" ]; then + if [[ -f "~{outputPrefix}.json" ]] + then + echo "Log files already at output location." 
+ else cp "~{basename(outputPrefix)}.json" "~{outputPrefix}.json" cp "~{basename(outputPrefix)}.lima.counts" "~{outputPrefix}.lima.counts" cp "~{basename(outputPrefix)}.lima.report" "~{outputPrefix}.lima.report" diff --git a/pacbio.wdl b/pacbio.wdl index df0343d9..7c0113fd 100644 --- a/pacbio.wdl +++ b/pacbio.wdl @@ -23,7 +23,7 @@ version 1.0 task mergePacBio { input { Array[File]+ reports - String mergedReport + String outputPathMergedReport String memory = "4G" String dockerImage = "lumc/pacbio-merge:0.2" @@ -31,10 +31,10 @@ task mergePacBio { command { set -e - mkdir -p $(dirname ~{mergedReport}) + mkdir -p $(dirname ~{outputPathMergedReport}) pacbio_merge \ --reports ~{sep=" " reports} \ - --json-output ~{mergedReport} + --json-output ~{outputPathMergedReport} } runtime { @@ -43,13 +43,13 @@ task mergePacBio { } output { - File outputMergedReport = mergedReport + File outputMergedReport = outputPathMergedReport } parameter_meta { # inputs reports: {description: "The PacBio report files to merge.", category: "required"} - mergedReport: {description: "The location the merged PacBio report file should be written to.", category: "common"} + outputPathMergedReport: {description: "The location the merged PacBio report file should be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 02f71e1708a92c7128165ab2919b3c9f4fb117dc Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 27 Nov 2020 14:44:01 +0100 Subject: [PATCH 0727/1208] Upload another fix. 
--- pbbam.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/pbbam.wdl b/pbbam.wdl index d893e64d..ae64b87c 100644 --- a/pbbam.wdl +++ b/pbbam.wdl @@ -36,7 +36,6 @@ task Index { String bamIndexPath = outputPath + ".pbi" command { - bash -c ' set -e # Make sure outputBamPath does not exist. if [ ! -f ~{outputPath} ] @@ -45,7 +44,6 @@ task Index { ln ~{bamFile} ~{outputPath} fi pbindex ~{outputPath} ~{bamIndexPath} - ' } output { From a34711e264482507e73669190b456d4de499f164 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 30 Nov 2020 10:37:23 +0100 Subject: [PATCH 0728/1208] downgrade stringtie and fix size call in gffread --- CHANGELOG.md | 5 ++++- gffread.wdl | 2 +- stringtie.wdl | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a6cc9bff..216fdd67 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,13 +10,16 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Fixed the `size` call in the default for gffread's timeMinutes, to retrieve + GBs instead of bytes. ++ Update stringtie to version 1.3.6. + Update Lima to version 2.0.0. + Update IsoSeq3 to version 3.4.0. + Update samtools to version 1.11. + Update Picard to version 2.23.8. + Update NanoPlot to version 1.32.1. + Update MultiQC to version 1.9. -+ Update StringTie to version 2.1.4. ++ ~Update StringTie to version 2.1.4.~ + Complete `parameter_meta` for tasks missing the outputs. + DeepVariant: Add an optional input for the gvcf index. + Samtools: `Sort` task now has `threads` in runtime instead of `1`. diff --git a/gffread.wdl b/gffread.wdl index 66230989..967dd5c9 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -32,7 +32,7 @@ task GffRead { String? proteinFastaPath String? 
filteredGffPath - Int timeMinutes = 1 + ceil(size(inputGff) * 10) + Int timeMinutes = 1 + ceil(size(inputGff, "G") * 10) String dockerImage = "quay.io/biocontainers/gffread:0.9.12--0" } diff --git a/stringtie.wdl b/stringtie.wdl index 81d96132..d3a6f73d 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -35,7 +35,7 @@ task Stringtie { Int threads = 1 String memory = "2G" Int timeMinutes = 1 + ceil(size(bam, "G") * 60 / threads) - String dockerImage = "quay.io/biocontainers/stringtie:2.1.4--h7e0af3c_0" + String dockerImage = "quay.io/biocontainers/stringtie:1.3.6--h92e31bf_0" } command { From ff47f07c0657f717fbf2311b56cdd3ad3b23a7c2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 30 Nov 2020 17:22:38 +0100 Subject: [PATCH 0729/1208] Update lima. --- CHANGELOG.md | 2 ++ lima.wdl | 39 ++++++++++++++++----------------------- 2 files changed, 18 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a6cc9bff..01303723 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. ++ Lima: Fix copy commands. + Update Lima to version 2.0.0. + Update IsoSeq3 to version 3.4.0. + Update samtools to version 1.11. 
diff --git a/lima.wdl b/lima.wdl index c06a9a73..90cd6986 100644 --- a/lima.wdl +++ b/lima.wdl @@ -56,7 +56,7 @@ task Lima { Map[String, String] libraryDesignOptions = {"same": "--same", "different": "--different", "neighbors": "--neighbors"} - command { + command <<< set -e mkdir -p "$(dirname ~{outputPrefix})" lima \ @@ -83,33 +83,26 @@ task Lima { ~{true="--peek-guess" false="" peekGuess} \ --log-level ~{logLevel} \ --num-threads ~{threads} \ - ~{"--log-file " + outputPrefix + ".stderr.log"} \ + ~{"--log-file " + outputPrefix + ".fl.stderr.log"} \ ~{inputBamFile} \ ~{barcodeFile} \ - ~{outputPrefix + ".bam"} + ~{outputPrefix + ".fl.bam"} - # Copy the files with the default filename to the folder specified in - # outputPrefix. - if [[ -f "~{outputPrefix}.json" ]] - then - echo "Log files already at output location." - else - cp "~{basename(outputPrefix)}.json" "~{outputPrefix}.json" - cp "~{basename(outputPrefix)}.lima.counts" "~{outputPrefix}.lima.counts" - cp "~{basename(outputPrefix)}.lima.report" "~{outputPrefix}.lima.report" - cp "~{basename(outputPrefix)}.lima.summary" "~{outputPrefix}.lima.summary" - fi - } + dirName="$(dirname ~{outputPrefix})" + find "$(cd ${dirName}; pwd)" -name "*.fl.*.bam" > bamFiles.txt + find "$(cd ${dirName}; pwd)" -name "*.fl.*.bam.pbi" > bamIndexes.txt + find "$(cd ${dirName}; pwd)" -name "*.fl.*.subreadset.xml" > subreadsets.txt + >>> output { - Array[File] limaBam = glob("*.bam") - Array[File] limaBamIndex = glob("*.bam.pbi") - Array[File] limaXml = glob("*.subreadset.xml") - File limaStderr = outputPrefix + ".stderr.log" - File limaJson = outputPrefix + ".json" - File limaCounts = outputPrefix + ".lima.counts" - File limaReport = outputPrefix + ".lima.report" - File limaSummary = outputPrefix + ".lima.summary" + Array[File] limaBam = read_lines("bamFiles.txt") + Array[File] limaBamIndex = read_lines("bamIndexes.txt") + Array[File] limaXml = read_lines("subreadsets.txt") + File limaStderr = outputPrefix + ".fl.stderr.log" + 
File limaJson = outputPrefix + ".fl.json" + File limaCounts = outputPrefix + ".fl.lima.counts" + File limaReport = outputPrefix + ".fl.lima.report" + File limaSummary = outputPrefix + ".fl.lima.summary" } runtime { From 3de3fcc809734b3a43080a75e9ad683bb0ee055f Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 30 Nov 2020 17:24:07 +0100 Subject: [PATCH 0730/1208] Update CHANGELOG. --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01303723..0d6c0bc8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. -+ Lima: Fix copy commands. ++ Lima: Fix copy commands & return to `fl` naming. + Update Lima to version 2.0.0. + Update IsoSeq3 to version 3.4.0. + Update samtools to version 1.11. From 0df52e802caa2e7f3793ec37f6378d8929bb6411 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 1 Dec 2020 12:21:37 +0100 Subject: [PATCH 0731/1208] Remove naming. 
--- lima.wdl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lima.wdl b/lima.wdl index 90cd6986..2455aaac 100644 --- a/lima.wdl +++ b/lima.wdl @@ -83,26 +83,26 @@ task Lima { ~{true="--peek-guess" false="" peekGuess} \ --log-level ~{logLevel} \ --num-threads ~{threads} \ - ~{"--log-file " + outputPrefix + ".fl.stderr.log"} \ + ~{"--log-file " + outputPrefix + ".lima.stderr.log"} \ ~{inputBamFile} \ ~{barcodeFile} \ - ~{outputPrefix + ".fl.bam"} + ~{outputPrefix + ".bam"} dirName="$(dirname ~{outputPrefix})" - find "$(cd ${dirName}; pwd)" -name "*.fl.*.bam" > bamFiles.txt - find "$(cd ${dirName}; pwd)" -name "*.fl.*.bam.pbi" > bamIndexes.txt - find "$(cd ${dirName}; pwd)" -name "*.fl.*.subreadset.xml" > subreadsets.txt + find "$(cd ${dirName}; pwd)" -name "*.bam" > bamFiles.txt + find "$(cd ${dirName}; pwd)" -name "*.bam.pbi" > bamIndexes.txt + find "$(cd ${dirName}; pwd)" -name "*.subreadset.xml" > subreadsets.txt >>> output { Array[File] limaBam = read_lines("bamFiles.txt") Array[File] limaBamIndex = read_lines("bamIndexes.txt") Array[File] limaXml = read_lines("subreadsets.txt") - File limaStderr = outputPrefix + ".fl.stderr.log" - File limaJson = outputPrefix + ".fl.json" - File limaCounts = outputPrefix + ".fl.lima.counts" - File limaReport = outputPrefix + ".fl.lima.report" - File limaSummary = outputPrefix + ".fl.lima.summary" + File limaStderr = outputPrefix + ".lima.stderr.log" + File limaJson = outputPrefix + ".json" + File limaCounts = outputPrefix + ".lima.counts" + File limaReport = outputPrefix + ".lima.report" + File limaSummary = outputPrefix + ".lima.summary" } runtime { From fec33b447644769d5c1602d7a0fee0c6ee19b3b9 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 1 Dec 2020 12:22:25 +0100 Subject: [PATCH 0732/1208] Update changelog. 
--- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 77cf803b..22f41826 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. -+ Lima: Fix copy commands & return to `fl` naming. ++ Lima: Fix copy commands. + Fixed the `size` call in the default for gffread's timeMinutes, to retrieve GBs instead of bytes. + Update stringtie to version 1.3.6. From e87052a739ba2d2ac29cf0dad1cb5ace642f6e8c Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 9 Dec 2020 13:26:24 +0100 Subject: [PATCH 0733/1208] add duphold paramater in smoove --- smoove.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/smoove.wdl b/smoove.wdl index d1011f6c..7a7e4305 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -43,6 +43,7 @@ task Call { --fasta ~{referenceFasta} \ --removepr \ --genotype \ + --duphold \ ~{bamFile} } From 19b79d9c2617212deb1d2dca1e6ca93c2115d847 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Dec 2020 15:59:19 +0100 Subject: [PATCH 0734/1208] Use github actions CI --- .github/workflows/ci.yml | 32 ++++++++++++++++++++++++++++++++ .travis.yml | 23 ----------------------- requirements-test.txt | 11 ----------- 3 files changed, 32 insertions(+), 34 deletions(-) create mode 100644 .github/workflows/ci.yml delete mode 100644 .travis.yml delete mode 100644 requirements-test.txt diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..97d329ad --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,32 @@ +name: Continuous integration + +on: + pull_request: + paths: + - "**.wdl" # Workflow files and task + - "**.yml" # Ci configuration, tests and docker images + - "!docs/**" + +defaults: + run: + # This is needed for miniconda, see: + # 
https://github.com/marketplace/actions/setup-miniconda#important + shell: bash -l {0} + +jobs: + lint: + runs-on: ubuntu-latest + name: Womtool validate and submodule up to date. + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + - name: install miniconda + uses: conda-incubator/setup-miniconda@v2.0.1 + with: + channels: conda-forge,bioconda,defaults + # Conda-incubator uses 'test' environment by default. + - name: install requirements + run: conda install -n test cromwell miniwdl wdl-aid + - name: run linting + run: bash scripts/biowdl_lint.sh \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 3cf0681f..00000000 --- a/.travis.yml +++ /dev/null @@ -1,23 +0,0 @@ -# We use conda to install cromwell. - -language: python - -python: - - 3.6 - -before_install: - # Install conda - - export MINICONDA=${HOME}/miniconda - - export PATH=${MINICONDA}/bin:${PATH} - - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh - - bash miniconda.sh -b -f -p ${MINICONDA} - - conda config --set always_yes yes - - conda config --add channels defaults - - conda config --add channels bioconda - - conda config --add channels conda-forge - -install: - - conda install --file requirements-test.txt - -script: - - bash scripts/biowdl_lint.sh diff --git a/requirements-test.txt b/requirements-test.txt deleted file mode 100644 index 0b01d193..00000000 --- a/requirements-test.txt +++ /dev/null @@ -1,11 +0,0 @@ -# These are the programs used for testing these biowdl tasks. -# These requirements can be installed with conda with the bioconda channel -# activated. -# For more information on how to set up conda with bioconda channel see: -# http://bioconda.github.io/#install-conda -# This file can be installed with "conda install --file requirements-test.txt". 
- -cromwell -womtool -miniwdl -wdl-aid From 52b7c02f4ed1e7bee376af192747efa75cf55004 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 10:04:43 +0100 Subject: [PATCH 0735/1208] bcftools: rm memory parameter meta --- bcftools.wdl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 28380dea..0cbfdefd 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -43,7 +43,7 @@ task Annotate { File? regionsFile File? renameChrs File? samplesFile - + Int threads = 0 String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) @@ -53,7 +53,7 @@ task Annotate { Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") command { - set -e + set -e mkdir -p "$(dirname ~{outputPath})" bcftools annotate \ -o ~{outputPath} \ @@ -154,7 +154,7 @@ task Sort { File outputVcf = outputPath File? outputVcfIndex = outputPath + ".tbi" } - + runtime { memory: memory time_minutes: timeMinutes @@ -291,6 +291,8 @@ task View { File inputFile String outputPath = "output.vcf" + String? exclude + String? 
include String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" @@ -302,6 +304,8 @@ task View { set -e mkdir -p "$(dirname ~{outputPath})" bcftools view \ + ~{"--include " + include} \ + ~{"--exclude " + exclude} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ ~{inputFile} @@ -324,7 +328,8 @@ task View { # inputs inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} + include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} + exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 4cf91963c64c48478c8009e65aa20678ad423eb9 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 10:44:10 +0100 Subject: [PATCH 0736/1208] add duphold --- duphold.sh | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 duphold.sh diff --git a/duphold.sh b/duphold.sh new file mode 100644 index 00000000..6e65ee5c --- /dev/null +++ b/duphold.sh @@ -0,0 +1,76 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task Duphold { + input { + File inputVcf + File bamFile + File bamIndex + File referenceFasta + File referenceFastaFai + String sample + String outputPath = "./duphold.vcf" + + String memory = "15G" + Int timeMinutes = 1440 + String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" + } + + String outputVCF = outputDir + basename(inputVcf, ".vcf") + "-duphold.vcf" + + command { + set -e + mkdir -p ~{outputDir} + export DUPHOLD_SAMPLE_NAME=~{sample} + duphold \ + -v ~{inputVcf} \ + -b ~{bamFile} \ + -f ~{referenceFasta} \ + -o ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + bamFile: {description: "The bam file to process.", category: "required"} + bamIndex: {description: "The index of the bam file.", category: "required"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} + referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } + sample: {description: "The name of the sample.", category: "required"} + outputDir: {description: "The location the output VCF file should be written.", category: "common"} + memory: {description: "The memory required to run the programs.", category: "advanced"} + timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + smooveVcf: {description: "Calls of structural variants in VCF file."} + } +} From fb65bfe1ab5e627cb23812264ab651748e844b89 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 11:13:40 +0100 Subject: [PATCH 0737/1208] add duphold.wdl --- duphold.sh => duphold.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) rename duphold.sh => duphold.wdl (92%) diff --git a/duphold.sh b/duphold.wdl similarity index 92% rename from duphold.sh rename to duphold.wdl index 6e65ee5c..9c7255ff 100644 --- a/duphold.sh +++ b/duphold.wdl @@ -32,7 +32,7 @@ task Duphold { String memory = "15G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" + String dockerImage = "quay.io/biocontainers/duphold:0.2.1--h516909a_1" } String outputVCF = outputDir + basename(inputVcf, ".vcf") + "-duphold.vcf" @@ -60,6 +60,7 @@ task Duphold { parameter_meta { # inputs + inputVcf: {description: "The VCF file to process.", category: "required"} bamFile: {description: "The bam file to process.", category: "required"} bamIndex: {description: "The index of the bam file.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} @@ -71,6 +72,6 @@ task Duphold { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - smooveVcf: {description: "Calls of structural variants in VCF file."} + outputVcf: {description: "Duphold annotated VCF file."} } } From fca78c3d28d57b5ebfe802deccc52b86ae00c651 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 12:51:23 +0100 Subject: [PATCH 0738/1208] fix outputpath --- duphold.wdl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/duphold.wdl b/duphold.wdl index 9c7255ff..80fe31d2 100644 --- a/duphold.wdl +++ b/duphold.wdl @@ -35,11 +35,9 @@ task Duphold { String dockerImage = "quay.io/biocontainers/duphold:0.2.1--h516909a_1" } - String outputVCF = outputDir + basename(inputVcf, ".vcf") + "-duphold.vcf" - command { set -e - mkdir -p ~{outputDir} + mkdir -p "$(dirname ~{outputPath})" export DUPHOLD_SAMPLE_NAME=~{sample} duphold \ -v ~{inputVcf} \ @@ -66,7 +64,7 @@ task Duphold { referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } sample: {description: "The name of the sample.", category: "required"} - outputDir: {description: "The location the output VCF file should be written.", category: "common"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 80566da7e582afa0d445547fb3555a8f9cccae07 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 11 Dec 2020 12:51:39 +0100 Subject: [PATCH 0739/1208] remove duphold parameter --- smoove.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/smoove.wdl b/smoove.wdl index 7a7e4305..d1011f6c 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -43,7 +43,6 @@ task Call { --fasta ~{referenceFasta} \ --removepr \ --genotype \ - --duphold \ ~{bamFile} } From 0232cf8e79dc6975eecc9a7d2336f45f2d191f05 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Dec 2020 16:19:20 +0100 Subject: [PATCH 0740/1208] add some taks --- hmftools.wdl | 47 ++++++++++++++++++++++++++++++++++++++ picard.wdl | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index fc56ecd9..f9a606e7 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -269,6 +269,53 @@ task GripssHardFilterApplicationKt { } } +task HealthChecker { + input { + String normalName + String tumorName + + String javaXmx = "10G" + } + + command { + java -Xmx10G \ + -jar /opt/tools/health-checker/3.1/health-checker.jar \ + -reference ~{normalName} \ + -tumor ~{tumorName} \ + -metrics_dir ~{metricsPath} \ + -amber_dir ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ + -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ + -output_dir ~{outputDir} + } + + # super("health-checker", + # Versions.HEALTH_CHECKER, + # "health-checker.jar", + # "10G", + # Lists.newArrayList("-reference", + # referenceSampleName, + # "-tumor", + # tumorSampleName, + # "-ref_wgs_metrics_file", + # referenceMetricsPath, + # "-tum_wgs_metrics_file", + # tumorMetricsPath, + # "-ref_flagstat_file", + # referenceFlagstatPath, + # "-tum_flagstat_file", + # tumorFlagstatPath, + # "-purple_dir", + # purplePath, + # "-output_dir", + # outputPath)); + + output { + + } + + +} + 
task Purple { input { String normalName diff --git a/picard.wdl b/picard.wdl index 1afa5ea7..88ddd313 100644 --- a/picard.wdl +++ b/picard.wdl @@ -315,6 +315,70 @@ task CollectTargetedPcrMetrics { } } +task CollectWgsMetrics { + input { + File inputBam + File inputBamIndex + File referenceFasta + File referenceFastaDict + File referenceFastaFai + String outputPath = "./wgs_metrics.txt" + + Int? minimumMappingQuality + Int? minimumBaseQuality + Int? coverageCap + + String memory = "25G" + String javaXmx = "24G" + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + CollectWgsMetrics \ + REFERENCE_SEQUENCE=~{referenceFasta} \ + INPUT=~{inputBam} \ + OUTPUT=~{outputPath} \ + ~{"MINIMUM_MAPPING_QUALITY=" + minimumMappingQuality} \ + ~{"MINIMUM_BASE_QUALITY=" + minimumBaseQuality} \ + ~{"OVERAGE_CAP=" + coverageCap} + } + + output { + File metrics = outputPath + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} + inputBamIndex: {description: "The index of the input BAM file.", category: "required"} + referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + outputPath: {description: "The path picard CollectWgsMetrics' output should be written to.", category: "common"} + minimumMappingQuality: {description: "Equivalent to picard CollectWgsMetrics' MINIMUM_MAPPING_QUALITY option.", category: "advanced"} + minimumBaseQuality: {description: 
"Equivalent to picard CollectWgsMetrics' MINIMUM_BASE_QUALITY option.", category: "advanced"} + coverageCap: {description: "Equivalent to picard CollectWgsMetrics' OVERAGE_CAP option.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task CreateSequenceDictionary { input { File inputFile From 9896f4fcaba3d5ee9b070a03a21bc23484037fb1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 16 Dec 2020 14:08:56 +0100 Subject: [PATCH 0741/1208] add purple outputs --- bcftools.wdl | 2 +- bwa.wdl | 2 +- gridss.wdl | 2 +- hmftools.wdl | 39 ++++++++++++++++++++++++++++++++++----- sambamba.wdl | 2 +- 5 files changed, 38 insertions(+), 9 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 1dba7611..c91460bb 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -50,7 +50,7 @@ task Annotate { Int threads = 0 String memory = "256M" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) + Int timeMinutes = 10 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } diff --git a/bwa.wdl b/bwa.wdl index fdeb870f..44cfc9fe 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -34,7 +34,7 @@ task Mem { Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 Int? 
memoryGb - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 260 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10 String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } diff --git a/gridss.wdl b/gridss.wdl index c444c854..88655442 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -35,7 +35,7 @@ task GRIDSS { Int jvmHeapSizeGb = 30 Int threads = 2 - Int timeMinutes = ceil(1440 / threads) + 10 + Int timeMinutes = ceil(2880 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } diff --git a/hmftools.wdl b/hmftools.wdl index f9a606e7..86d90332 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -37,7 +37,7 @@ task Amber { Int threads = 2 String memory = "33G" String javaXmx = "32G" - Int timeMinutes = 60 + Int timeMinutes = 1200 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } @@ -114,7 +114,7 @@ task Cobalt { Int threads = 1 String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 60 + Int timeMinutes = 1200 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.10--0" } @@ -312,8 +312,6 @@ task HealthChecker { output { } - - } task Purple { @@ -327,6 +325,7 @@ task Purple { File somaticVcf File filteredSvVcf File fullSvVcf + File fullSvVcfIndex File referenceFasta File referenceFastaFai File referenceFastaDict @@ -365,7 +364,37 @@ task Purple { } output { - #TODO + File driverCatalogTsv = "~{outputDir}/~{tumorName}.driver.catalog.tsv" + File purpleCnvGeneTsv = "~{outputDir}/~{tumorName}.purple.cnv.gene.tsv" + File purpleCnvGermlineTsv = "~{outputDir}/~{tumorName}.purple.cnv.germline.tsv" + File purpleCnvSomaticTsv = "~{outputDir}/~{tumorName}.purple.cnv.somatic.tsv" + File purplePurityRangeTsv = "~{outputDir}/~{tumorName}.purple.purity.range.tsv" + File purplePurityTsv = "~{outputDir}/~{tumorName}.purple.purity.tsv" + File purpleQc = 
"~{outputDir}/~{tumorName}.purple.qc" + File purpleSegmentTsv = "~{outputDir}/~{tumorName}.purple.segment.tsv" + File purpleSomaticClonalityTsv = "~{outputDir}/~{tumorName}.purple.somatic.clonality.tsv" + File purpleSomaticHistTsv = "~{outputDir}/~{tumorName}.purple.somatic.hist.tsv" + File purpleSomaticVcf = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz" + File purpleSomaticVcfIndex = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz.tbi" + File purpleSvVcf = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz" + File purpleSvVcfIndex = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz.tbi" + File circosPlot = "~{outputDir}/plot/~{tumorName}.circos.png" + File copynumberPlot = "~{outputDir}/plot/~{tumorName}.copynumber.png" + File inputPlot = "~{outputDir}/plot/~{tumorName}.input.png" + File mapPlot = "~{outputDir}/plot/~{tumorName}.map.png" + File purityRangePlot = "~{outputDir}/plot/~{tumorName}.purity.range.png" + File segmentPlot = "~{outputDir}/plot/~{tumorName}.segment.png" + File somaticClonalityPlot = "~{outputDir}/plot/~{tumorName}.somatic.clonality.png" + File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" + File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" + File purpleVersion = "~{outputDir}/purple.version" + Array[File] outputs = [driverCatalogTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, + purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, + purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, + purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, + purpleVersion] + Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, + segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] } runtime { diff --git a/sambamba.wdl b/sambamba.wdl index cd8da21e..3fc57c65 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -41,7 +41,7 @@ task Markdup { Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize String dockerImage = 
"quay.io/biocontainers/sambamba:0.7.1--h148d290_2" # Time minute calculation does not work well for higher number of threads. - Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 16) / threads } String bamIndexPath = sub(outputPath, "\.bam$", ".bai") From df51100b8ffd6cb2dee27859b46ef94d901f4715 Mon Sep 17 00:00:00 2001 From: tomkuipers1402 Date: Tue, 22 Dec 2020 13:41:30 +0100 Subject: [PATCH 0742/1208] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22f41826..424dc764 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Add new wdl file prepareShiny.wdl for creating input files for shiny app. + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. + Lima: Fix copy commands. + Fixed the `size` call in the default for gffread's timeMinutes, to retrieve From f60a018191e1b96a5abdfae8b68d4ae4d3ee3b06 Mon Sep 17 00:00:00 2001 From: tomkuipers1402 Date: Tue, 22 Dec 2020 13:42:05 +0100 Subject: [PATCH 0743/1208] add tasks to create input files for DGE analysis --- prepareShiny.wdl | 108 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 prepareShiny.wdl diff --git a/prepareShiny.wdl b/prepareShiny.wdl new file mode 100644 index 00000000..d304798d --- /dev/null +++ b/prepareShiny.wdl @@ -0,0 +1,108 @@ +version 1.0 + +# Copyright (c) 2017 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit 
persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task CreateDesignMatrix { + input { + File countTable + String shinyDir = "." + + Int threads = 1 + String memory = "5G" + Int timeMinutes = 30 + String dockerImage = "quay.io/biocontainers/predex:0.9.1--pyh3252c3a_0" + } + + command { + set -e + mkdir -p ${shinyDir} + predex design \ + -i ${countTable} \ + -o ${shinyDir} + } + + output { + File dgeDesign = shinyDir + "/design_matrix.tsv" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + countTable: {description: "The created count table from HTseq.", category: "required"} + shinyDir: {description: "The directory to write the output to.", category: "required"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task CreateAnnotation { + input { + File referenceFasta + File referenceGtfFile + String shinyDir = "." 
+ + Int threads = 1 + String memory = "10G" + Int timeMinutes = 90 + String dockerImage = "quay.io/biocontainers/predex:0.9.1--pyh3252c3a_0" + } + + command { + set -e + mkdir -p ${shinyDir} + predex annotation \ + -f ${referenceFasta} \ + -g ${referenceGtfFile} \ + -o ${shinyDir} + } + + output { + File dgeAnnotation = shinyDir + "/annotation.tsv" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + referenceFasta: {description: "The reference Fasta file.", category: "required"} + referenceGtfFile: {description: "The reference GTF file.", category: "required"} + shinyDir: {description: "The directory to write the output to.", category: "required"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From fcd32243e1aaa62a842435e5cc2671843d8afc54 Mon Sep 17 00:00:00 2001 From: tomkuipers1402 Date: Thu, 24 Dec 2020 13:12:23 +0100 Subject: [PATCH 0744/1208] style update --- prepareShiny.wdl | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/prepareShiny.wdl b/prepareShiny.wdl index d304798d..81354a16 100644 --- a/prepareShiny.wdl +++ b/prepareShiny.wdl @@ -33,10 +33,10 @@ task CreateDesignMatrix { command { set -e - mkdir -p ${shinyDir} + mkdir -p ~{shinyDir} predex design \ - -i ${countTable} \ - -o ${shinyDir} + -i ~{countTable} \ + -o ~{shinyDir} } output { @@ -51,14 +51,16 @@ task CreateDesignMatrix { } parameter_meta { + # inputs countTable: {description: "The created count table from HTseq.", category: "required"} - shinyDir: {description: "The directory to write the output to.", category: "required"} - + shinyDir: {description: "The directory to write the output to.", category: "required"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + dgeDesign: {description: "Design matrix template to add sample information for DGE analysis."} } } @@ -76,11 +78,11 @@ task CreateAnnotation { command { set -e - mkdir -p ${shinyDir} + mkdir -p ~{shinyDir} predex annotation \ - -f ${referenceFasta} \ - -g ${referenceGtfFile} \ - -o ${shinyDir} + -f ~{referenceFasta} \ + -g ~{referenceGtfFile} \ + -o ~{shinyDir} } output { @@ -95,14 +97,16 @@ task CreateAnnotation { } parameter_meta { + # inputs referenceFasta: {description: "The reference Fasta file.", category: "required"} referenceGtfFile: {description: "The reference GTF file.", category: "required"} - shinyDir: {description: "The directory to write the output to.", category: "required"} - + shinyDir: {description: "The directory to write the output to.", category: "required"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + dgeAnnotation: {description: "Annotation file for DGE analysis."} } } From ca452303add0b2afeabb6595e09c7a036df58fc3 Mon Sep 17 00:00:00 2001 From: tomkuipers1402 Date: Mon, 28 Dec 2020 10:31:34 +0100 Subject: [PATCH 0745/1208] annotation update --- prepareShiny.wdl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/prepareShiny.wdl b/prepareShiny.wdl index 81354a16..13cd0b1c 100644 --- a/prepareShiny.wdl +++ b/prepareShiny.wdl @@ -28,15 +28,15 @@ task CreateDesignMatrix { Int threads = 1 String memory = "5G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/predex:0.9.1--pyh3252c3a_0" + String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" } command { set -e mkdir -p ~{shinyDir} predex design \ - -i ~{countTable} \ - -o ~{shinyDir} + --input ~{countTable} \ + --output ~{shinyDir} } output { @@ -70,19 +70,19 @@ task CreateAnnotation { File referenceGtfFile String shinyDir = "." 
- Int threads = 1 - String memory = "10G" - Int timeMinutes = 90 - String dockerImage = "quay.io/biocontainers/predex:0.9.1--pyh3252c3a_0" + Int threads = 2 + String memory = "5G" + Int timeMinutes = 30 + String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" } command { set -e mkdir -p ~{shinyDir} predex annotation \ - -f ~{referenceFasta} \ - -g ~{referenceGtfFile} \ - -o ~{shinyDir} + --fasta ~{referenceFasta} \ + --gtf ~{referenceGtfFile} \ + --output ~{shinyDir} } output { From 48d468d7c97e4b9e3ee892ff49b3fdda4fee9de9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 6 Jan 2021 11:11:41 +0100 Subject: [PATCH 0746/1208] add note to HealthChecker --- hmftools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/hmftools.wdl b/hmftools.wdl index 86d90332..760fb63f 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -270,6 +270,7 @@ task GripssHardFilterApplicationKt { } task HealthChecker { + # WIP input { String normalName String tumorName From c482e833fa60a8a138b8045dc3f044be0655599c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 6 Jan 2021 11:31:52 +0100 Subject: [PATCH 0747/1208] comment out healthchecker task and remove duplicate input in bcftools annotate --- bcftools.wdl | 4 +-- hmftools.wdl | 90 ++++++++++++++++++++++++++-------------------------- 2 files changed, 46 insertions(+), 48 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 8721540a..14889dff 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -44,9 +44,7 @@ task Annotate { String? regions File? regionsFile File? renameChrs - File? samplesFile - Boolean singleOverlaps = false - + File? 
samplesFile Int threads = 0 String memory = "256M" diff --git a/hmftools.wdl b/hmftools.wdl index 760fb63f..16313fca 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -269,51 +269,51 @@ task GripssHardFilterApplicationKt { } } -task HealthChecker { - # WIP - input { - String normalName - String tumorName - - String javaXmx = "10G" - } - - command { - java -Xmx10G \ - -jar /opt/tools/health-checker/3.1/health-checker.jar \ - -reference ~{normalName} \ - -tumor ~{tumorName} \ - -metrics_dir ~{metricsPath} \ - -amber_dir ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ - -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ - -output_dir ~{outputDir} - } - - # super("health-checker", - # Versions.HEALTH_CHECKER, - # "health-checker.jar", - # "10G", - # Lists.newArrayList("-reference", - # referenceSampleName, - # "-tumor", - # tumorSampleName, - # "-ref_wgs_metrics_file", - # referenceMetricsPath, - # "-tum_wgs_metrics_file", - # tumorMetricsPath, - # "-ref_flagstat_file", - # referenceFlagstatPath, - # "-tum_flagstat_file", - # tumorFlagstatPath, - # "-purple_dir", - # purplePath, - # "-output_dir", - # outputPath)); - - output { - - } -} +# task HealthChecker { +# # WIP +# input { +# String normalName +# String tumorName +# +# String javaXmx = "10G" +# } +# +# command { +# java -Xmx10G \ +# -jar /opt/tools/health-checker/3.1/health-checker.jar \ +# -reference ~{normalName} \ +# -tumor ~{tumorName} \ +# -metrics_dir ~{metricsPath} \ +# -amber_dir ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ +# -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ +# -output_dir ~{outputDir} +# } +# +# # super("health-checker", +# # Versions.HEALTH_CHECKER, +# # "health-checker.jar", +# # "10G", +# # Lists.newArrayList("-reference", +# # referenceSampleName, +# # "-tumor", +# # tumorSampleName, +# # "-ref_wgs_metrics_file", +# # referenceMetricsPath, +# # "-tum_wgs_metrics_file", +# # tumorMetricsPath, +# # "-ref_flagstat_file", +# # 
referenceFlagstatPath, +# # "-tum_flagstat_file", +# # tumorFlagstatPath, +# # "-purple_dir", +# # purplePath, +# # "-output_dir", +# # outputPath)); +# +# output { +# +# } +# } task Purple { input { From 7988dbb2259f9a396fd19c514c48731e96d49e42 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 8 Jan 2021 11:06:09 +0100 Subject: [PATCH 0748/1208] make reference annotation optional for gffcompare --- gffcompare.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gffcompare.wdl b/gffcompare.wdl index 50cab8a6..8b135479 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -23,7 +23,6 @@ version 1.0 task GffCompare { input { Array[File] inputGtfFiles - File referenceAnnotation # gffcmp is the default used by the program as well. This needs to be # defined in order for the output values to be consistent and correct. String outPrefix = "gffcmp" @@ -40,6 +39,7 @@ task GffCompare { Boolean debugMode = false File? inputGtfList + File? referenceAnnotation String? outputDir File? genomeSequences Int? 
maxDistanceFreeEndsTerminalExons @@ -64,7 +64,7 @@ task GffCompare { set -e ~{"mkdir -p " + outputDir} gffcompare \ - -r ~{referenceAnnotation} \ + ~{"-r " + referenceAnnotation} \ ~{"-o '" + totalPrefix + "'"} \ ~{"-s " + genomeSequences} \ ~{"-e " + maxDistanceFreeEndsTerminalExons} \ From c22629ff7ec5c57f113ed79e2fc2784ee915b89f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 15 Jan 2021 15:03:13 +0100 Subject: [PATCH 0749/1208] add linx task, add more inputs to sage --- hmftools.wdl | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 130 insertions(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 16313fca..15f54937 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -315,6 +315,110 @@ task GripssHardFilterApplicationKt { # } # } +task Linx { + input { + String sampleName + File svVcf + File svVcfIndex + Array[File]+ purpleOutput + File referenceFasta + File referenceFastaFai + File referenceFastaDict + String refGenomeVersion + String outputDir = "./linx" + File fragileSiteCsv + File lineElementCsv + File replicationOriginsBed + File viralHostsCsv + File knownFusionCsv + File driverGenePanel + #The following should be in the same directory. 
+ File geneDataCsv + File proteinFeaturesCsv + File transExonDataCsv + File transSpliceDataCsv + + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 30 + String dockerImage = "docker://quay.io/biocontainers/hmftools-linx:1.12--0" + } + + command { + linx -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -sample ~{sampleName} \ + -sv_vcf ~{svVcf} \ + -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ + -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{refGenomeVersion} \ + -output_dir ~{outputDir} \ + -fragile_site_file ~{fragileSiteCsv} \ + -line_element_file ~{lineElementCsv} \ + -replication_origins_file ~{replicationOriginsBed} \ + -viral_hosts_file ~{viralHostsCsv} \ + -gene_transcripts_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -check_fusions \ + -known_fusion_file ~{knownFusionCsv} \ + -check_drivers \ + -driver_gene_panel ~{driverGenePanel} \ + -chaining_sv_limit 0 \ + -write_vis_data + } + + output { + File driverCatalog = "~{outputDir}/~{sampleName}.driver.catalog.tsv" + File linxBreakend = "~{outputDir}/~{sampleName}.linx.breakend.tsv" + File linxClusters = "~{outputDir}/~{sampleName}.linx.clusters.tsv" + File linxDrivers = "~{outputDir}/~{sampleName}.linx.drivers.tsv" + File linxFusion = "~{outputDir}/~{sampleName}.linx.fusion.tsv" + File linxLinks = "~{outputDir}/~{sampleName}.linx.links.tsv" + File linxSvs = "~{outputDir}/~{sampleName}.linx.svs.tsv" + File linxViralInserts = "~{outputDir}/~{sampleName}.linx.viral_inserts.tsv" + File linxVisCopyNumber = "~{outputDir}/~{sampleName}.linx.vis_copy_number.tsv" + File linxVisFusion = "~{outputDir}/~{sampleName}.linx.vis_fusion.tsv" + File linxVisGeneExon = "~{outputDir}/~{sampleName}.linx.vis_gene_exon.tsv" + File linxVisProteinDomain = "~{outputDir}/~{sampleName}.linx.vis_protein_domain.tsv" + File linxVisSegments = "~{outputDir}/~{sampleName}.linx.vis_segments.tsv" + File linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv" + File linxVersion = 
"~{outputDir}/linx.version" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + sampleName: {description: "The name of the sample.", category: "required"} + svVcf: {description: "A VCF file containing structural variants, produced using GRIDSS, annotated for viral insertions and postprocessed with GRIPSS.", category: "required"} + svVcfIndex: {description: "Index for the structural variants VCf file.", category: "required"} + purpleOutput: {description: "The files produced by PURPLE.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"HG19\" or \"HG38\".", category: "required"} + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + fragileSiteCsv: {description: "A list of known fragile sites.", category: "required"} + lineElementCsv: {description: "A list of known LINE source regions.", category: "required"} + replicationOriginsBed: {description: "Replication timing input in BED format with replication timing as the 4th column.", category: "required"} + viralHostsCsv: {description: "A list of the viruses which were used for annotation of the GRIDSS results.", category: "required"} + knownFusionCsv: {description: "A CSV file describing known fusions.", category: "required"} + driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} + geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + 
proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} + transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Purple { input { String normalName @@ -419,7 +523,7 @@ task Purple { referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - driverGenePanel: {description: "A bed file describing the driver gene panel.", category: "required"} + driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} @@ -444,11 +548,20 @@ task Sage { File panelBed File highConfidenceBed Boolean hg38 = false + Boolean panelOnly = false String outputPath = "./sage.vcf.gz" String? normalName File? normalBam File? normalBamIndex + Int? hotspotMinTumorQual + Int? panelMinTumorQual + Int? hotspotMaxGermlineVaf + Int? hotspotMaxGermlineRelRawBaseQual + Int? panelMaxGermlineVaf + Int? panelMaxGermlineRelRawBaseQual + String? mnvFilterEnabled + File? 
coverageBed Int threads = 2 String javaXmx = "32G" @@ -470,6 +583,15 @@ task Sage { -panel_bed ~{panelBed} \ -high_confidence_bed ~{highConfidenceBed} \ -assembly ~{true="hg38" false="hg19" hg38} \ + ~{"-hotspot_min_tumor_qual " + hotspotMinTumorQual} \ + ~{"-panel_min_tumor_qual " + panelMinTumorQual} \ + ~{"-hotspot_max_germline_vaf " + hotspotMaxGermlineVaf} \ + ~{"-hotspot_max_germline_rel_raw_base_qual " + hotspotMaxGermlineRelRawBaseQual} \ + ~{"-panel_max_germline_vaf " + panelMaxGermlineVaf} \ + ~{"-panel_max_germline_rel_raw_base_qual " + panelMaxGermlineRelRawBaseQual} \ + ~{"-mnv_filter_enabled " + mnvFilterEnabled} \ + ~{"-coverage_bed " + coverage_bed} \ + ~{true="-panel_only" false="" panelOnly} \ -threads ~{threads} \ -out ~{outputPath} } @@ -502,6 +624,13 @@ task Sage { hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} panelBed: {description: "A bed file describing coding regions to search for in frame indels.", category: "required"} highConfidenceBed: {description: "A bed files describing high confidence mapping regions.", category: "required"} + hotspotMinTumorQual: {description: "Equivalent to sage's `hotspot_min_tumor_qual` option.", category: "advanced"} + panelMinTumorQual: {description: "Equivalent to sage's `panel_min_tumor_qual` option.", category: "advanced"} + hotspotMaxGermlineVaf: {description: "Equivalent to sage's `hotspot_max_germline_vaf` option.", category: "advanced"} + hotspotMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `hotspot_max_germline_rel_raw_base_qual` option.", category: "advanced"} + panelMaxGermlineVaf: {description: "Equivalent to sage's `panel_max_germline_vaf` option.", category: "advanced"} + panelMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `panel_max_germline_vaf` option.", category: "advanced"} + mnvFilterEnabled: {description: "Equivalent to sage's `mnv_filter_enabled` option.", category: "advanced"} memory: {description: "The amount of 
memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", From 111a42bf79d1fb8fa6a34d7b567dc4fc04f67e7c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 21 Jan 2021 14:23:53 +0100 Subject: [PATCH 0750/1208] fix typos --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 15f54937..6de3f777 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -590,7 +590,7 @@ task Sage { ~{"-panel_max_germline_vaf " + panelMaxGermlineVaf} \ ~{"-panel_max_germline_rel_raw_base_qual " + panelMaxGermlineRelRawBaseQual} \ ~{"-mnv_filter_enabled " + mnvFilterEnabled} \ - ~{"-coverage_bed " + coverage_bed} \ + ~{"-coverage_bed " + coverageBed} \ ~{true="-panel_only" false="" panelOnly} \ -threads ~{threads} \ -out ~{outputPath} From 96fa1bc6ba59825f051c0577d414027fd58f10c4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 28 Jan 2021 16:38:49 +0100 Subject: [PATCH 0751/1208] fix some issues, add flagstat --- bcftools.wdl | 1 - hmftools.wdl | 6 +++++- picard.wdl | 4 ++-- sambamba.wdl | 43 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 4 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 14889dff..b239320d 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -106,7 +106,6 @@ task Annotate { inputFile: {description: "A vcf or bcf file.", category: "required"} inputFileIndex: {description: "The index for the input vcf or bcf.", category: "common"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} - outputType: {description: "Output type: v=vcf, z=vcf.gz, b=bcf, u=uncompressed bcf", category: "advanced"} annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "common"} annsFileIndex: {description: "The index for annsFile.", category: "common"} collapse: 
{description: "Treat as identical records with , see man page for details.", category: "advanced"} diff --git a/hmftools.wdl b/hmftools.wdl index 6de3f777..67c49be3 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -341,7 +341,7 @@ task Linx { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 30 - String dockerImage = "docker://quay.io/biocontainers/hmftools-linx:1.12--0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.12--0" } command { @@ -381,6 +381,10 @@ task Linx { File linxVisSegments = "~{outputDir}/~{sampleName}.linx.vis_segments.tsv" File linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv" File linxVersion = "~{outputDir}/linx.version" + Array[File] outputs = [driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion, + linxLinks, linxSvs, linxViralInserts, linxVisCopyNumber, + linxVisFusion, linxVisGeneExon, linxVisProteinDomain, + linxVisSegments, linxVisSvData, linxVersion] } runtime { diff --git a/picard.wdl b/picard.wdl index d52b9cc7..8dc4e0bf 100644 --- a/picard.wdl +++ b/picard.wdl @@ -473,10 +473,10 @@ task CollectWgsMetrics { CollectWgsMetrics \ REFERENCE_SEQUENCE=~{referenceFasta} \ INPUT=~{inputBam} \ - OUTPUT=~{outputPath} \ + OUTPUT=~{outputPath} \ ~{"MINIMUM_MAPPING_QUALITY=" + minimumMappingQuality} \ ~{"MINIMUM_BASE_QUALITY=" + minimumBaseQuality} \ - ~{"OVERAGE_CAP=" + coverageCap} + ~{"COVERAGE_CAP=" + coverageCap} } output { diff --git a/sambamba.wdl b/sambamba.wdl index 0e9a901c..bb63f665 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -20,6 +20,49 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+task Flagstat { + input { + File inputBam + File inputBamIndex + String outputPath = "./flagstat.txt" + + Int threads = 2 + String memory = "8G" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" + } + + command { + sambamba flagstat \ + -t ~{threads} \ + ~{inputBam} \ + > ~{outputPath} + } + + output { + File stats = outputPath + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + inputBam: {description: "The input BAM file.", category: "required"} + inputBamIndex: {description: "The index for the BAM file.", category: "required"} + outputPath: {description: "The path to write the ouput to.", category: "required"} + + threads: {description: "The number of threads that will be used for this task.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + + task Markdup { input { Array[File] inputBams From 8b51723e40a28d8894015f8b4dad21fcb0cb4bd1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 28 Jan 2021 16:39:56 +0100 Subject: [PATCH 0752/1208] add extractSigPredictHRD --- extractSigPredictHRD.wdl | 69 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 extractSigPredictHRD.wdl diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl new file mode 100644 index 00000000..6aa5ff1d --- /dev/null +++ b/extractSigPredictHRD.wdl @@ -0,0 +1,69 @@ +version 1.0 + +# Copyright (c) 2021 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task ExtractSigPredictHRD { + input { + String outputDir = "." 
+ String sampleName + File snvIndelVcf + File snvIndelVcfIndex + File svVcf + File svVcfIndex + + String memory = "8G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biowdl/chord-mutsigextractor:2.00_1.14" + } + + command { + extractSigPredictHRD.R \ + ~{outputDir} \ + ~{sampleName} \ + ~{snvIndelVcf} \ + ~{svVcf} \ + } + + output { + File chordPrediction = "~{outputDir}/~{sampleName}_chord_prediction.txt" + File chordSignatures = "~{outputDir}/~{sampleName}_chord_signatures.txt" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + outputDir: {description: "The directory the outout will be written to.", category: "required"} + sampleName: {description: "The name of the sample.", category: "required"} + snvIndelVcf: {description: "A VCF file with SNVs and indels.", category: "required"} + snvIndelVcfIndex: {description: "The index for the SNV/indel VCF file.", category: "required"} + svVcf: {description: "A VCF file with SVs.", category: "required"} + svVcfIndex: {description: "The index for the SV VCF file.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} \ No newline at end of file From a4ebccba572cb4b0114c80b91083eafc203fa92b Mon Sep 17 00:00:00 2001 From: tomkuipers1402 Date: Thu, 4 Feb 2021 09:22:33 +0100 Subject: [PATCH 0753/1208] change threads --- prepareShiny.wdl | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/prepareShiny.wdl b/prepareShiny.wdl index 13cd0b1c..d669e2d1 100644 --- a/prepareShiny.wdl +++ b/prepareShiny.wdl @@ -25,7 +25,6 @@ task CreateDesignMatrix { File countTable String shinyDir = "." - Int threads = 1 String memory = "5G" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" @@ -44,7 +43,6 @@ task CreateDesignMatrix { } runtime { - cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -53,8 +51,7 @@ task CreateDesignMatrix { parameter_meta { # inputs countTable: {description: "The created count table from HTseq.", category: "required"} - shinyDir: {description: "The directory to write the output to.", category: "required"} - threads: {description: "The number of threads to use.", category: "advanced"} + shinyDir: {description: "The directory to write the output to.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -70,7 +67,6 @@ task CreateAnnotation { File referenceGtfFile String shinyDir = "." 
- Int threads = 2 String memory = "5G" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" @@ -90,7 +86,6 @@ task CreateAnnotation { } runtime { - cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -100,8 +95,7 @@ task CreateAnnotation { # inputs referenceFasta: {description: "The reference Fasta file.", category: "required"} referenceGtfFile: {description: "The reference GTF file.", category: "required"} - shinyDir: {description: "The directory to write the output to.", category: "required"} - threads: {description: "The number of threads to use.", category: "advanced"} + shinyDir: {description: "The directory to write the output to.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 558c8088dee1d252fb668303874684fd62741409 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 4 Feb 2021 15:38:11 +0100 Subject: [PATCH 0754/1208] add health-checker --- hmftools.wdl | 106 +++++++++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 45 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 67c49be3..5bad1dbe 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -269,51 +269,67 @@ task GripssHardFilterApplicationKt { } } -# task HealthChecker { -# # WIP -# input { -# String normalName -# String tumorName -# -# String javaXmx = "10G" -# } -# -# command { -# java -Xmx10G \ -# -jar /opt/tools/health-checker/3.1/health-checker.jar \ -# -reference ~{normalName} \ -# -tumor ~{tumorName} \ -# -metrics_dir ~{metricsPath} \ -# -amber_dir ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ -# -purple_dir ~{sub(purpleOutput[0], 
basename(purpleOutput[0]), "")} \ -# -output_dir ~{outputDir} -# } -# -# # super("health-checker", -# # Versions.HEALTH_CHECKER, -# # "health-checker.jar", -# # "10G", -# # Lists.newArrayList("-reference", -# # referenceSampleName, -# # "-tumor", -# # tumorSampleName, -# # "-ref_wgs_metrics_file", -# # referenceMetricsPath, -# # "-tum_wgs_metrics_file", -# # tumorMetricsPath, -# # "-ref_flagstat_file", -# # referenceFlagstatPath, -# # "-tum_flagstat_file", -# # tumorFlagstatPath, -# # "-purple_dir", -# # purplePath, -# # "-output_dir", -# # outputPath)); -# -# output { -# -# } -# } +task HealthChecker { + # WIP + input { + String outputDir = "." + String normalName + File normalFlagstats + File normalMetrics + String tumorName + File tumorFlagstats + File tumorMetrics + Array[File]+ purpleOutput + + String javaXmx = "10G" + String memory = "11G" + Int timeMinutes = 10 + String dockerImage = "quay.io/biowdl/health-checker:3.2" + } + + command { + set -e + mkdir -p ~{outputDir} + health-checker -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -reference ~{normalName} \ + -ref_flagstat_file ~{normalFlagstats} \ + -ref_wgs_metrics_file ~{normalMetrics} \ + -tumor ~{tumorName} \ + -tum_flagstat_file ~{tumorFlagstats} \ + -tum_wgs_metrics_file ~{tumorMetrics} \ + -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ + -output_dir ~{outputDir} + } + + + output { + File? healthCheckSucceeded = "~{outputDir}/~{tumorName}.HealthCheckSucceeded" + File? 
healthCheckFailed = "~{outputDir}/~{tumorName}.HealthCheckFailed" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + outputDir: {description: "The path the output will be written to.", category:"required"} + normalName: {description: "The name of the normal sample.", category: "required"} + normalFlagstats: {description: "The flagstats for the normal sample.", category: "required"} + normalMetrics: {description: "The picard WGS metrics for the normal sample.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + tumorFlagstats: {description: "The flagstats for the tumor sample.", category: "required"} + tumorMetrics: {description: "The picard WGS metrics for the tumor sample.", category: "required"} + purpleOutput: {description: "The files from purple's output directory.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} task Linx { input { From eac2b302158e412df419705eba39ebaeedc1c11f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 9 Feb 2021 16:10:52 +0100 Subject: [PATCH 0755/1208] small adjustments --- bwa.wdl | 4 +++- gridss.wdl | 6 +++--- hmftools.wdl | 12 ++++++------ sambamba.wdl | 2 +- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index 203f0dde..e2393481 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -28,6 +28,7 @@ task Mem { String outputPrefix Boolean sixtyFour = false Boolean usePostalt = false + Boolean useSoftclippingForSupplementary = false Int sortMemoryPerThreadGb = 2 Int compressionLevel = 1 @@ -36,7 +37,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 260 / threads) + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 500 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } @@ -56,6 +57,7 @@ task Mem { mkdir -p "$(dirname ~{outputPrefix})" bwa mem \ -t ~{threads} \ + ~{if useSoftclippingForSupplementary then "-Y" else ""} \ ~{"-R '" + readgroup}~{true="'" false="" defined(readgroup)} \ ~{bwaIndex.fastaFile} \ ~{read1} \ diff --git a/gridss.wdl b/gridss.wdl index 9bafa6d6..0148fcf6 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -35,9 +35,9 @@ task GRIDSS { String? 
normalLabel Int jvmHeapSizeGb = 30 - Int threads = 2 - Int timeMinutes = ceil(2880 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" + Int threads = 4 + Int timeMinutes = ceil(5760 / threads) + 10 + String dockerImage = "quay.io/biocontainers/gridss:2.9.3--0" } command { diff --git a/hmftools.wdl b/hmftools.wdl index 5bad1dbe..90564060 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -181,13 +181,13 @@ task GripssApplicationKt { String memory = "25G" String javaXmx = "24G" - Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.7--0" } command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.7-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ -ref_genome ~{referenceFasta} \ -breakpoint_hotspot ~{breakpointHotspot} \ @@ -234,13 +234,13 @@ task GripssHardFilterApplicationKt { String memory = "25G" String javaXmx = "24G" - Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.8--0" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.7--0" } command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.8-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.7-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ -input_vcf ~{inputVcf} \ -output_vcf ~{outputPath} diff --git a/sambamba.wdl b/sambamba.wdl index bb63f665..5284363e 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -84,7 +84,7 @@ task Markdup { # Added 1024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
- Int timeMinutes = 1 + ceil(size(inputBams, "G") * 16) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 32) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } From 2792266fa2950ec9cbe15530374465a99c65a43a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 16 Feb 2021 09:52:04 +0100 Subject: [PATCH 0756/1208] update versions, memory, etc --- bwa.wdl | 2 +- extractSigPredictHRD.wdl | 2 ++ gridss.wdl | 9 ++++++++- hmftools.wdl | 29 ++++++++++++++--------------- sambamba.wdl | 6 +++--- 5 files changed, 28 insertions(+), 20 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index e2393481..faa4121a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.5) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl index 6aa5ff1d..69c41ef8 100644 --- a/extractSigPredictHRD.wdl +++ b/extractSigPredictHRD.wdl @@ -28,6 +28,7 @@ task ExtractSigPredictHRD { File snvIndelVcfIndex File svVcf File svVcfIndex + Boolean hg38 = false String memory = "8G" Int timeMinutes = 15 @@ -40,6 +41,7 @@ task ExtractSigPredictHRD { ~{sampleName} \ ~{snvIndelVcf} \ ~{svVcf} \ + ~{if hg38 then "RG_38" else "RG_37"} } output { diff --git a/gridss.wdl b/gridss.wdl index 0148fcf6..98d730cf 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -33,23 +33,28 @@ task GRIDSS { File? normalBam File? 
normalBai String? normalLabel + File? blacklistBed + File? repeatmaskerBed Int jvmHeapSizeGb = 30 Int threads = 4 Int timeMinutes = ceil(5760 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.9.3--0" + String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } command { set -e mkdir -p "$(dirname ~{outputPrefix})" gridss \ + -w . \ --reference ~{reference.fastaFile} \ --output ~{outputPrefix}.vcf.gz \ --assembly ~{outputPrefix}_assembly.bam \ ~{"-t " + threads} \ ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ + ~{"--blacklist " + blacklistBed} \ + ~{"--repeatmaskerbed " + repeatmaskerBed} ~{normalBam} \ ~{tumorBam} tabix -p vcf ~{outputPrefix}.vcf.gz @@ -80,6 +85,8 @@ task GRIDSS { normalBam: {description: "The BAM file for the normal/control sample.", category: "advanced"} normalBai: {description: "The index for normalBam.", category: "advanced"} normalLabel: {description: "The name of the normal sample.", category: "advanced"} + blacklistBed: {description: "A bed file with blaclisted regins.", category: "advanced"} + repeatmaskerBed: {description: "A bed file containing the repeatmasker database.", category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} diff --git a/hmftools.wdl b/hmftools.wdl index 90564060..e98ac7ba 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -115,7 +115,7 @@ task Cobalt { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 1200 - String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.10--0" + String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0" } command { @@ -172,6 +172,8 @@ task GripssApplicationKt { input { File inputVcf String outputPath = "gripss.vcf.gz" + String tumorName + String normalName File referenceFasta File referenceFastaFai File 
referenceFastaDict @@ -182,13 +184,15 @@ task GripssApplicationKt { String memory = "25G" String javaXmx = "24G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.7--0" + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" } command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.7-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ + -tumor ~{tumorName} \ + ~reference ~{normalName} \ -ref_genome ~{referenceFasta} \ -breakpoint_hotspot ~{breakpointHotspot} \ -breakend_pon ~{breakendPon} \ @@ -235,12 +239,12 @@ task GripssHardFilterApplicationKt { String memory = "25G" String javaXmx = "24G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.7--0" + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" } command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.7-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ -input_vcf ~{inputVcf} \ -output_vcf ~{outputPath} @@ -357,7 +361,7 @@ task Linx { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.12--0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.13--0" } command { @@ -455,13 +459,13 @@ task Purple { File referenceFastaFai File referenceFastaDict File driverGenePanel - File hotspots + File somaticHotspots Int threads = 1 Int timeMinutes = 60 String memory = "13G" String javaXmx = "12G" - String dockerImage = "quay.io/biocontainers/hmftools-purple:2.51--1" + String dockerImage = "quay.io/biocontainers/hmftools-purple:2.52--0" } command { @@ -479,13 +483,8 @@ task Purple { -ref_genome ~{referenceFasta} \ -driver_catalog \ -driver_gene_panel ~{driverGenePanel} \ - -hotspots ~{hotspots} \ + 
-somatic_hotspots ~{somaticHotspots} \ -threads ~{threads} - - # TODO if shallow also the following: - #-highly_diploid_percentage 0.88 \ - #-somatic_min_total 100 \ - #-somatic_min_purity_spread 0.1 } output { @@ -587,7 +586,7 @@ task Sage { String javaXmx = "32G" String memory = "33G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.2--2" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.6--0" } command { diff --git a/sambamba.wdl b/sambamba.wdl index 5284363e..b6ef5e9b 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -34,8 +34,8 @@ task Flagstat { command { sambamba flagstat \ - -t ~{threads} \ - ~{inputBam} \ + -t ~{threads} \ + ~{inputBam} \ > ~{outputPath} } @@ -84,7 +84,7 @@ task Markdup { # Added 1024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
- Int timeMinutes = 1 + ceil(size(inputBams, "G") * 32) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 40) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } From 943f9541ebc002ea576898067b7f220112cb79fc Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 16 Feb 2021 13:56:15 +0100 Subject: [PATCH 0757/1208] fix parameter_meta purple --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index e98ac7ba..3fe845a6 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -543,7 +543,7 @@ task Purple { category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} - hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + somaticHotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 8283c5099ba6fad50b34043033380e2898d3db66 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 18 Feb 2021 11:03:27 +0100 Subject: [PATCH 0758/1208] fix missing backslash --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 98d730cf..b4b36b01 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -54,7 +54,7 @@ task GRIDSS { ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ ~{"--blacklist " + blacklistBed} \ - ~{"--repeatmaskerbed " + repeatmaskerBed} + ~{"--repeatmaskerbed " + repeatmaskerBed} \ ~{normalBam} \ ~{tumorBam} tabix -p vcf ~{outputPrefix}.vcf.gz From adc3523872df29405e1741eaa2dfa2a67e61a51d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 19 Feb 2021 11:00:46 +0100 
Subject: [PATCH 0759/1208] fix sage --- hmftools.wdl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 3fe845a6..49e4eeb4 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -590,9 +590,7 @@ task Sage { } command { - java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-sage-2.2-2/sage.jar \ - com.hartwig.hmftools.sage.SageApplication \ + SAGE -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ ~{"-reference " + normalName} \ From a8314de9c3a2746eb44bf041fe1849c49241e547 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 19 Feb 2021 16:30:52 +0100 Subject: [PATCH 0760/1208] add -c to stringtie --- stringtie.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/stringtie.wdl b/stringtie.wdl index d3a6f73d..9c2f3cfc 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -31,6 +31,7 @@ task Stringtie { Boolean? firstStranded Boolean? secondStranded String? geneAbundanceFile + Float? minimumCoverage Int threads = 1 String memory = "2G" @@ -47,6 +48,7 @@ task Stringtie { ~{true="-e" false="" skipNovelTranscripts} \ ~{true="--rf" false="" firstStranded} \ ~{true="--fr" false="" secondStranded} \ + ~{"-c " + minimumCoverage} \ -o ~{assembledTranscriptsFile} \ ~{"-A " + geneAbundanceFile} \ ~{bam} @@ -74,6 +76,7 @@ task Stringtie { firstStranded: {description: "Equivalent to the --rf flag of stringtie.", category: "required"} secondStranded: {description: "Equivalent to the --fr flag of stringtie.", category: "required"} geneAbundanceFile: {description: "Where the abundance file should be written.", category: "common"} + minimumCoverage: {description: "The minimum coverage for a transcript to be shown in the output.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the 
job will run in minutes.", category: "advanced"} From f468bd568b5d9fcbd66872934837a4f88a4f2f0b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 19 Feb 2021 16:43:57 +0100 Subject: [PATCH 0761/1208] add index to htseq --- htseq.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/htseq.wdl b/htseq.wdl index dfa3fcf2..ef4ae0a3 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -23,6 +23,7 @@ version 1.0 task HTSeqCount { input { Array[File]+ inputBams + Array[File]+ inputBamIndexes File gtfFile String outputTable = "output.tsv" String order = "pos" @@ -34,7 +35,7 @@ task HTSeqCount { Int nprocesses = 1 String memory = "8G" - Int timeMinutes = 10 + ceil(size(inputBams, "G") * 60) + Int timeMinutes = 1440 #10 + ceil(size(inputBams, "G") * 60) FIXME String dockerImage = "quay.io/biocontainers/htseq:0.12.4--py37hb3f55d8_0" } From 070a5d81abd11bc0318f4957b7ef418df2f61c40 Mon Sep 17 00:00:00 2001 From: cagaser Date: Sun, 21 Feb 2021 17:39:45 +0100 Subject: [PATCH 0762/1208] task: add duphold.wdl --- duphold.wdl | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 duphold.wdl diff --git a/duphold.wdl b/duphold.wdl new file mode 100644 index 00000000..80fe31d2 --- /dev/null +++ b/duphold.wdl @@ -0,0 +1,75 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Duphold { + input { + File inputVcf + File bamFile + File bamIndex + File referenceFasta + File referenceFastaFai + String sample + String outputPath = "./duphold.vcf" + + String memory = "15G" + Int timeMinutes = 1440 + String dockerImage = "quay.io/biocontainers/duphold:0.2.1--h516909a_1" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + export DUPHOLD_SAMPLE_NAME=~{sample} + duphold \ + -v ~{inputVcf} \ + -b ~{bamFile} \ + -f ~{referenceFasta} \ + -o ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputVcf: {description: "The VCF file to process.", category: "required"} + bamFile: {description: "The bam file to process.", category: "required"} + bamIndex: {description: "The index of the bam file.", category: "required"} + referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} + referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } + sample: {description: "The name of the sample.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + memory: {description: "The memory required to run the programs.", category: "advanced"} + timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"} + dockerImage: {description: 
"The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Duphold annotated VCF file."} + } +} From aef20c2a69816a367700441ba9d4a121faf9a72f Mon Sep 17 00:00:00 2001 From: cagaser Date: Sun, 21 Feb 2021 17:42:24 +0100 Subject: [PATCH 0763/1208] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 424dc764..8d6d1b76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Duphold: add duphold.wdl + Add new wdl file prepareShiny.wdl for creating input files for shiny app. + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. + Lima: Fix copy commands. From 5fc58ce1f5585a5bb4078b095674b67aba8d8f7d Mon Sep 17 00:00:00 2001 From: cagaser Date: Sun, 21 Feb 2021 21:43:20 +0100 Subject: [PATCH 0764/1208] add bcftools view filtering options --- bcftools.wdl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 28380dea..5f6c2a16 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -290,7 +290,10 @@ task View { input { File inputFile String outputPath = "output.vcf" - + + String? exclude + String? 
include + Boolean excludeUncalled = false String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" @@ -302,6 +305,7 @@ task View { set -e mkdir -p "$(dirname ~{outputPath})" bcftools view \ + ~{true="--exclude-uncalled" false="" firstAlleleOnly} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ ~{inputFile} @@ -324,6 +328,8 @@ task View { # inputs inputFile: {description: "A vcf or bcf file.", category: "required"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} + include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} + exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 70d7a2b361a8faa2cab4b02accd2abd8da3068d0 Mon Sep 17 00:00:00 2001 From: cagaser Date: Sun, 21 Feb 2021 23:51:17 +0100 Subject: [PATCH 0765/1208] add option for bcftools view filtering --- bcftools.wdl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 5f6c2a16..50b08ee6 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -290,10 +290,9 @@ task View { input { File inputFile String outputPath = "output.vcf" - String? exclude String? 
include - Boolean excludeUncalled = false + Boolean excludeUncalled = false String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" @@ -305,7 +304,9 @@ task View { set -e mkdir -p "$(dirname ~{outputPath})" bcftools view \ - ~{true="--exclude-uncalled" false="" firstAlleleOnly} \ + ~{"--exclude " + exclude} \ + ~{"--include " + include} \ + ~{true="--exclude-uncalled" false="" excludeUncalled} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ ~{inputFile} @@ -330,6 +331,7 @@ task View { outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} + excludeUncalled: {description: "exclude sites without a called genotype (see man page for details).", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 42f6cd2a9c38ba2da8f07db2f7df17b70d99a5d9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 22 Feb 2021 10:27:48 +0100 Subject: [PATCH 0766/1208] fix purple output for newer version --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 49e4eeb4..31330a7d 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -488,7 +488,7 @@ task Purple { } output { - File driverCatalogTsv = "~{outputDir}/~{tumorName}.driver.catalog.tsv" + File driverCatalogSomaticTsv = "~{outputDir}/~{tumorName}.driver.catalog.somatic.tsv" File purpleCnvGeneTsv = "~{outputDir}/~{tumorName}.purple.cnv.gene.tsv" File purpleCnvGermlineTsv = "~{outputDir}/~{tumorName}.purple.cnv.germline.tsv" File purpleCnvSomaticTsv = "~{outputDir}/~{tumorName}.purple.cnv.somatic.tsv" @@ -512,7 +512,7 @@ task Purple { File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File purpleVersion = "~{outputDir}/purple.version" - Array[File] outputs = [driverCatalogTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, + Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, From 2d41a2e22783b6208c1cdf8e7906e388bbfb7a89 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 22 Feb 2021 13:26:08 +0100 Subject: [PATCH 0767/1208] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d6d1b76..cbd083c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. 
version 5.0.0-dev --------------------------- ++ Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled) + Duphold: add duphold.wdl + Add new wdl file prepareShiny.wdl for creating input files for shiny app. + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. From 8238579043ccd2df72ef7b270e9d44248b257715 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 24 Feb 2021 09:26:27 +0100 Subject: [PATCH 0768/1208] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cbd083c3..4ee68a91 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled) ++ Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). + Duphold: add duphold.wdl + Add new wdl file prepareShiny.wdl for creating input files for shiny app. + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. From ff4edf7a505234bef2e3102d06152148ae84eaa0 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 24 Feb 2021 09:26:33 +0100 Subject: [PATCH 0769/1208] Update CHANGELOG.md Co-authored-by: Jasper --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ee68a91..5e175c6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- + Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). -+ Duphold: add duphold.wdl ++ Duphold: add duphold.wdl. + Add new wdl file prepareShiny.wdl for creating input files for shiny app. 
+ mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. + Lima: Fix copy commands. From 505c4fc02f8fa22cd512e1c890a984febcd89531 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 24 Feb 2021 09:26:40 +0100 Subject: [PATCH 0770/1208] Update bcftools.wdl Co-authored-by: Jasper --- bcftools.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 50b08ee6..d01a0c03 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -290,9 +290,11 @@ task View { input { File inputFile String outputPath = "output.vcf" + Boolean excludeUncalled = false + String? exclude String? include - Boolean excludeUncalled = false + String memory = "256M" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" From 119e2aca92129ccd520ea4f0d9ab8ca768330e7e Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Wed, 24 Feb 2021 09:26:46 +0100 Subject: [PATCH 0771/1208] Update bcftools.wdl Co-authored-by: Jasper --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index d01a0c03..4dc4edb5 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -333,7 +333,7 @@ task View { outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} - excludeUncalled: {description: "exclude sites without a called genotype (see man page for details).", category: "advanced"} + excludeUncalled: {description: "Exclude sites without a called genotype (see man page for details).", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: 
"advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From ee6e66bea74597352161d3da231ce4df45acf39e Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 4 Mar 2021 16:42:38 +0100 Subject: [PATCH 0772/1208] add tmpdir --- umi-tools.wdl | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 6524d656..b05fcace 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -78,10 +78,13 @@ task Dedup { File inputBam File inputBamIndex String outputBamPath + String tmpDir + Boolean paired = true String? umiSeparator String? statsPrefix + String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) @@ -93,13 +96,14 @@ task Dedup { command { set -e - mkdir -p "$(dirname ~{outputBamPath})" + mkdir -p "$(dirname ~{outputBamPath})" "~{tmpDir}" umi_tools dedup \ - --stdin ~{inputBam} \ - --stdout ~{outputBamPath} \ + --stdin=~{inputBam} \ + --stdout=~{outputBamPath} \ ~{"--output-stats " + statsPrefix} \ ~{"--umi-separator=" + umiSeparator} \ - ~{true="--paired" false="" paired} + ~{true="--paired" false="" paired} \ + --temp-dir=~{tmpDir} \ samtools index ~{outputBamPath} ~{outputBamIndex} } @@ -122,6 +126,7 @@ task Dedup { inputBam: {description: "The input BAM file.", categrory: "required"} inputBamIndex: {description: "The index for the ipnut BAM file.", cateogry: "required"} outputBamPath: {description: "The location to write the output BAM file to.", category: "required"} + outputBamPath: {description: "Temporary directory.", category: "advanced"} paired: {description: "Whether or not the data is paired.", category: "common"} umiSeparator: {description: "Seperator used for UMIs in the read names.", category: "advanced"} statsPrefix: {description: "The prefix for the stats 
files.", category: "advanced"} From 4edc1284f86c713dd5e23e8dba79c8a0f3a20219 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 4 Mar 2021 17:03:12 +0100 Subject: [PATCH 0773/1208] update umi-tools.wdl --- CHANGELOG.md | 1 + umi-tools.wdl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e175c6d..437294cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ UMI-tools (dedup): Add tempdir + Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). + Duphold: add duphold.wdl. + Add new wdl file prepareShiny.wdl for creating input files for shiny app. diff --git a/umi-tools.wdl b/umi-tools.wdl index b05fcace..db888603 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -126,7 +126,7 @@ task Dedup { inputBam: {description: "The input BAM file.", categrory: "required"} inputBamIndex: {description: "The index for the ipnut BAM file.", cateogry: "required"} outputBamPath: {description: "The location to write the output BAM file to.", category: "required"} - outputBamPath: {description: "Temporary directory.", category: "advanced"} + tmpDir: {description: "Temporary directory.", category: "advanced"} paired: {description: "Whether or not the data is paired.", category: "common"} umiSeparator: {description: "Seperator used for UMIs in the read names.", category: "advanced"} statsPrefix: {description: "The prefix for the stats files.", category: "advanced"} From 2a151b5014c34ea28498da909806cfa70da65d47 Mon Sep 17 00:00:00 2001 From: cedrick Date: Mon, 8 Mar 2021 10:27:12 +0100 Subject: [PATCH 0774/1208] add default tmpdir --- umi-tools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index db888603..5e08e14d 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -78,7 +78,7 @@ task Dedup { File inputBam File inputBamIndex String 
outputBamPath - String tmpDir + String tmpDir = "./umiToolsDedupTmpDir" Boolean paired = true From f8f2b9e4058d29bdd21bb92694bb425c3724f31b Mon Sep 17 00:00:00 2001 From: cedrick Date: Mon, 8 Mar 2021 10:27:55 +0100 Subject: [PATCH 0775/1208] update default dockerimage --- umi-tools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 5e08e14d..1a7db327 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -89,7 +89,7 @@ task Dedup { String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9). - String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" + String dockerImage = "quay.io/biocontainers/umi_tools:1.1.1--py38h0213d0e_1" } String outputBamIndex = sub(outputBamPath, "\.bam$", ".bai") From 3b5f1476fb34d215d6332b127995ff3ab1b82f20 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 8 Mar 2021 11:02:53 +0100 Subject: [PATCH 0776/1208] remove umitools deduped BAM index output --- umi-tools.wdl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 1a7db327..e909e481 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -103,13 +103,11 @@ task Dedup { ~{"--output-stats " + statsPrefix} \ ~{"--umi-separator=" + umiSeparator} \ ~{true="--paired" false="" paired} \ - --temp-dir=~{tmpDir} \ - samtools index ~{outputBamPath} ~{outputBamIndex} + --temp-dir=~{tmpDir} } output { File deduppedBam = outputBamPath - File deduppedBamIndex = outputBamIndex File? editDistance = "~{statsPrefix}_edit_distance.tsv" File? umiStats = "~{statsPrefix}_per_umi.tsv" File? 
positionStats = "~{statsPrefix}_per_umi_per_position.tsv" @@ -136,7 +134,6 @@ task Dedup { # outputs deduppedBam: {description: "Deduplicated BAM file."} - deduppedBamIndex: {description: "Index of the deduplicated BAM file."} editDistance: {description: "Report of the (binned) average edit distance between the UMIs at each position."} umiStats: {description: "UMI-level summary statistics."} positionStats: {description: "The counts for unique combinations of UMI and position."} From 0f448cf27ea9812f938a37cb783bd7ce115d32a6 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Mon, 8 Mar 2021 13:08:38 +0100 Subject: [PATCH 0777/1208] Update umi-tools.wdl Co-authored-by: Davy Cats --- umi-tools.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 1a7db327..20f1a37e 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -84,7 +84,6 @@ task Dedup { String? umiSeparator String? statsPrefix - String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) From 7c8209efa3f8c9ed6d9c716c3357008d8be7e809 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 8 Mar 2021 13:09:50 +0100 Subject: [PATCH 0778/1208] remove comment --- umi-tools.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index e909e481..e3c833f8 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -88,7 +88,6 @@ task Dedup { String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) - # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9). 
String dockerImage = "quay.io/biocontainers/umi_tools:1.1.1--py38h0213d0e_1" } From 8623c57dbca49543e4a5ee8108316ef46242bcde Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 11 Mar 2021 14:05:13 +0100 Subject: [PATCH 0779/1208] add circos configs to purple output --- hmftools.wdl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 31330a7d..2fad41fe 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -512,6 +512,17 @@ task Purple { File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File purpleVersion = "~{outputDir}/purple.version" + File circosNormalRatio = "~{outputDir}/circos/~{normalName}.ratio.circos" + File circosCond = "~{outputDir}/circos/~{tumorName}.circos.conf" + File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" + File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" + File circosTumorRatio = "~{outputDir}/circos/~{tumorName}.ratio.circos" + File circosGaps = "~{outputDir}/circos/gaps.txt" + File circosBaf = "~{outputDir}/circos/~{tumorName}.baf.circos" + File circosCnv = "~{outputDir}/circos/~{tumorName}.cnv.circos" + File circosInputConf = "~{outputDir}/circos/~{tumorName}.input.conf" + File circosMap = "~{outputDir}/circos/~{tumorName}.map.circos" + File circosSnp = "~{outputDir}/circos/~{tumorName}.snp.circos" Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, @@ -519,6 +530,7 @@ task Purple { purpleVersion] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] + Array[File] circos = [] } runtime { From 13967b1793fc585d9f3753d87b618fd2c6819736 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 11 Mar 2021 14:13:06 
+0100 Subject: [PATCH 0780/1208] add array for circos confs --- hmftools.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 2fad41fe..dc31f41b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -513,7 +513,7 @@ task Purple { File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File purpleVersion = "~{outputDir}/purple.version" File circosNormalRatio = "~{outputDir}/circos/~{normalName}.ratio.circos" - File circosCond = "~{outputDir}/circos/~{tumorName}.circos.conf" + File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" File circosTumorRatio = "~{outputDir}/circos/~{tumorName}.ratio.circos" @@ -530,7 +530,9 @@ task Purple { purpleVersion] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] - Array[File] circos = [] + Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink, + circosTumorRatio, circosGaps, circosBaf, circosCnv, circosInputConf, circosMap, + circosSnp] } runtime { From beb5444092b8dea12fe0674a40bd4326d1daf426 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 17 Mar 2021 11:01:31 +0100 Subject: [PATCH 0781/1208] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 437294cd..c0a79fc4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ UMI-tools: re-introduce samtools indexing ++ UMI-tools: update default dockerImage + UMI-tools (dedup): Add tempdir + Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). + Duphold: add duphold.wdl. 
From 2410d0d5c2415f234739f63bbef913f5f531eab7 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 17 Mar 2021 11:02:01 +0100 Subject: [PATCH 0782/1208] update dockerImage and re-introduce samtools indexing --- umi-tools.wdl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index 6b3aa697..a09ca642 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -31,7 +31,7 @@ task Extract { Boolean threePrime = false Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2) - String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0" + String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:f9d5e41daab14b273ff04f257621890af6f82b93-0" } command { @@ -87,7 +87,7 @@ task Dedup { String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) - String dockerImage = "quay.io/biocontainers/umi_tools:1.1.1--py38h0213d0e_1" + String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:f9d5e41daab14b273ff04f257621890af6f82b93-0" } String outputBamIndex = sub(outputBamPath, "\.bam$", ".bai") @@ -102,10 +102,12 @@ task Dedup { ~{"--umi-separator=" + umiSeparator} \ ~{true="--paired" false="" paired} \ --temp-dir=~{tmpDir} + samtools index ~{outputBamPath} ~{outputBamIndex} } output { File deduppedBam = outputBamPath + File deduppedBamIndex = outputBamIndex File? editDistance = "~{statsPrefix}_edit_distance.tsv" File? umiStats = "~{statsPrefix}_per_umi.tsv" File? 
positionStats = "~{statsPrefix}_per_umi_per_position.tsv" @@ -132,6 +134,7 @@ task Dedup { # outputs deduppedBam: {description: "Deduplicated BAM file."} + deduppedBamIndex: {description: "Index of the deduplicated BAM file."} editDistance: {description: "Report of the (binned) average edit distance between the UMIs at each position."} umiStats: {description: "UMI-level summary statistics."} positionStats: {description: "The counts for unique combinations of UMI and position."} From 2a601648e8728305452e244bb95e296ad5d2441b Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 18 Mar 2021 10:59:24 +0100 Subject: [PATCH 0783/1208] update CHANGELOG --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c0a79fc4..64f40df6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,9 +10,9 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ UMI-tools: re-introduce samtools indexing -+ UMI-tools: update default dockerImage -+ UMI-tools (dedup): Add tempdir ++ UMI-tools (v1.1.1): re-introduce samtools indexing ++ UMI-tools (v1.1.1): update default dockerImage ++ UMI-tools dedup (v1.1.1): Add tempdir + Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). + Duphold: add duphold.wdl. + Add new wdl file prepareShiny.wdl for creating input files for shiny app. From c791c96a60e6eee1c104cda7b884039a67be53d4 Mon Sep 17 00:00:00 2001 From: Cedrick Agaser <47602860+cagaser@users.noreply.github.com> Date: Thu, 18 Mar 2021 11:22:09 +0100 Subject: [PATCH 0784/1208] Update CHANGELOG.md Co-authored-by: Davy Cats --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64f40df6..c204ba24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,9 +10,9 @@ that users understand how the changes affect the new version. 
version 5.0.0-dev --------------------------- -+ UMI-tools (v1.1.1): re-introduce samtools indexing -+ UMI-tools (v1.1.1): update default dockerImage -+ UMI-tools dedup (v1.1.1): Add tempdir ++ UMI-tools: re-introduce samtools indexing ++ UMI-tools: update default dockerImage to use umitools v1.1.1 ++ UMI-tools dedup: Add tempdir + Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). + Duphold: add duphold.wdl. + Add new wdl file prepareShiny.wdl for creating input files for shiny app. From 359456efd96ccd2326657e5dec543c5a73efd92c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 19 Mar 2021 17:07:41 +0100 Subject: [PATCH 0785/1208] increase time and memory for picard collectWgsMetrics --- picard.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/picard.wdl b/picard.wdl index 8dc4e0bf..aefb4c21 100644 --- a/picard.wdl +++ b/picard.wdl @@ -459,9 +459,9 @@ task CollectWgsMetrics { Int? minimumBaseQuality Int? coverageCap - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + String memory = "33G" + String javaXmx = "32G" + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } From 55818f8742b709e9bb4007d4e529878ba0aa47e8 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 22 Mar 2021 12:47:21 +0100 Subject: [PATCH 0786/1208] update dockerImage --- umi-tools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/umi-tools.wdl b/umi-tools.wdl index a09ca642..86bf1314 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -31,7 +31,7 @@ task Extract { Boolean threePrime = false Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2) - String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:f9d5e41daab14b273ff04f257621890af6f82b93-0" + String dockerImage = 
"quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:3067b520386698317fd507c413baf7f901666fd4-0" } command { @@ -87,7 +87,7 @@ task Dedup { String memory = "25G" Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) - String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:f9d5e41daab14b273ff04f257621890af6f82b93-0" + String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:3067b520386698317fd507c413baf7f901666fd4-0" } String outputBamIndex = sub(outputBamPath, "\.bam$", ".bai") From 9c36780b3c24d40cb0ed7bb37c1c3b0c41d2269e Mon Sep 17 00:00:00 2001 From: cagaser Date: Tue, 23 Mar 2021 09:57:29 +0100 Subject: [PATCH 0787/1208] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c204ba24..f3b04d4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ UMI-tools: update default dockerImage to use umitools v1.1.1 with correct samtools version (1.10) + UMI-tools: re-introduce samtools indexing + UMI-tools: update default dockerImage to use umitools v1.1.1 + UMI-tools dedup: Add tempdir From 5db3dd912fbf3b8cdaefefe198a59e998ebdd89a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 25 Mar 2021 11:15:41 +0100 Subject: [PATCH 0788/1208] update memory and timeMinutes for cutadapt and bwa --- bwa.wdl | 4 ++-- cutadapt.wdl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index faa4121a..cc8ea0c6 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -37,7 +37,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 500 / threads) + Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 500 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. 
String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.5) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 2) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. diff --git a/cutadapt.wdl b/cutadapt.wdl index b2dbdec0..bca29db3 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -84,7 +84,7 @@ task Cutadapt { Int cores = 4 String memory = "~{300 + 100 * cores}M" - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) + Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 12.0 / cores) String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" } From 2aba7899cdf1a76d2afa089e230335bf0843b72c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 25 Mar 2021 12:59:54 +0100 Subject: [PATCH 0789/1208] increase memory bwa --- bwa.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bwa.wdl b/bwa.wdl index cc8ea0c6..670f00d2 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. 
- Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 2) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "G") * 3) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. From f83b315ebb5318147ce3f08d8ba0d313146753d4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 30 Mar 2021 09:55:44 +0200 Subject: [PATCH 0790/1208] add more memory to sambamba markdup --- sambamba.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sambamba.wdl b/sambamba.wdl index b6ef5e9b..b4eca66b 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -81,8 +81,8 @@ task Markdup { # 2 threads reduces wall clock time by more than 40%. Int threads = 2 # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size. - # Added 1024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. - Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize + # Added 2024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. + Int memoryMb = 2048 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. Int timeMinutes = 1 + ceil(size(inputBams, "G") * 40) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" From 0862eab38451da3de6615ad419ea56402690e7a0 Mon Sep 17 00:00:00 2001 From: dcats Date: Mon, 12 Apr 2021 16:50:33 +0200 Subject: [PATCH 0791/1208] memory and runtime adjustements --- bcftools.wdl | 4 ++-- gridss.wdl | 4 ++-- hmftools.wdl | 10 +++++----- sambamba.wdl | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 4827a631..28b62696 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,8 +47,8 @@ task Annotate { File? 
samplesFile Int threads = 0 - String memory = "256M" - Int timeMinutes = 10 + ceil(size(inputFile, "G")) + String memory = "5G" + Int timeMinutes = 60 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } diff --git a/gridss.wdl b/gridss.wdl index b4b36b01..11014a88 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -36,7 +36,7 @@ task GRIDSS { File? blacklistBed File? repeatmaskerBed - Int jvmHeapSizeGb = 30 + Int jvmHeapSizeGb = 64 Int threads = 4 Int timeMinutes = ceil(5760 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" @@ -70,7 +70,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb + 1}G" + memory: "~{jvmHeapSizeGb + 25}G" time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } diff --git a/hmftools.wdl b/hmftools.wdl index dc31f41b..553879f9 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,8 +35,8 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = "33G" - String javaXmx = "32G" + String memory = "52G" + String javaXmx = "50G" Int timeMinutes = 1200 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } @@ -341,7 +341,7 @@ task Linx { File svVcf File svVcfIndex Array[File]+ purpleOutput - File referenceFasta + File referenceFasta #FIXME Not used in pipeline5? File referenceFastaFai File referenceFastaDict String refGenomeVersion @@ -597,8 +597,8 @@ task Sage { File? 
coverageBed Int threads = 2 - String javaXmx = "32G" - String memory = "33G" + String javaXmx = "50G" + String memory = "75G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough String dockerImage = "quay.io/biocontainers/hmftools-sage:2.6--0" } diff --git a/sambamba.wdl b/sambamba.wdl index b4eca66b..c8d9e11c 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -69,7 +69,7 @@ task Markdup { String outputPath Int compressionLevel = 1 # sortBufferSize and ioBufferSize taken from markdup defaults as of sambamba 0.7.1. - Int sortBufferSize = 2048 + Int sortBufferSize = 4096 Int ioBufferSize = 128 Boolean removeDuplicates = false @@ -81,8 +81,8 @@ task Markdup { # 2 threads reduces wall clock time by more than 40%. Int threads = 2 # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size. - # Added 2024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. - Int memoryMb = 2048 + sortBufferSize + 2 * ioBufferSize + # Added 4096 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. + Int memoryMb = 4096 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
Int timeMinutes = 1 + ceil(size(inputBams, "G") * 40) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" From 22933762f7683b98535da38de2954db41c44be37 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 13 Apr 2021 16:31:58 +0200 Subject: [PATCH 0792/1208] add germline options to purple --- hmftools.wdl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 553879f9..e8b60bc0 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -452,6 +452,7 @@ task Purple { Array[File]+ cobaltOutput File gcProfile File somaticVcf + File germlineVcf File filteredSvVcf File fullSvVcf File fullSvVcfIndex @@ -460,6 +461,7 @@ task Purple { File referenceFastaDict File driverGenePanel File somaticHotspots + File germlineHotspots Int threads = 1 Int timeMinutes = 60 @@ -477,6 +479,7 @@ task Purple { -cobalt ~{sub(cobaltOutput[0], basename(cobaltOutput[0]), "")} \ -gc_profile ~{gcProfile} \ -somatic_vcf ~{somaticVcf} \ + -germline_vcf ~{germlineVcf} \ -structural_vcf ~{filteredSvVcf} \ -sv_recovery_vcf ~{fullSvVcf} \ -circos /usr/local/bin/circos \ @@ -484,6 +487,7 @@ task Purple { -driver_catalog \ -driver_gene_panel ~{driverGenePanel} \ -somatic_hotspots ~{somaticHotspots} \ + -germline_hotspots ~{germlineHotspots} \ -threads ~{threads} } @@ -550,6 +554,7 @@ task Purple { cobaltOutput: {description: "The output files of hmftools cobalt", category: "required"} gcProfile: {description: "A file describing the GC profile of the reference genome.", category: "required"} somaticVcf: {description: "The somatic variant calling results.", category: "required"} + germlineVcf: {description: "The germline variant calling results.", category: "required"} filteredSvVcf: {description: "The filtered structural variant calling results.", category: "required"} fullSvVcf: {description: "The unfiltered structural variant calling results.", category: "required"} referenceFasta: {description: "The reference fasta file.", category: 
"required"} @@ -557,7 +562,8 @@ task Purple { category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} - somaticHotspots: {description: "A vcf file with hotspot variant sites.", category: "required"} + somaticHotspots: {description: "A vcf file with hotspot somatic variant sites.", category: "required"} + germlineHotspots: {description: "A vcf file with hotspot germline variant sites.", category: "required"} threads: {description: "The number of threads the program will use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 0013a03155aed7748864308f9fda5b4f07d79706 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 13 Apr 2021 16:37:54 +0200 Subject: [PATCH 0793/1208] remove ref_genome from Linx --- hmftools.wdl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index e8b60bc0..1a99caf6 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -341,9 +341,6 @@ task Linx { File svVcf File svVcfIndex Array[File]+ purpleOutput - File referenceFasta #FIXME Not used in pipeline5? 
- File referenceFastaFai - File referenceFastaDict String refGenomeVersion String outputDir = "./linx" File fragileSiteCsv @@ -369,7 +366,6 @@ task Linx { -sample ~{sampleName} \ -sv_vcf ~{svVcf} \ -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ - -ref_genome ~{referenceFasta} \ -ref_genome_version ~{refGenomeVersion} \ -output_dir ~{outputDir} \ -fragile_site_file ~{fragileSiteCsv} \ From bf43886539cb8d40d5b9637e3920ffba8d5f80a0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 13 Apr 2021 16:41:07 +0200 Subject: [PATCH 0794/1208] remove unused parameter_meta --- hmftools.wdl | 3 --- 1 file changed, 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 1a99caf6..48c6099c 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -414,9 +414,6 @@ task Linx { svVcf: {description: "A VCF file containing structural variants, produced using GRIDSS, annotated for viral insertions and postprocessed with GRIPSS.", category: "required"} svVcfIndex: {description: "Index for the structural variants VCf file.", category: "required"} purpleOutput: {description: "The files produced by PURPLE.", category: "required"} - referenceFasta: {description: "The reference fasta file.", category: "required"} - referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} refGenomeVersion: {description: "The version of the genome assembly used for alignment. 
Either \"HG19\" or \"HG38\".", category: "required"} outputDir: {description: "The directory the outputs will be written to.", category: "required"} fragileSiteCsv: {description: "A list of known fragile sites.", category: "required"} From a4d5102d42edf0d7d5795f5860817b38e680e597 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 14 Apr 2021 13:27:26 +0200 Subject: [PATCH 0795/1208] add gridss properties --- gridss.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 11014a88..ef5ae9e5 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -35,6 +35,7 @@ task GRIDSS { String? normalLabel File? blacklistBed File? repeatmaskerBed + File? gridssProperties Int jvmHeapSizeGb = 64 Int threads = 4 @@ -50,9 +51,10 @@ task GRIDSS { --reference ~{reference.fastaFile} \ --output ~{outputPrefix}.vcf.gz \ --assembly ~{outputPrefix}_assembly.bam \ + ~{"-c " + gridssProperties} \ ~{"-t " + threads} \ ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ - --label ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ + --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ ~{"--blacklist " + blacklistBed} \ ~{"--repeatmaskerbed " + repeatmaskerBed} \ ~{normalBam} \ @@ -87,6 +89,7 @@ task GRIDSS { normalLabel: {description: "The name of the normal sample.", category: "advanced"} blacklistBed: {description: "A bed file with blaclisted regins.", category: "advanced"} repeatmaskerBed: {description: "A bed file containing the repeatmasker database.", category: "advanced"} + gridssProperties: {description: "A properties file for gridss.", category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} From e81de32b4db6b48ff458f368b253010bcbff7187 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 15 Apr 2021 11:50:41 +0200 Subject: [PATCH 0796/1208] upgrade sage version --- 
hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 48c6099c..0a566d8e 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -599,7 +599,7 @@ task Sage { String javaXmx = "50G" String memory = "75G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.6--0" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.7--0" } command { From 51e524a7fa1ffe7664882941e7fc0ffc7aa14ad3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 20 Apr 2021 12:25:16 +0200 Subject: [PATCH 0797/1208] add missing purple outputs, fix typo --- hmftools.wdl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 0a566d8e..3dd52daf 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -192,13 +192,13 @@ task GripssApplicationKt { -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ -tumor ~{tumorName} \ - ~reference ~{normalName} \ + -reference ~{normalName} \ -ref_genome ~{referenceFasta} \ -breakpoint_hotspot ~{breakpointHotspot} \ -breakend_pon ~{breakendPon} \ -breakpoint_pon ~{breakpointPon} \ -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} + -output_vcf ~{outputPath} } output { @@ -486,6 +486,7 @@ task Purple { output { File driverCatalogSomaticTsv = "~{outputDir}/~{tumorName}.driver.catalog.somatic.tsv" + File driverCatalogGermlineTsv = "~{outputDir}/~{tumorName}.driver.catalog.germline.tsv" File purpleCnvGeneTsv = "~{outputDir}/~{tumorName}.purple.cnv.gene.tsv" File purpleCnvGermlineTsv = "~{outputDir}/~{tumorName}.purple.cnv.germline.tsv" File purpleCnvSomaticTsv = "~{outputDir}/~{tumorName}.purple.cnv.somatic.tsv" @@ -497,6 +498,8 @@ task Purple { File purpleSomaticHistTsv = "~{outputDir}/~{tumorName}.purple.somatic.hist.tsv" File purpleSomaticVcf = 
"~{outputDir}/~{tumorName}.purple.somatic.vcf.gz" File purpleSomaticVcfIndex = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz.tbi" + File purpleGermlineVcf = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz" + File purpleGermlineVcfIndex = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz.tbi" File purpleSvVcf = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz" File purpleSvVcfIndex = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz.tbi" File circosPlot = "~{outputDir}/plot/~{tumorName}.circos.png" @@ -524,7 +527,7 @@ task Purple { purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, - purpleVersion] + purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink, From 32c08100bcbf0590d7c1d69e08cdae2e3c640e99 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 19 May 2021 14:16:16 +0200 Subject: [PATCH 0798/1208] adjust runtime settings --- bcftools.wdl | 4 ++-- bwa.wdl | 4 ++-- extractSigPredictHRD.wdl | 4 ++-- gridss.wdl | 6 +++--- hmftools.wdl | 43 ++++++++++++++++++++-------------------- picard.wdl | 6 +++--- sambamba.wdl | 2 +- samtools.wdl | 2 ++ snpeff.wdl | 6 +++--- 9 files changed, 39 insertions(+), 38 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 28b62696..8fab933a 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,8 +47,8 @@ task Annotate { File? 
samplesFile Int threads = 0 - String memory = "5G" - Int timeMinutes = 60 + ceil(size(inputFile, "G")) + String memory = "1G" + Int timeMinutes = 30 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } diff --git a/bwa.wdl b/bwa.wdl index 670f00d2..1cb170b7 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -37,7 +37,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 500 / threads) + Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 300 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "G") * 3) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "G") * 2) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. 
diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl index 69c41ef8..2b5d9781 100644 --- a/extractSigPredictHRD.wdl +++ b/extractSigPredictHRD.wdl @@ -30,8 +30,8 @@ task ExtractSigPredictHRD { File svVcfIndex Boolean hg38 = false - String memory = "8G" - Int timeMinutes = 15 + String memory = "3G" + Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/chord-mutsigextractor:2.00_1.14" } diff --git a/gridss.wdl b/gridss.wdl index ef5ae9e5..acafc911 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -39,7 +39,7 @@ task GRIDSS { Int jvmHeapSizeGb = 64 Int threads = 4 - Int timeMinutes = ceil(5760 / threads) + 10 + Int timeMinutes = ceil(4320 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" } @@ -72,7 +72,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb + 25}G" + memory: "~{jvmHeapSizeGb + 15}G" time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } @@ -117,7 +117,7 @@ task AnnotateInsertedSequence { String javaXmx = "8G" String memory = "9G" String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2 / threads) + Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3 / threads) } command { diff --git a/hmftools.wdl b/hmftools.wdl index 3dd52daf..9b22c10d 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -37,7 +37,7 @@ task Amber { Int threads = 2 String memory = "52G" String javaXmx = "50G" - Int timeMinutes = 1200 + Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } @@ -112,9 +112,9 @@ task Cobalt { File gcProfile Int threads = 1 - String memory = "9G" - String javaXmx = "8G" - Int timeMinutes = 1200 + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0" } @@ -181,9 +181,9 @@ task GripssApplicationKt { File breakendPon File breakpointPon - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 120 + String memory = 
"33G" + String javaXmx = "32G" + Int timeMinutes = 45 String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" } @@ -236,9 +236,9 @@ task GripssHardFilterApplicationKt { File inputVcf String outputPath = "gripss_hard_filter.vcf.gz" - String memory = "25G" - String javaXmx = "24G" - Int timeMinutes = 120 + String memory = "3G" + String javaXmx = "2G" + Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" } @@ -274,7 +274,6 @@ task GripssHardFilterApplicationKt { } task HealthChecker { - # WIP input { String outputDir = "." String normalName @@ -285,9 +284,9 @@ task HealthChecker { File tumorMetrics Array[File]+ purpleOutput - String javaXmx = "10G" - String memory = "11G" - Int timeMinutes = 10 + String javaXmx = "2G" + String memory = "1G" + Int timeMinutes = 1 String dockerImage = "quay.io/biowdl/health-checker:3.2" } @@ -355,9 +354,9 @@ task Linx { File transExonDataCsv File transSpliceDataCsv - String memory = "9G" - String javaXmx = "8G" - Int timeMinutes = 30 + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.13--0" } @@ -457,9 +456,9 @@ task Purple { File germlineHotspots Int threads = 1 - Int timeMinutes = 60 - String memory = "13G" - String javaXmx = "12G" + Int timeMinutes = 30 + String memory = "9G" + String javaXmx = "8G" String dockerImage = "quay.io/biocontainers/hmftools-purple:2.52--0" } @@ -600,8 +599,8 @@ task Sage { Int threads = 2 String javaXmx = "50G" - String memory = "75G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 10 / threads) #FIXME make sure this is enough + String memory = "60G" + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 8 / threads) String dockerImage = "quay.io/biocontainers/hmftools-sage:2.7--0" } diff --git a/picard.wdl b/picard.wdl index aefb4c21..9a935045 100644 --- a/picard.wdl +++ b/picard.wdl @@ -459,9 +459,9 @@ task CollectWgsMetrics { 
Int? minimumBaseQuality Int? coverageCap - String memory = "33G" - String javaXmx = "32G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) + String memory = "5G" + String javaXmx = "4G" + Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } diff --git a/sambamba.wdl b/sambamba.wdl index c8d9e11c..e78f50b6 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -84,7 +84,7 @@ task Markdup { # Added 4096 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 4096 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. - Int timeMinutes = 1 + ceil(size(inputBams, "G") * 40) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "G") * 25) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } diff --git a/samtools.wdl b/samtools.wdl index 9042a0df..954b5d4e 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -27,6 +27,7 @@ task BgzipAndIndex { String type = "vcf" Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String memory = "1G" String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -55,6 +56,7 @@ task BgzipAndIndex { outputDir: {description: "The directory in which the output will be placed.", category: "required"} type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/snpeff.wdl b/snpeff.wdl index 85709079..4a3640c7 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -36,9 +36,9 @@ task SnpEff { Boolean noShiftHgvs = false Int? 
upDownStreamLen - String memory = "50G" - String javaXmx = "49G" - Int timeMinutes = 60 #FIXME + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/snpeff:5.0--0" } From ffda341fae7bc7cc519451b018e43a76cae34d8e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 21 May 2021 14:37:56 +0200 Subject: [PATCH 0799/1208] adjust runtime settings --- bcftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 8fab933a..059cc39d 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,8 +47,8 @@ task Annotate { File? samplesFile Int threads = 0 - String memory = "1G" - Int timeMinutes = 30 + ceil(size(inputFile, "G")) + String memory = "2G" + Int timeMinutes = 60 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From c3df943f2964d2d5551baaf64c9bb2e2d9c198bf Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 25 May 2021 13:01:25 +0200 Subject: [PATCH 0800/1208] update memory bcftools --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 059cc39d..5170a01f 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,7 +47,7 @@ task Annotate { File? samplesFile Int threads = 0 - String memory = "2G" + String memory = "4G" Int timeMinutes = 60 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From ae1d2c02628d2239e79d24ecb78b4d4a3bcbc2d9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 28 May 2021 13:45:25 +0200 Subject: [PATCH 0801/1208] update changelog --- CHANGELOG.md | 2 ++ htseq.wdl | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22f41826..58d9f57f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. 
version 5.0.0-dev --------------------------- ++ GffCompare: Make the `referenceAnnotation` input optional. ++ Stringtie: Add the `minimumCoverage` input. + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. + Lima: Fix copy commands. + Fixed the `size` call in the default for gffread's timeMinutes, to retrieve diff --git a/htseq.wdl b/htseq.wdl index ef4ae0a3..76d3bb83 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -23,7 +23,6 @@ version 1.0 task HTSeqCount { input { Array[File]+ inputBams - Array[File]+ inputBamIndexes File gtfFile String outputTable = "output.tsv" String order = "pos" From 24a6f1104c3a05053931b37db3fb8f3dd1e178b1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 31 May 2021 14:19:34 +0200 Subject: [PATCH 0802/1208] fix gffcompare --- gffcompare.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gffcompare.wdl b/gffcompare.wdl index 8b135479..5c83ba9d 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -91,7 +91,7 @@ task GffCompare { else 0 Int noInputFiles = length(inputGtfFiles) Boolean oneFile = (noFilesGtfList + noInputFiles) == 1 - String annotatedName = if oneFile + String annotatedName = if oneFile && defined(referenceAnnotation) then "annotated" else "combined" From 743acb0b89cc4893544965e3d93590978b414420 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 1 Jun 2021 12:13:09 +0200 Subject: [PATCH 0803/1208] Fix memory values. --- CHANGELOG.md | 1 + CPAT.wdl | 5 ++++- gffcompare.wdl | 3 +++ gffread.wdl | 3 +++ 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 28e998e6..e47033c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Add memory values to GffCompare, GffRead and CPAT. + GffCompare: Make the `referenceAnnotation` input optional. + Stringtie: Add the `minimumCoverage` input. 
+ UMI-tools: update default dockerImage to use umitools v1.1.1 with correct samtools version (1.10) diff --git a/CPAT.wdl b/CPAT.wdl index afb67853..4a6d4478 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -34,6 +34,7 @@ task CPAT { Array[String]? startCodons Array[String]? stopCodons + String memory = "4G" Int timeMinutes = 10 + ceil(size(gene, "G") * 30) String dockerImage = "biocontainers/cpat:v1.2.4_cv1" } @@ -60,8 +61,9 @@ task CPAT { } runtime { - docker: dockerImage + memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { @@ -74,6 +76,7 @@ task CPAT { referenceGenomeIndex: {description: "The index of the reference. Should be added as input if CPAT should not index the reference genome.", category: "advanced"} startCodons: {description: "Equivalent to CPAT's `--start` option.", category: "advanced"} stopCodons: {description: "Equivalent to CPAT's `--stop` option.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/gffcompare.wdl b/gffcompare.wdl index 5c83ba9d..aa7c7209 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -46,6 +46,7 @@ task GffCompare { Int? maxDistanceGroupingTranscriptStartSites String? 
namePrefix + String memory = "4G" Int timeMinutes = 1 + ceil(size(inputGtfFiles, "G") * 30) String dockerImage = "quay.io/biocontainers/gffcompare:0.10.6--h2d50403_0" @@ -114,6 +115,7 @@ task GffCompare { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -140,6 +142,7 @@ task GffCompare { maxDistanceFreeEndsTerminalExons: {description: "Equivalent to gffcompare's `-e` option.", category: "advanced"} maxDistanceGroupingTranscriptStartSites: {description: "Equivalent to gffcompare's `-d` option.", category: "advanced"} namePrefix: {description: "Equivalent to gffcompare's `-p` option.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/gffread.wdl b/gffread.wdl index 967dd5c9..a04540f5 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -32,6 +32,7 @@ task GffRead { String? proteinFastaPath String? 
filteredGffPath + String memory = "4G" Int timeMinutes = 1 + ceil(size(inputGff, "G") * 10) String dockerImage = "quay.io/biocontainers/gffread:0.9.12--0" } @@ -64,6 +65,7 @@ task GffRead { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -78,6 +80,7 @@ task GffRead { CDSFastaPath: {description: "The location the CDS fasta should be written to.", category: "advanced"} proteinFastaPath: {description: "The location the protein fasta should be written to.", category: "advanced"} filteredGffPath: {description: "The location the filtered GFF should be written to.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 379d0be3671d7c6aee65b8e18a73798f1ef80733 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 2 Jun 2021 10:25:03 +0200 Subject: [PATCH 0804/1208] fix some runtime settings --- cutadapt.wdl | 2 +- multiqc.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index b2dbdec0..b49a95d4 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -83,7 +83,7 @@ task Cutadapt { Boolean? noZeroCap Int cores = 4 - String memory = "~{300 + 100 * cores}M" + String memory = "5G" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" } diff --git a/multiqc.wdl b/multiqc.wdl index 2571463a..a1662937 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -57,7 +57,7 @@ task MultiQC { String? clConfig String? 
memory - Int timeMinutes = 2 + ceil(size(reports, "G") * 8) + Int timeMinutes = 10 + ceil(size(reports, "G") * 8) String dockerImage = "quay.io/biocontainers/multiqc:1.9--py_1" } From e7400ced4a7e413f794e05a62c8e2c1261a0e7fc Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Mon, 7 Jun 2021 14:17:14 +0200 Subject: [PATCH 0805/1208] Move pacbio-merge image to quay.io Docker hub has started to remove unused images from free accounts, which means that it might remove images used by this pipeline without notice. Therefore the pipeline now exclusively uses images from quay.io or official repositories from docker hub, which do not have this limitation. --- pacbio.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pacbio.wdl b/pacbio.wdl index 7c0113fd..b21c69bc 100644 --- a/pacbio.wdl +++ b/pacbio.wdl @@ -26,7 +26,7 @@ task mergePacBio { String outputPathMergedReport String memory = "4G" - String dockerImage = "lumc/pacbio-merge:0.2" + String dockerImage = "quay.io/redmar_van_den_berg/pacbio-merge:0.2" } command { From a095517d6f9e729769e26e1bd7dd6385ac403fc1 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Thu, 10 Jun 2021 16:20:34 +0200 Subject: [PATCH 0806/1208] Update tool versions. --- CHANGELOG.md | 18 ++++++++++++------ bam2fastx.wdl | 4 ++-- biowdl.wdl | 2 +- ccs.wdl | 2 +- common.wdl | 2 +- lima.wdl | 2 +- minimap2.wdl | 4 ++-- nanopack.wdl | 2 +- 8 files changed, 21 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e47033c6..9112c77d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,15 +10,21 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Update biowdl-input-converter to version 0.3. ++ Update minimap2 to version 2.20. ++ Update lima to version 2.2.0. ++ Update ccs to version 6.0.0. ++ Update bam2fastx to version 1.3.1. + Add memory values to GffCompare, GffRead and CPAT. + GffCompare: Make the `referenceAnnotation` input optional. 
+ Stringtie: Add the `minimumCoverage` input. -+ UMI-tools: update default dockerImage to use umitools v1.1.1 with correct samtools version (1.10) -+ UMI-tools: re-introduce samtools indexing -+ UMI-tools: update default dockerImage to use umitools v1.1.1 -+ UMI-tools dedup: Add tempdir -+ Update BCFTOOLS view: add options for filtering (include, exclude, excludeUncalled). -+ Duphold: add duphold.wdl. ++ UMI-tools: Update default dockerImage to use umitools v1.1.1 with correct + samtools version (1.10). ++ UMI-tools: Re-introduce samtools indexing. ++ UMI-tools: Update default dockerImage to use umitools v1.1.1. ++ UMI-tools dedup: Add tempdir. ++ Bcftools view: Add options for filtering (include, exclude, excludeUncalled). ++ Duphold: Add `duphold.wdl`. + Add new wdl file prepareShiny.wdl for creating input files for shiny app. + mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. + Lima: Fix copy commands. diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 2ae22a57..0bdccca8 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -32,7 +32,7 @@ task Bam2Fasta { String memory = "2G" Int timeMinutes = 15 - String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.0--he1c1bb9_8" + String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } command { @@ -100,7 +100,7 @@ task Bam2Fastq { String memory = "2G" Int timeMinutes = 15 - String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.0--he1c1bb9_8" + String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } command { diff --git a/biowdl.wdl b/biowdl.wdl index 06b1d756..dead8303 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -34,7 +34,7 @@ task InputConverter { String memory = "128M" Int timeMinutes = 1 - String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" + String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.3.0--pyhdfd78af_0" } command <<< diff --git a/ccs.wdl b/ccs.wdl index 4446937b..69095f4d 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ 
-37,7 +37,7 @@ task CCS { Int threads = 2 String memory = "4G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/pbccs:5.0.0--0" + String dockerImage = "quay.io/biocontainers/pbccs:6.0.0--h9ee0642_2" } command { diff --git a/common.wdl b/common.wdl index 66bdb99c..54b11567 100644 --- a/common.wdl +++ b/common.wdl @@ -221,7 +221,7 @@ task YamlToJson { String memory = "128M" Int timeMinutes = 1 # biowdl-input-converter has python and pyyaml. - String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0" + String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.3.0--pyhdfd78af_0" } command { diff --git a/lima.wdl b/lima.wdl index 2455aaac..f6faf079 100644 --- a/lima.wdl +++ b/lima.wdl @@ -51,7 +51,7 @@ task Lima { Int threads = 2 String memory = "2G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/lima:2.0.0--0" + String dockerImage = "quay.io/biocontainers/lima:2.2.0--h9ee0642_0" } Map[String, String] libraryDesignOptions = {"same": "--same", "different": "--different", "neighbors": "--neighbors"} diff --git a/minimap2.wdl b/minimap2.wdl index d2e69905..50ff4db3 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -33,7 +33,7 @@ task Indexing { Int cores = 1 String memory = "4G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/minimap2:2.17--hed695b0_3" + String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } command { @@ -100,7 +100,7 @@ task Mapping { Int cores = 4 String memory = "30G" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) - String dockerImage = "quay.io/biocontainers/minimap2:2.17--hed695b0_3" + String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } command { diff --git a/nanopack.wdl b/nanopack.wdl index f86641b0..e4c94a43 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -42,7 +42,7 @@ task NanoPlot { Int threads = 2 String memory = "2G" Int timeMinutes = 15 - String dockerImage = 
"quay.io/biocontainers/nanoplot:1.32.1--py_0" + String dockerImage = "quay.io/biocontainers/nanoplot:1.38.0--pyhdfd78af_0" } Map[String, String] fileTypeOptions = {"fastq": "--fastq ", "fasta": "--fasta ", "fastq_rich": "--fastq_rich ", "fastq_minimal": "--fastq_minimal ", "summary": "--summary ", "bam": "--bam ", "ubam": "--ubam ", "cram": "--cram ", "pickle": "--pickle ", "feather": "--feather "} From 7251bf276a5ea5a3d140d0438fe9647db74ddbc0 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 11 Jun 2021 09:59:25 +0200 Subject: [PATCH 0807/1208] Fix lima output naming. --- lima.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lima.wdl b/lima.wdl index f6faf079..6b87ad4f 100644 --- a/lima.wdl +++ b/lima.wdl @@ -91,13 +91,13 @@ task Lima { dirName="$(dirname ~{outputPrefix})" find "$(cd ${dirName}; pwd)" -name "*.bam" > bamFiles.txt find "$(cd ${dirName}; pwd)" -name "*.bam.pbi" > bamIndexes.txt - find "$(cd ${dirName}; pwd)" -name "*.subreadset.xml" > subreadsets.txt + find "$(cd ${dirName}; pwd)" -name "*.consensusreadset.xml" > consensusreadset.txt >>> output { Array[File] limaBam = read_lines("bamFiles.txt") Array[File] limaBamIndex = read_lines("bamIndexes.txt") - Array[File] limaXml = read_lines("subreadsets.txt") + Array[File] limaXml = read_lines("consensusreadset.txt") File limaStderr = outputPrefix + ".lima.stderr.log" File limaJson = outputPrefix + ".json" File limaCounts = outputPrefix + ".lima.counts" From 460d3d04e2aa83bac9b5ddfa708463a7a1713394 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 11 Jun 2021 12:19:52 +0200 Subject: [PATCH 0808/1208] Update scripts. 
--- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 85e2ec54..c31670d3 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 85e2ec542b65be5f2a25c22db05c28700fbe6db5 +Subproject commit c31670d3a9222a2feafc649cbc118c95afbc7189 From 6356c481cdb8d42820476fe7249f77d1e48bd9d2 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 11 Jun 2021 18:01:39 +0200 Subject: [PATCH 0809/1208] Fix outputs in ccs. --- CHANGELOG.md | 1 + ccs.wdl | 37 ++++++++++++++++++++++++++++++++----- scripts | 2 +- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e47033c6..c4eb9ac5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Fix output files in ccs.wdl. + Add memory values to GffCompare, GffRead and CPAT. + GffCompare: Make the `referenceAnnotation` input optional. + Stringtie: Add the `minimumCoverage` input. diff --git a/ccs.wdl b/ccs.wdl index 4446937b..29f1a7f9 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -24,12 +24,19 @@ task CCS { input { File subreadsFile String outputPrefix + String logLevel = "WARN" Int minPasses = 3 + Int topPasses = 60 Int minLength = 10 Int maxLength = 50000 Boolean byStrand = false + Boolean skipPolish = false + Boolean all = false + Boolean subreadFallback = false + Boolean allKinetics = false + Boolean hifiKinetics = false + Float minSnr = 2.5 Float minReadQuality = 0.99 - String logLevel = "WARN" File? subreadsIndexFile String? 
chunkString @@ -37,7 +44,7 @@ task CCS { Int threads = 2 String memory = "4G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/pbccs:5.0.0--0" + String dockerImage = "quay.io/biocontainers/pbccs:6.0.0--h9ee0642_2" } command { @@ -45,15 +52,24 @@ task CCS { mkdir -p "$(dirname ~{outputPrefix})" ccs \ --min-passes ~{minPasses} \ + --min-snr ~{minSnr} \ + --top-passes ~{topPasses} \ --min-length ~{minLength} \ --max-length ~{maxLength} \ ~{true="--by-strand" false="" byStrand} \ + ~{true="--skip-polish" false="" skipPolish} \ + ~{true="--all" false="" all} \ + ~{true="--subread-fallback" false="" subreadFallback} \ + ~{true="--all-kinetics" false="" allKinetics} \ + ~{true="--hifi-kinetics" false="" hifiKinetics} \ --min-rq ~{minReadQuality} \ --log-level ~{logLevel} \ --num-threads ~{threads} \ ~{"--chunk " + chunkString} \ + ~{"--report-file " + outputPrefix + ".ccs_report.txt"} \ ~{"--report-json " + outputPrefix + ".ccs.report.json"} \ ~{"--log-file " + outputPrefix + ".ccs.stderr.log"} \ + ~{"--metrics-json " + outputPrefix + ".zmw_metrics.json.gz"} \ ~{subreadsFile} \ ~{outputPrefix + ".ccs.bam"} } @@ -61,8 +77,10 @@ task CCS { output { File ccsBam = outputPrefix + ".ccs.bam" File ccsBamIndex = outputPrefix + ".ccs.bam.pbi" - File ccsReport = outputPrefix + ".ccs.report.json" + File ccsReport = outputPrefix + ".ccs_report.txt" + File ccsJsonReport = outputPrefix + ".ccs.report.json" File ccsStderr = outputPrefix + ".ccs.stderr.log" + File zmwMetrics = outputPrefix + ".zmw_metrics.json.gz" } runtime { @@ -76,12 +94,19 @@ task CCS { # inputs subreadsFile: {description: "Subreads input file.", category: "required"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + logLevel: {description: "Set log level. 
Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} minPasses: {description: "Minimum number of full-length subreads required to generate ccs for a ZMW.", category: "advanced"} + topPasses: {description: "Pick at maximum the top N passes for each ZMW.", category: "advanced"} minLength: {description: "Minimum draft length before polishing.", category: "advanced"} maxLength: {description: "Maximum draft length before polishing.", category: "advanced"} byStrand: {description: "Generate a consensus for each strand.", category: "advanced"} + skipPolish: {description: "Only output the initial draft template (faster, less accurate).", category: "advanced"} + all: {description: "Emit all ZMWs.", category: "advanced"} + subreadFallback: {description: "Emit a representative subread, instead of the draft consensus, if polishing failed.", category: "advanced"} + allKinetics: {description: "Calculate mean pulse widths (PW) and interpulse durations (IPD) for every ZMW.", category: "advanced"} + hifiKinetics: {description: "Calculate mean pulse widths (PW) and interpulse durations (IPD) for every HiFi read.", category: "advanced"} + minSnr: {description: "Minimum SNR of subreads to use for generating CCS.", category: "advanced"} minReadQuality: {description: "Minimum predicted accuracy in [0, 1].", category: "common"} - logLevel: {description: "Set log level. Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"} subreadsIndexFile: {description: "Index for the subreads input file, required when using chunkString.", category: "advanced"} chunkString: {descpription: "Chunk string (e.g. 
1/4, 5/5) for CCS.", category: "advanced"} threads: {description: "The number of threads to be used.", category: "advanced"} @@ -92,7 +117,9 @@ task CCS { # outputs ccsBam: {description: "Consensus reads output file."} ccsBamIndex: {description: "Index of consensus reads output file."} - ccsReport: {description: "Ccs results report file."} + ccsReport: {description: "Ccs report file."} + ccsJsonReport: {description: "Ccs results json report file."} ccsStderr: {description: "Ccs STDERR log file."} + zmwMetrics: {description: "ZMW metrics json file."} } } diff --git a/scripts b/scripts index 85e2ec54..c31670d3 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 85e2ec542b65be5f2a25c22db05c28700fbe6db5 +Subproject commit c31670d3a9222a2feafc649cbc118c95afbc7189 From 833ad0bf47f9c42e33743ed5b0de7851ef66bbf1 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Fri, 11 Jun 2021 18:06:21 +0200 Subject: [PATCH 0810/1208] Remove weird line. --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f4bade66..83da5399 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,6 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -<<<<<<< HEAD + Fix output files in ccs.wdl. + Update biowdl-input-converter to version 0.3. + Update minimap2 to version 2.20. From 4f879f72aec90d36d0201e9c1b54154f9decb757 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Jun 2021 12:40:32 +0200 Subject: [PATCH 0811/1208] Try to adjust localization. --- CHANGELOG.md | 7 ++++++- bam2fastx.wdl | 12 ++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 83da5399..11a92d83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,12 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ Fix output files in ccs.wdl. 
++ Change the way localization of the input bam files and index are handled + in the bam2fastx tasks. ++ Add new parameters from CCS version 6.0.0 and add two new outputs: + `ccs_report.txt` & `zmw_metrics.json.gz`. ++ Change CutAdapt memory to `5G`. ++ Increase multiqc base time from 5 to 10. + Update biowdl-input-converter to version 0.3. + Update minimap2 to version 2.20. + Update lima to version 2.2.0. diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 0bdccca8..4a2ecf87 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -44,13 +44,13 @@ task Bam2Fasta { bamFiles="" for bamFile in ~{sep=" " bam} do - ln $bamFile . - bamFiles=$bamFiles" $(basename $bamFile)" + cp $bamFile ./ + bamFiles=$bamFiles" ./$(basename $bamFile)" done for index in ~{sep=" " bamIndex} do - ln $index . + cp $index ./ done bam2fasta \ @@ -112,13 +112,13 @@ task Bam2Fastq { bamFiles="" for bamFile in ~{sep=" " bam} do - ln $bamFile . - bamFiles=$bamFiles" $(basename $bamFile)" + cp $bamFile ./ + bamFiles=$bamFiles" ./$(basename $bamFile)" done for index in ~{sep=" " bamIndex} do - ln $index . + cp $index ./ done bam2fastq \ From a422e52920dc8fa2d2614f632962dec37964b939 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Jun 2021 14:23:57 +0200 Subject: [PATCH 0812/1208] Try a different approach. 
--- bam2fastx.wdl | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 4a2ecf87..4e5ed3ed 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -35,7 +35,7 @@ task Bam2Fasta { String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } - command { + command <<< set -e mkdir -p "$(dirname ~{outputPrefix})" @@ -44,13 +44,13 @@ task Bam2Fasta { bamFiles="" for bamFile in ~{sep=" " bam} do - cp $bamFile ./ - bamFiles=$bamFiles" ./$(basename $bamFile)" + ln -s ${bamFile} ./ + bamFiles=${bamFiles}" ./$(basename ${bamFile})" done - for index in ~{sep=" " bamIndex} + for indexFile in ~{sep=" " bamIndex} do - cp $index ./ + ln -s ${indexFile} ./ done bam2fasta \ @@ -58,8 +58,8 @@ task Bam2Fasta { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - $bamFiles - } + ${bamFiles} + >>> output { File fastaFile = outputPrefix + ".fasta.gz" @@ -103,7 +103,7 @@ task Bam2Fastq { String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } - command { + command <<< set -e mkdir -p "$(dirname ~{outputPrefix})" @@ -112,13 +112,13 @@ task Bam2Fastq { bamFiles="" for bamFile in ~{sep=" " bam} do - cp $bamFile ./ - bamFiles=$bamFiles" ./$(basename $bamFile)" + ln -s ${bamFile} ./ + bamFiles=${bamFiles}" ./$(basename ${bamFile})" done - for index in ~{sep=" " bamIndex} + for indexFile in ~{sep=" " bamIndex} do - cp $index ./ + ln -s ${indexFile} ./ done bam2fastq \ @@ -126,8 +126,8 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - $bamFiles - } + ${bamFiles} + >>> output { File fastqFile = outputPrefix + ".fastq.gz" From adad218bbd6f501b0194107adf81cc9588ba91ba Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Jun 2021 15:02:41 +0200 Subject: [PATCH 0813/1208] Test tool without localization. 
--- bam2fastx.wdl | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 4e5ed3ed..3cdb29fb 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -103,31 +103,16 @@ task Bam2Fastq { String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } - command <<< + command { set -e mkdir -p "$(dirname ~{outputPrefix})" - - # Localise the bam and pbi files so they are next to each other in the - # current folder. - bamFiles="" - for bamFile in ~{sep=" " bam} - do - ln -s ${bamFile} ./ - bamFiles=${bamFiles}" ./$(basename ${bamFile})" - done - - for indexFile in ~{sep=" " bamIndex} - do - ln -s ${indexFile} ./ - done - bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ${bamFiles} - >>> + ~{bam} + } output { File fastqFile = outputPrefix + ".fastq.gz" From adee85e2cfe420ba3a7be24f764233597d00a74a Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Jun 2021 15:08:35 +0200 Subject: [PATCH 0814/1208] Fix array. --- bam2fastx.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 3cdb29fb..b09f7a0f 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -111,7 +111,7 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{bam} + ~{sep=" " bam} } output { From 734c4037e642bf318b249f8835f2042c40ff328d Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Mon, 14 Jun 2021 15:49:53 +0200 Subject: [PATCH 0815/1208] Try another approach. 
--- bam2fastx.wdl | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index b09f7a0f..110441ec 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -103,16 +103,26 @@ task Bam2Fastq { String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } - command { + command <<< set -e mkdir -p "$(dirname ~{outputPrefix})" + + # Localise the bam and pbi files so they are next to each other in the + # current folder. + bamFiles="" + for bamFile in ~{sep=" " bam} + do + fullPathBam=$(readlink -f ${bamFile}) + bamFiles=${bamFiles}" ${fullPathBam}" + done + bam2fastq \ --output ~{outputPrefix} \ -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ~{sep=" " bam} - } + ${bamFiles} + >>> output { File fastqFile = outputPrefix + ".fastq.gz" From 235fb43f046b285a3b5d8ca702b2cc8ad64dcf36 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 15 Jun 2021 11:18:37 +0200 Subject: [PATCH 0816/1208] Revert changes to WDL file. --- bam2fastx.wdl | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 110441ec..0bdccca8 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -35,7 +35,7 @@ task Bam2Fasta { String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } - command <<< + command { set -e mkdir -p "$(dirname ~{outputPrefix})" @@ -44,13 +44,13 @@ task Bam2Fasta { bamFiles="" for bamFile in ~{sep=" " bam} do - ln -s ${bamFile} ./ - bamFiles=${bamFiles}" ./$(basename ${bamFile})" + ln $bamFile . + bamFiles=$bamFiles" $(basename $bamFile)" done - for indexFile in ~{sep=" " bamIndex} + for index in ~{sep=" " bamIndex} do - ln -s ${indexFile} ./ + ln $index . 
done bam2fasta \ @@ -58,8 +58,8 @@ task Bam2Fasta { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ${bamFiles} - >>> + $bamFiles + } output { File fastaFile = outputPrefix + ".fasta.gz" @@ -103,7 +103,7 @@ task Bam2Fastq { String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } - command <<< + command { set -e mkdir -p "$(dirname ~{outputPrefix})" @@ -112,8 +112,13 @@ task Bam2Fastq { bamFiles="" for bamFile in ~{sep=" " bam} do - fullPathBam=$(readlink -f ${bamFile}) - bamFiles=${bamFiles}" ${fullPathBam}" + ln $bamFile . + bamFiles=$bamFiles" $(basename $bamFile)" + done + + for index in ~{sep=" " bamIndex} + do + ln $index . done bam2fastq \ @@ -121,8 +126,8 @@ task Bam2Fastq { -c ~{compressionLevel} \ ~{true="--split-barcodes" false="" splitByBarcode} \ ~{"--seqid-prefix " + seqIdPrefix} \ - ${bamFiles} - >>> + $bamFiles + } output { File fastqFile = outputPrefix + ".fastq.gz" From f2f7411a7b32bda18bba6eb8ee83606fa635f9e0 Mon Sep 17 00:00:00 2001 From: JasperBoom Date: Tue, 15 Jun 2021 12:43:37 +0200 Subject: [PATCH 0817/1208] Add directory creation to samtools fastq. --- CHANGELOG.md | 3 +-- samtools.wdl | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 11a92d83..e7242699 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,8 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- -+ Change the way localization of the input bam files and index are handled - in the bam2fastx tasks. ++ Samtools: Add mkdir line to `Fastq` task. + Add new parameters from CCS version 6.0.0 and add two new outputs: `ccs_report.txt` & `zmw_metrics.json.gz`. + Change CutAdapt memory to `5G`. 
diff --git a/samtools.wdl b/samtools.wdl index 9042a0df..46d1eb70 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -122,6 +122,8 @@ task Fastq { } command { + set -e + mkdir -p "$(dirname ~{outputRead1})" samtools fastq \ ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ ~{"-2 " + outputRead2} \ From de03877e2e831285daaccc820db98da0897e1dac Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Jun 2021 13:53:25 +0200 Subject: [PATCH 0818/1208] add cuppa and cuppa chart --- hmftools.wdl | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 9b22c10d..779820a3 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -168,6 +168,112 @@ task Cobalt { } } +task Cuppa { + input { + Array[File]+ linxOutput + Array[File]+ purpleOutput + String sampleName + Array[String]+ categories = ["DNA"] + Array[File]+ referenceData + File purpleSvVcf + File purpleSvVcfIndex + File purpleSomaticVcf + File purpleSomaticVcfIndex + String outputDir = "./cuppa" + + String javaXmx = "4G" + String memory = "5G" + Int time_minutes = 10 + String dockerImage = "quay.io/biowdl/cuppa:1.4" + } + + command { + set -e + mkdir -p sampleData ~{outputDir} + ln -s -t sampleData ~{sep=" " linxOutput} ~{sep=" " purpleOutput} + cuppa -Xmx~{javaXmx} \ + -output_dir ~{outputDir} \ + -output_id ~{sampleName} \ + -categories '~{sep="," categories}' \ + -ref_data_dir ~{sub(referenceData[0], basename(referenceData[0]), "")} \ + -sample_data_dir sampleData \ + -sample_data ~{sampleName} \ + -sample_sv_file ~{purpleSvVcf} \ + -sample_somatic_vcf ~{purpleSomaticVcf} + } + + output { + File cupData = "~{outputDir}/~{sampleName}.cup.data.csv" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + linxOutput: {description: "The files produced by linx.", category: "required"} + purpleOutput: {description: "The files produced by purple.", category: 
"required"} + sampleName: {description: "The name of the sample.", category: "required"} + categories: {description: "The classifiers to use.", category: "advanced"} + referenceData : {description: "The reference data.", category: "required"} + purpleSvVcf: {description: "The VCF file produced by purple which contains structural variants.", category: "required"} + purpleSvVcfIndex: {description: "The index of the structural variants VCF file produced by purple.", category: "required"} + purpleSomaticVcf: {description: "The VCF file produced by purple which contains somatic variants.", category: "required"} + purpleSomaticVcfIndex: {description: "The index of the somatic VCF file produced by purple.", category: "required"} + outputDir: {description: "The directory the ouput will be placed in.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task CuppaChart { + input { + String sampleName + File cupData + String outputDir = "./cuppa" + + String memory = "4G" + Int time_minutes = 5 + String dockerImage = "quay.io/biowdl/cuppa:1.4" + } + + command { + set -e + mkdir -p ~{outputDir} + cuppa-chart \ + -sample ~{sampleName} + -sample_data ~{cupData} + -output_dir ~{outputDir} + } + + output { + File cuppaChart = "~{outputDir}/~{sampleName}.cuppa.chart.png" + File cuppaConclusion = "~{outputDir}/~{sampleName}.cuppa.conclusion.txt" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + sampleName: {description: "The name of the sample.", category:"common"} + cupData: {description: "The cuppa output.", category: "required"} + outputDir: {description: "The directory the output will be written to.", category:"common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GripssApplicationKt { input { File inputVcf From c0477edfd5904f1de11d7ea0d60e8b65e36e0bed Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 16 Jun 2021 10:25:47 +0200 Subject: [PATCH 0819/1208] fix typo --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 779820a3..8beb5c76 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -183,7 +183,7 @@ task Cuppa { String javaXmx = "4G" String memory = "5G" - Int time_minutes = 10 + Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/cuppa:1.4" } @@ -239,7 +239,7 @@ task CuppaChart { String outputDir = "./cuppa" String memory = "4G" - Int time_minutes = 5 + Int timeMinutes = 5 String dockerImage = "quay.io/biowdl/cuppa:1.4" } From 1be4badcf451ccad2d2198dbfec4d97aaf68af45 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 21 Jun 2021 11:54:52 +0200 Subject: [PATCH 0820/1208] increase memory for amber --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 8beb5c76..868d03fe 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,8 +35,8 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = "52G" - String javaXmx = "50G" + String memory = "70G" + String javaXmx = "64G" Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } From 7a693a69f9a59755d527d733946406eed3a2f124 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 22 Jun 2021 11:38:04 +0200 Subject: [PATCH 0821/1208] remove rainfall plot output --- hmftools.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 868d03fe..8e60351b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -615,7 +615,6 @@ task Purple { File segmentPlot = "~{outputDir}/plot/~{tumorName}.segment.png" File somaticClonalityPlot = 
"~{outputDir}/plot/~{tumorName}.somatic.clonality.png" File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" - File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File purpleVersion = "~{outputDir}/purple.version" File circosNormalRatio = "~{outputDir}/circos/~{normalName}.ratio.circos" File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" @@ -634,7 +633,7 @@ task Purple { purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, - segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot] + segmentPlot, somaticClonalityPlot, somaticPlot] Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink, circosTumorRatio, circosGaps, circosBaf, circosCnv, circosInputConf, circosMap, circosSnp] From 664325fc50d19e074d80780cae322157f07035ed Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 22 Jun 2021 13:34:07 +0200 Subject: [PATCH 0822/1208] fix missing backslashes --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 8e60351b..1b9d8d22 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -247,8 +247,8 @@ task CuppaChart { set -e mkdir -p ~{outputDir} cuppa-chart \ - -sample ~{sampleName} - -sample_data ~{cupData} + -sample ~{sampleName} \ + -sample_data ~{cupData} \ -output_dir ~{outputDir} } From 5e29a653559f7b7cc0f1e2fc787bbf8a8117f306 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 25 Jun 2021 07:40:37 +0200 Subject: [PATCH 0823/1208] Set defaults for boolean values --- spades.wdl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/spades.wdl b/spades.wdl index 7cc16d21..1f246d48 100644 --- a/spades.wdl +++ b/spades.wdl @@ -34,16 +34,16 @@ task Spades { File? tslrContigs File? trustedContigs File? 
untrustedContigs - Boolean? singleCell - Boolean? metagenomic - Boolean? rna - Boolean? plasmid - Boolean? ionTorrent - Boolean? onlyErrorCorrection - Boolean? onlyAssembler - Boolean? careful - Boolean? disableGzipOutput - Boolean? disableRepeatResolution + Boolean singleCell = False + Boolean metagenomic = False + Boolean rna = False + Boolean plasmid = False + Boolean ionTorrent = False + Boolean onlyErrorCorrection = False + Boolean onlyAssembler = False + Boolean careful = False + Boolean disableGzipOutput = False + Boolean disableRepeatResolution = False File? dataset File? tmpDir String? k From 2ebde5f0a1997a098f89370989bdbbcf242ac207 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 28 Jun 2021 09:51:24 +0200 Subject: [PATCH 0824/1208] survivor: line 47-49, change integer to string literal --- survivor.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/survivor.wdl b/survivor.wdl index 8b0360d8..de232405 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -44,9 +44,9 @@ task Merge { fileList \ ~{breakpointDistance} \ ~{suppVecs} \ - ~{true=1 false=0 svType} \ - ~{true=1 false=0 strandType} \ - ~{true=1 false=0 distanceBySvSize} \ + ~{true='1' false='0' svType} \ + ~{true='1' false='0' strandType} \ + ~{true='1' false='0' distanceBySvSize} \ ~{minSize} \ ~{outputPath} } From da28f9399252cb8777abc630fe8c34e406d13da3 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 28 Jun 2021 10:56:04 +0200 Subject: [PATCH 0825/1208] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e7242699..ad3d30fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Survivor: Change integer to string literal in boolean parameters. + Samtools: Add mkdir line to `Fastq` task. 
+ Add new parameters from CCS version 6.0.0 and add two new outputs: `ccs_report.txt` & `zmw_metrics.json.gz`. From 545f63af658df8fc515672589a7bfb7e81ed2be3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 28 Jun 2021 11:36:52 +0200 Subject: [PATCH 0826/1208] update some version and add repeatmasker annotation for gridss --- gridss.wdl | 123 +++++++++++++++++++++++++++++++-------------------- hmftools.wdl | 9 ++-- 2 files changed, 79 insertions(+), 53 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index acafc911..3844c602 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -22,6 +22,61 @@ version 1.0 import "bwa.wdl" as bwa +task AnnotateInsertedSequence { + input { + File inputVcf + String outputPath = "gridss.annotated.vcf.gz" + File viralReference + File viralReferenceFai + File viralReferenceDict + File viralReferenceImg + + Int threads = 8 + String javaXmx = "8G" + String memory = "9G" + String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3 / threads) + } + + command { + AnnotateInsertedSequence -Xmx~{javaXmx} \ + REFERENCE_SEQUENCE=~{viralReference} \ + INPUT=~{inputVcf} \ + OUTPUT=~{outputPath} \ + ALIGNMENT=APPEND \ + WORKING_DIR='.' 
\ + WORKER_THREADS=~{threads} + } + + output { + File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + inputVcf: {description: "The input VCF file.", category: "required"} + outputPath: {description: "The path the output will be written to.", category: "common"} + viralReference: {description: "A fasta file with viral sequences.", category: "required"} + viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"} + viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"} + viralReferenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the viral reference.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GRIDSS { input { File tumorBam @@ -34,13 +89,12 @@ task GRIDSS { File? normalBai String? normalLabel File? blacklistBed - File? repeatmaskerBed File? 
gridssProperties Int jvmHeapSizeGb = 64 Int threads = 4 Int timeMinutes = ceil(4320 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" + String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" } command { @@ -56,7 +110,6 @@ task GRIDSS { ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ ~{"--blacklist " + blacklistBed} \ - ~{"--repeatmaskerbed " + repeatmaskerBed} \ ~{normalBam} \ ~{tumorBam} tabix -p vcf ~{outputPrefix}.vcf.gz @@ -88,7 +141,6 @@ task GRIDSS { normalBai: {description: "The index for normalBam.", category: "advanced"} normalLabel: {description: "The name of the normal sample.", category: "advanced"} blacklistBed: {description: "A bed file with blaclisted regins.", category: "advanced"} - repeatmaskerBed: {description: "A bed file containing the repeatmasker database.", category: "advanced"} gridssProperties: {description: "A properties file for gridss.", category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} @@ -104,64 +156,37 @@ task GRIDSS { } } -task AnnotateInsertedSequence { +task GridssAnnotateVcfRepeatmasker { input { - File inputVcf - String outputPath = "gridss.annotated.vcf.gz" - File viralReference - File viralReferenceFai - File viralReferenceDict - File viralReferenceImg + File gridssVcf + File gridssVcfIndex + String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" - Int threads = 8 - String javaXmx = "8G" - String memory = "9G" - String dockerImage = "quay.io/biocontainers/gridss:2.9.4--0" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3 / threads) + String memory = "4G" + String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3) } command { - java -Xmx~{javaXmx} \ - -Dsamjdk.create_index=true \ - -Dsamjdk.use_async_io_read_samtools=true \ - -Dsamjdk.use_async_io_write_samtools=true \ - 
-Dsamjdk.use_async_io_write_tribble=true \ - -Dsamjdk.buffer_size=4194304 \ - -cp /usr/local/share/gridss-2.9.4-0/gridss.jar \ - gridss.AnnotateInsertedSequence \ - REFERENCE_SEQUENCE=~{viralReference} \ - INPUT=~{inputVcf} \ - OUTPUT=~{outputPath} \ - ALIGNMENT=APPEND \ - WORKING_DIR='.' \ - WORKER_THREADS=~{threads} + gridss_annotate_vcf_repeatmasker + --output ~{outputPath} \ + --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ + -w . \ + ~{gridssVcf} } output { - File outputVcf = outputPath - File outputVcfIndex = outputPath + ".tbi" + File annotatedVcf = outputPath + File annotatedVcfIndex = "~{outputPath}.tbi" } runtime { - cpu: threads - memory: memory - time_minutes: timeMinutes # !UnknownRuntimeKey - docker: dockerImage - } - - parameter_meta { - inputVcf: {description: "The input VCF file.", category: "required"} - outputPath: {description: "The path the output will be written to.", category: "common"} - viralReference: {description: "A fasta file with viral sequences.", category: "required"} - viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"} - viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"} - viralReferenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the viral reference.", category: "required"} - + gridssVcf: {description: "The GRIDSS output.", category: "required"} + gridssVcfIndex: {description: "The index for the GRIDSS output.", category: "required"} + outputPath: {description: "The path the output should be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} +} \ No newline at end of file diff --git a/hmftools.wdl b/hmftools.wdl index 1b9d8d22..7d6f1547 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -304,7 +304,8 @@ task GripssApplicationKt { -breakend_pon ~{breakendPon} \ -breakpoint_pon ~{breakpointPon} \ -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} + -output_vcf ~{outputPath} \ + -paired_normal_tumor_ordinals } output { @@ -463,7 +464,7 @@ task Linx { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.13--0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.15--hdfd78af_0" } command { @@ -565,7 +566,7 @@ task Purple { Int timeMinutes = 30 String memory = "9G" String javaXmx = "8G" - String dockerImage = "quay.io/biocontainers/hmftools-purple:2.52--0" + String dockerImage = "quay.io/biocontainers/hmftools-purple:2.54--hdfd78af_0" } command { @@ -706,7 +707,7 @@ task Sage { String javaXmx = "50G" String memory = "60G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 8 / threads) - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.7--0" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_0" } command { From 04c65ab38a2d91051e3c0aa90c67738b755a4921 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 28 Jun 2021 15:29:21 +0200 Subject: [PATCH 0827/1208] add virusbreakend --- gridss.wdl | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 3844c602..52e039d1 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -164,7 
+164,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "4G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3) + Int timeMinutes = 1 + ceil(size(gridssVcf, "G") * 3) } command { @@ -181,6 +181,12 @@ task GridssAnnotateVcfRepeatmasker { } runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { gridssVcf: {description: "The GRIDSS output.", category: "required"} gridssVcfIndex: {description: "The index for the GRIDSS output.", category: "required"} outputPath: {description: "The path the output should be written to.", category: "common"} @@ -189,4 +195,57 @@ task GridssAnnotateVcfRepeatmasker { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} + +task Virusbreakend { + input { + File bam + File bamIndex + File referenceFasta + File virusbreakendDB + String outputPath = "./virusbreakend.vcf" + + String memory = "75G" + Int threads = 8 + String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + Int timeMinutes = 180 + } + + command { + mkdir virusbreakenddb + tar -xzvf ~{virusbreakendDB} -C virusbreakenddb --strip-components 1 + virusbreakend \ + --output ~{outputPath} \ + --workingdir . 
\ + --reference ~{referenceFasta} \ + --db virusbreakenddb \ + --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ + -t ~{threads} \ + ~{bam} + } + + output { + File vcf = outputPath + File summary = "~{outputPath}.summary.tsv" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + bam: {description: "A BAM file.", category: "required"} + bamIndex: {description: "The index for the BAM file.", category: "required"} + referenceFasta: {description: "The fasta of the reference genome.", category: "required"} + virusbreakendDB: {description: "A .tar.gz containing the virusbreakend database.", category: "required"} + outputPath: {description: "The path the output should be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + threads: {description: "The number of the threads to use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From c2f223eb6a487d7c5bca957bdaaf830d0522d3cb Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 29 Jun 2021 13:26:01 +0200 Subject: [PATCH 0828/1208] add virusinterpreter --- hmftools.wdl | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 7d6f1547..f1617bbe 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -778,3 +778,54 @@ task Sage { category: "advanced"} } } + +task VirusInterpreter { + input { + String sampleId + File virusBreakendTsv + File taxonomyDbTsv + File virusInterpretationTsv + File virusBlacklistTsv + String outputDir = "." 
+ + String memory = "3G" + String javaXmx = "2G" + Int timeMinutes = 15 + String dockerImage = "quay.io/biowdl/virus-interpreter:1.0" + } + + command { + virus-interpreter -Xmx~{javaXmx} \ + -sample_id ~{sampleId} \ + -virus_breakend_tsv ~{virusBreakendTsv} \ + -taxonomy_db_tsv ~{taxonomyDbTsv} \ + -virus_interpretation_tsv ~{virusInterpretationTsv} \ + -virus_blacklist_tsv ~{virusBlacklistTsv} \ + -output_dir ~{outputDir} + } + + output { + File virusAnnotatedTsv = "~{outputDir}/~{sampleId}.virus.annotated.tsv" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + sampleId: {description: "The name of the sample.", category: "required"} + virusBreakendTsv: {description: "The TSV output from virusbreakend.", category: "required"} + taxonomyDbTsv: {description: "A taxonomy database tsv.", category: "required"} + virusInterpretationTsv: {description: "A virus interpretation tsv.", category: "required"} + virusBlacklistTsv: {description: "A virus blacklist tsv.", category: "required"} + outputDir: {description: "The directory the output will be written to.", category: "required"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From f169d78589c3e4d2a97892cfc3fb685d6c217d6c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 29 Jun 2021 16:02:28 +0200 Subject: [PATCH 0829/1208] add protect --- hmftools.wdl | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index f1617bbe..646d01ea 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -542,6 +542,101 @@ task Linx { } } +task Protect { + input { + String refGenomeVersion + String tumorName + String normalName + Array[String]+ sampleDoids + String outputDir = "." + Array[File]+ serveActionability + File doidsJson + File purplePurity + File purpleQc + File purpleDriverCatalogSomatic + File purpleDriverCatalogGermline + File purpleSomaticVariants + File purpleSomaticVariantsIndex + File purpleGermlineVariants + File purpleGermlineVariantsIndex + File purpleGeneCopyNumber + File linxFusion + File linxBreakend + File linxDriversCatalog + File chordPrediction + File annotatedVirus + + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biowdl/protect:v1.4" + } + + command { + protect -Xmx~{javaXmx} \ + -ref_genome_version ~{refGenomeVersion} \ + -tumor_sample_id ~{tumorName} \ + -reference_sample_id ~{normalName} \ + -primary_tumor_doids ~{sep=";" sampleDoids} \ + -output_dir ~{outputDir} \ + -serve_actionability_dir ~{sub(serveActionability[0], basename(serveActionability[0]), "")} \ + -doid_json ~{doidsJson} \ + -purple_purity_tsv ~{purplePurity} \ + -purple_qc_file ~{purpleQc} \ + -purple_somatic_driver_catalog_tsv ~{purpleDriverCatalogSomatic} \ + -purple_germline_driver_catalog_tsv ~{purpleDriverCatalogGermline} \ + -purple_somatic_variant_vcf ~{purpleSomaticVariants} \ + -purple_germline_variant_vcf ~{purpleGermlineVariants} \ + -purple_gene_copy_number_tsv ~{purpleGeneCopyNumber} \ + -linx_fusion_tsv 
~{linxFusion} \ + -linx_breakend_tsv ~{linxBreakend} \ + -linx_driver_catalog_tsv ~{linxDriversCatalog} \ + -chord_prediction_txt ~{chordPrediction} \ + -annotated_virus_tsv ~{annotatedVirus} + } + + output { + File protectTsv = "~{outputDir}/~{tumorName}.protect.tsv" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + normalName: {description: "The name of the normal sample.", category: "required"} + sampleDoids: {description: "The DOIDs (Human Disease Ontology) for the primary tumor.", category: "required"} + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + serveActionability: {description: "The actionability files generated by hmftools' serve.", category: "required"} + doidsJson: {description: "A json with the DOID (Human Disease Ontology) tree.", category: "required"} + purplePurity: {description: "The purity file generated by purple.", category: "required"} + purpleQc: {description: "The QC file generated by purple.", category: "required"} + purpleDriverCatalogSomatic: {description: "The somatic driver catalog generated by purple.", category: "required"} + purpleDriverCatalogGermline: {description: "The germline driver catalog generated by purple.", category: "required"} + purpleSomaticVariants: {description: "The somatic VCF generated by purple.", category: "required"} + purpleSomaticVariantsIndex: {description: "The index for the somatic VCF generated by purple.", category: "required"} + purpleGermlineVariants: {description: "The germline VCF generated by purple.", category: "required"} + purpleGermlineVariantsIndex: {description: "The index of the germline VCF generated by purple.", category: "required"} + 
purpleGeneCopyNumber: {description: "The gene copy number file generated by purple.", category: "required"} + linxFusion: {description: "The fusion file generated by linx.", category: "required"} + linxBreakend: {description: "The breakend file generated by linx.", category: "required"} + linxDriversCatalog: {description: "The driver catalog generated generated by linx.", category: "required"} + chordPrediction: {description: "The chord prediction file.", category: "required"} + annotatedVirus: {description: "The virus-interpreter output.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Purple { input { String normalName From 47c89884e700c1c7ad11ba26e195d7812a6f1fac Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 30 Jun 2021 14:27:10 +0200 Subject: [PATCH 0830/1208] update CPAT to 3.0.4 --- CHANGELOG.md | 2 ++ CPAT.wdl | 17 +++++++++-------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 28e998e6..112b8f04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 5.0.0-dev --------------------------- ++ Update CPAT to version 3.0.4. + + Changed the `outFilePath` input to `outputPrefix`. + GffCompare: Make the `referenceAnnotation` input optional. + Stringtie: Add the `minimumCoverage` input. 
+ UMI-tools: update default dockerImage to use umitools v1.1.1 with correct samtools version (1.10) diff --git a/CPAT.wdl b/CPAT.wdl index afb67853..b3414bc5 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -23,7 +23,7 @@ version 1.0 task CPAT { input { File gene - String outFilePath + String outputPrefix File hex File logitModel @@ -35,7 +35,7 @@ task CPAT { Array[String]? stopCodons Int timeMinutes = 10 + ceil(size(gene, "G") * 30) - String dockerImage = "biocontainers/cpat:v1.2.4_cv1" + String dockerImage = "biocontainers/cpat:3.0.4--py39hcbe4a3b_0" } # Some WDL magic in the command section to properly output the start and @@ -47,7 +47,7 @@ task CPAT { mkdir -p "$(dirname ~{outFilePath})" cpat.py \ --gene ~{gene} \ - --outfile ~{outFilePath} \ + --outfile ~{outputPrefix} \ --hex ~{hex} \ --logitModel ~{logitModel} \ ~{"--ref " + referenceGenome} \ @@ -56,7 +56,11 @@ task CPAT { } output { - File outFile = outFilePath + File orfSeqs = "~{outputPrefix}.ORF_seqs.fa" + File orfProb = "~{outputPrefix}.ORF_prob.tsv" + File orfProbBest = "~{outputPrefix}.ORF_prob.best.tsv" + File noOrf = "~{outputPrefix}.no_ORF.txt" + File rScript = "~{outputPrefix}.r" } runtime { @@ -67,7 +71,7 @@ task CPAT { parameter_meta { # inputs gene: {description: "Equivalent to CPAT's `--gene` option.", category: "required"} - outFilePath: {description: "Equivalent to CPAT's `--outfile` option.", category: "required"} + outputPrefix: {description: "Equivalent to CPAT's `--outfile` option.", category: "required"} hex: {description: "Equivalent to CPAT's `--hex` option.", category: "required"} logitModel: {description: "Equivalent to CPAT's `--logitModel` option.", category: "required"} referenceGenome: {description: "Equivalent to CPAT's `--ref` option.", category: "advanced"} @@ -76,9 +80,6 @@ task CPAT { stopCodons: {description: "Equivalent to CPAT's `--stop` option.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} 
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - - # outputs - outFile: {description: "CPAT logistic regression model."} } } From d4d36e02f167fc1676071d67c6749feee44c510d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 30 Jun 2021 15:00:45 +0200 Subject: [PATCH 0831/1208] fix mkdir in CPAT --- CPAT.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CPAT.wdl b/CPAT.wdl index afce53e2..972613cf 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -45,7 +45,7 @@ task CPAT { # to non-optionals. command { set -e - mkdir -p "$(dirname ~{outFilePath})" + mkdir -p "$(dirname ~{outputPrefix})" cpat.py \ --gene ~{gene} \ --outfile ~{outputPrefix} \ From 8eb013496e3e81107ed18d8c5f067a9ffec15dea Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 30 Jun 2021 16:49:50 +0200 Subject: [PATCH 0832/1208] fix cpat dockerimage --- CPAT.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CPAT.wdl b/CPAT.wdl index 972613cf..e6cef3ea 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -36,7 +36,7 @@ task CPAT { String memory = "4G" Int timeMinutes = 10 + ceil(size(gene, "G") * 30) - String dockerImage = "biocontainers/cpat:3.0.4--py39hcbe4a3b_0" + String dockerImage = "quay.io/biocontainers/cpat:3.0.4--py39hcbe4a3b_0" } # Some WDL magic in the command section to properly output the start and From 3c92beac7d694209332b66e6869c7c7b6a3ea885 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 1 Jul 2021 12:28:31 +0200 Subject: [PATCH 0833/1208] remove tabix from gridss --- gridss.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 52e039d1..6c8899e4 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -112,7 +112,6 @@ task GRIDSS { ~{"--blacklist " + blacklistBed} \ ~{normalBam} \ ~{tumorBam} - tabix -p vcf ~{outputPrefix}.vcf.gz samtools index ~{outputPrefix}_assembly.bam ~{outputPrefix}_assembly.bai } From 
44a70a394df432fe678a0fa82ef015acf3e5c6d7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jul 2021 09:42:45 +0200 Subject: [PATCH 0834/1208] fix missing backslash --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 6c8899e4..f9a92f56 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -167,7 +167,7 @@ task GridssAnnotateVcfRepeatmasker { } command { - gridss_annotate_vcf_repeatmasker + gridss_annotate_vcf_repeatmasker \ --output ~{outputPath} \ --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ -w . \ From 7c5ce8c031f34744f9759e59b2617113120a40be Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jul 2021 10:50:50 +0200 Subject: [PATCH 0835/1208] set default timeMinutes GridssAnnotateVcfRepeatmasker to 120 --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index f9a92f56..02f32297 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -163,7 +163,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "4G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 1 + ceil(size(gridssVcf, "G") * 3) + Int timeMinutes = 120 } command { From f3ac54310bf8eabcf1fdeb61d1caca2149bac033 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 2 Jul 2021 12:35:33 +0200 Subject: [PATCH 0836/1208] Update spades.wdl Co-authored-by: Davy Cats --- spades.wdl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/spades.wdl b/spades.wdl index 1f246d48..3975dd32 100644 --- a/spades.wdl +++ b/spades.wdl @@ -34,16 +34,16 @@ task Spades { File? tslrContigs File? trustedContigs File? 
untrustedContigs - Boolean singleCell = False - Boolean metagenomic = False - Boolean rna = False - Boolean plasmid = False - Boolean ionTorrent = False - Boolean onlyErrorCorrection = False - Boolean onlyAssembler = False - Boolean careful = False - Boolean disableGzipOutput = False - Boolean disableRepeatResolution = False + Boolean singleCell = false + Boolean metagenomic = false + Boolean rna = false + Boolean plasmid = false + Boolean ionTorrent = false + Boolean onlyErrorCorrection = false + Boolean onlyAssembler = false + Boolean careful = false + Boolean disableGzipOutput = false + Boolean disableRepeatResolution = false File? dataset File? tmpDir String? k From 4e0ab25eead014f9e9038bab7ea61a810dbb05cc Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jul 2021 13:02:18 +0200 Subject: [PATCH 0837/1208] small formatting fix --- gffcompare.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gffcompare.wdl b/gffcompare.wdl index aa7c7209..d06602bc 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -116,8 +116,8 @@ task GffCompare { runtime { memory: memory - time_minutes: timeMinutes - docker: dockerImage + time_minutes: timeMinutes + docker: dockerImage } parameter_meta { From 497f12a7446dc80873a66fa00db1c9bbc0eece99 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jul 2021 13:47:29 +0200 Subject: [PATCH 0838/1208] adjust repeatmasker time --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 02f32297..db20a203 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -163,7 +163,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "4G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 120 + Int timeMinutes = 1440 } command { From 7f4433f50b5ef8deaeb1d86beaaaae5ff07bae41 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 2 Jul 2021 15:25:28 +0200 Subject: [PATCH 0839/1208] fix missing memory runtime BgzipAndIndex --- samtools.wdl | 1 + 1 file 
changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index d34df51e..c8837d94 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -46,6 +46,7 @@ task BgzipAndIndex { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } From 477f00f57a1bf445672da7b7be7ed999e6230e93 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 5 Jul 2021 09:13:07 +0200 Subject: [PATCH 0840/1208] increase time for GridssAnnotateVcfRepeatmasker --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index db20a203..f137f968 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -163,7 +163,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "4G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 1440 + Int timeMinutes = 2880 } command { From 4a32a443a29e324b8b01fac1fdbc01a7f2078f79 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 7 Jul 2021 09:39:47 +0200 Subject: [PATCH 0841/1208] increase memory repeatmasker --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index f137f968..ad230d05 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -161,7 +161,7 @@ task GridssAnnotateVcfRepeatmasker { File gridssVcfIndex String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" - String memory = "4G" + String memory = "50G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" Int timeMinutes = 2880 } From 9d3b5a556bd642d8dc8d098694497a5a3b1950fb Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 7 Jul 2021 09:46:34 +0200 Subject: [PATCH 0842/1208] add threads to repeatmasker --- gridss.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index ad230d05..069d6953 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -162,6 +162,7 @@ task GridssAnnotateVcfRepeatmasker { String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" String memory = "50G" + Int threads = 4 String dockerImage = 
"quay.io/biocontainers/gridss:2.12.0--h270b39a_1" Int timeMinutes = 2880 } @@ -171,6 +172,7 @@ task GridssAnnotateVcfRepeatmasker { --output ~{outputPath} \ --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ -w . \ + -t ~{threads} \ ~{gridssVcf} } @@ -180,6 +182,7 @@ task GridssAnnotateVcfRepeatmasker { } runtime { + cpu: threads memory: memory time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage @@ -189,6 +192,7 @@ task GridssAnnotateVcfRepeatmasker { gridssVcf: {description: "The GRIDSS output.", category: "required"} gridssVcfIndex: {description: "The index for the GRIDSS output.", category: "required"} outputPath: {description: "The path the output should be written to.", category: "common"} + threads: {description: "The number of the threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", From a40300a4d6fb9296ca9e4a1978fbeffe3cb86f90 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 7 Jul 2021 16:59:58 +0200 Subject: [PATCH 0843/1208] update submodule scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index c31670d3..84690a30 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit c31670d3a9222a2feafc649cbc118c95afbc7189 +Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 From ff2b1efb8482282288107b28e1bf53ca91319b30 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 7 Jul 2021 17:00:43 +0200 Subject: [PATCH 0844/1208] Change current development version in CHANGELOG.md to stable version --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5eda114e..7cb7a436 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 5.0.0-dev +version 5.0.0 --------------------------- + Update CPAT to version 3.0.4. + Changed the `outFilePath` input to `outputPrefix`. 
From a411311e0d74045541a000176c2f172c7d0679fd Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 7 Jul 2021 17:02:48 +0200 Subject: [PATCH 0845/1208] update CI.yml --- .github/workflows/ci.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 97d329ad..78566111 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,10 +2,8 @@ name: Continuous integration on: pull_request: - paths: - - "**.wdl" # Workflow files and task - - "**.yml" # Ci configuration, tests and docker images - - "!docs/**" + paths_ignore: + - "docs/**" defaults: run: @@ -29,4 +27,4 @@ jobs: - name: install requirements run: conda install -n test cromwell miniwdl wdl-aid - name: run linting - run: bash scripts/biowdl_lint.sh \ No newline at end of file + run: bash scripts/biowdl_lint.sh From 9b0873ab9180e4af3a3ab869a4e909f5f0ee327e Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 7 Jul 2021 17:20:45 +0200 Subject: [PATCH 0846/1208] setting next version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 0062ac97..09b254e9 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -5.0.0 +6.0.0 From c80402130bdb7471e8f37fece8cb643625a0df02 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 09:58:51 +0200 Subject: [PATCH 0847/1208] fix Xmx in AnnotateInsertedSequence --- gridss.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 069d6953..aedac9ab 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -39,7 +39,9 @@ task AnnotateInsertedSequence { } command { - AnnotateInsertedSequence -Xmx~{javaXmx} \ + set -e + _JAVA_OPTIONS="${_JAVA_OPTIONS}:-Xmx~{javaXmx}" + AnnotateInsertedSequence \ REFERENCE_SEQUENCE=~{viralReference} \ INPUT=~{inputVcf} \ OUTPUT=~{outputPath} \ @@ -215,6 +217,7 @@ task Virusbreakend { } command { + set -e mkdir virusbreakenddb tar -xzvf ~{virusbreakendDB} -C virusbreakenddb 
--strip-components 1 virusbreakend \ From 28b1a835d558d8ecd60682e9105731b6762f4c30 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 11:32:44 +0200 Subject: [PATCH 0848/1208] fix wrong placeholder --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index aedac9ab..66e27ff0 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -40,7 +40,7 @@ task AnnotateInsertedSequence { command { set -e - _JAVA_OPTIONS="${_JAVA_OPTIONS}:-Xmx~{javaXmx}" + _JAVA_OPTIONS="$_JAVA_OPTIONS:-Xmx~{javaXmx}" AnnotateInsertedSequence \ REFERENCE_SEQUENCE=~{viralReference} \ INPUT=~{inputVcf} \ From d28a2a529ede9ffc89b18628cc012c846354e096 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 11:53:24 +0200 Subject: [PATCH 0849/1208] typo --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 66e27ff0..fcfed095 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -35,12 +35,12 @@ task AnnotateInsertedSequence { String javaXmx = "8G" String memory = "9G" String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 3 / threads) + Int timeMinutes = 120 } command { set -e - _JAVA_OPTIONS="$_JAVA_OPTIONS:-Xmx~{javaXmx}" + _JAVA_OPTIONS="$_JAVA_OPTIONS -Xmx~{javaXmx}" AnnotateInsertedSequence \ REFERENCE_SEQUENCE=~{viralReference} \ INPUT=~{inputVcf} \ From 572114885be2bd0243ac59898c223fbf954e1510 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 14:40:29 +0200 Subject: [PATCH 0850/1208] update gripss version --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 646d01ea..9dc78dd8 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -290,7 +290,7 @@ task GripssApplicationKt { String memory = "33G" String javaXmx = "32G" Int timeMinutes = 45 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" + String dockerImage = "umccr/gripss:1.11" 
#FIXME quay doesn't have the latest version "quay.io/biocontainers/hmftools-gripss:1.9--0" } command { @@ -346,7 +346,7 @@ task GripssHardFilterApplicationKt { String memory = "3G" String javaXmx = "2G" Int timeMinutes = 15 - String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.9--0" + String dockerImage = "umccr/gripss:1.11" #FIXME quay doesn't have the latest version "quay.io/biocontainers/hmftools-gripss:1.9--0" } command { From f62a7424b88a1de1e6c1791aeff7c020a60939cd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 8 Jul 2021 14:42:20 +0200 Subject: [PATCH 0851/1208] fix gripss version in command --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 9dc78dd8..6a086d37 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -295,7 +295,7 @@ task GripssApplicationKt { command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ -tumor ~{tumorName} \ -reference ~{normalName} \ @@ -351,7 +351,7 @@ task GripssHardFilterApplicationKt { command { java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -cp /usr/local/share/hmftools-gripss-1.9-0/gripss.jar \ + -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ -input_vcf ~{inputVcf} \ -output_vcf ~{outputPath} From e470f59fa587bef9dd075eb28ba6317be89a8416 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 9 Jul 2021 10:01:09 +0200 Subject: [PATCH 0852/1208] update purple and gripss versions --- hmftools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 6a086d37..8c38c501 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -290,7 +290,7 @@ task GripssApplicationKt { String memory = "33G" String javaXmx = "32G" Int timeMinutes = 45 - String dockerImage = "umccr/gripss:1.11" #FIXME 
quay doesn't have the latest version "quay.io/biocontainers/hmftools-gripss:1.9--0" + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" } command { @@ -346,7 +346,7 @@ task GripssHardFilterApplicationKt { String memory = "3G" String javaXmx = "2G" Int timeMinutes = 15 - String dockerImage = "umccr/gripss:1.11" #FIXME quay doesn't have the latest version "quay.io/biocontainers/hmftools-gripss:1.9--0" + String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" } command { @@ -661,7 +661,7 @@ task Purple { Int timeMinutes = 30 String memory = "9G" String javaXmx = "8G" - String dockerImage = "quay.io/biocontainers/hmftools-purple:2.54--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-purple:3.1--hdfd78af_0" } command { From c47163aa1c9d67b5d675444d06afe36e5ee31ec9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 9 Jul 2021 13:24:47 +0200 Subject: [PATCH 0853/1208] change docker image for purple --- hmftools.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 8c38c501..bf79070e 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -661,7 +661,8 @@ task Purple { Int timeMinutes = 30 String memory = "9G" String javaXmx = "8G" - String dockerImage = "quay.io/biocontainers/hmftools-purple:3.1--hdfd78af_0" + # clone of quay.io/biocontainers/hmftools-purple:3.1--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6' + String dockerImage = "quay.io/biowdl/hmftools-purple:3.1" } command { From ed6061d1671ba091992248375e613daf57fd544d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 9 Jul 2021 14:20:38 +0200 Subject: [PATCH 0854/1208] fix linx output --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index bf79070e..a327fd0b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -488,7 +488,7 @@ task Linx { } output { - File driverCatalog = 
"~{outputDir}/~{sampleName}.driver.catalog.tsv" + File driverCatalog = "~{outputDir}/~{sampleName}.linx.driver.catalog.tsv" File linxBreakend = "~{outputDir}/~{sampleName}.linx.breakend.tsv" File linxClusters = "~{outputDir}/~{sampleName}.linx.clusters.tsv" File linxDrivers = "~{outputDir}/~{sampleName}.linx.drivers.tsv" From 1fbf3eacc269782a6035c0c2e2bec348a31de0e8 Mon Sep 17 00:00:00 2001 From: cagaser Date: Mon, 19 Jul 2021 11:54:26 +0200 Subject: [PATCH 0855/1208] update common.wdl fastqc.wdl samtools.wdl somaticseq.wdl umi-tools.wdl wisestork.wdl: add runtime memory --- common.wdl | 28 +++++++++++++++++++++++----- fastqc.wdl | 4 +++- samtools.wdl | 3 ++- somaticseq.wdl | 10 ++++++++++ umi-tools.wdl | 2 ++ wisestork.wdl | 6 ++++++ 6 files changed, 46 insertions(+), 7 deletions(-) diff --git a/common.wdl b/common.wdl index 54b11567..d29ed5da 100644 --- a/common.wdl +++ b/common.wdl @@ -24,6 +24,8 @@ task AppendToStringArray { input { Array[String] array String string + + String memory = "1G" } command { @@ -36,7 +38,7 @@ task AppendToStringArray { } runtime { - memory: "1G" + memory: memory } } @@ -45,9 +47,11 @@ task CheckFileMD5 { input { File file String md5 + # By default cromwell expects /bin/bash to be present in the container. # The 'bash' container does not fill this requirement. (It is in /usr/local/bin/bash) # Use a stable version of debian:stretch-slim for this. (Smaller than ubuntu) + String memory = "1G" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -60,6 +64,7 @@ task CheckFileMD5 { runtime { docker: dockerImage + memory: memory } } @@ -69,6 +74,8 @@ task ConcatenateTextFiles { String combinedFilePath Boolean unzip = false Boolean zip = false + + String memory = "1G" } # When input and output is both compressed decompression is not needed. 
@@ -86,7 +93,7 @@ task ConcatenateTextFiles { } runtime { - memory: "1G" + memory: memory } } @@ -97,6 +104,7 @@ task Copy { Boolean recursive = false # Version not that important as long as it is stable. + String memory = "1G" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -112,6 +120,7 @@ task Copy { runtime { docker: dockerImage + memory: memory } } @@ -122,6 +131,8 @@ task CreateLink { input { String inputFile String outputPath + + String memory = "1G" } command { @@ -131,12 +142,17 @@ task CreateLink { output { File link = outputPath } + + runtime { + memory: memory + } } task MapMd5 { input { Map[String,String] map + String memory = "1G" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -150,7 +166,7 @@ task MapMd5 { } runtime { - memory: "1G" + memory: memory docker: dockerImage } } @@ -160,6 +176,7 @@ task StringArrayMd5 { input { Array[String] stringArray + String memory = "1G" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -173,7 +190,7 @@ task StringArrayMd5 { } runtime { - memory: "1G" + memory: memory docker: dockerImage } } @@ -183,6 +200,7 @@ task TextToFile { String text String outputFile = "out.txt" + String memory = "1G" Int timeMinutes = 1 String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -196,7 +214,7 @@ task TextToFile { } runtime { - memory: "1G" + memory: memory time_minutes: timeMinutes docker: dockerImage } diff --git a/fastqc.wdl b/fastqc.wdl index 973eeed9..3a07db4e 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -143,6 +143,7 @@ task Fastqc { task GetConfiguration { input { + String memory = "2G" # Needs more than 1 to pull the docker image. 
Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/fastqc:0.11.7--4" } @@ -165,7 +166,7 @@ task GetConfiguration { } runtime { - memory: "2G" # Needs more than 1 to pull the docker image. + memory: memory time_minute: timeMinutes docker: dockerImage } @@ -173,6 +174,7 @@ task GetConfiguration { parameter_meta { # inputs timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/samtools.wdl b/samtools.wdl index 46d1eb70..04e27fca 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -25,7 +25,7 @@ task BgzipAndIndex { File inputFile String outputDir String type = "vcf" - + String memory = "2G" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -47,6 +47,7 @@ task BgzipAndIndex { runtime { time_minutes: timeMinutes docker: dockerImage + memory: memory } parameter_meta { diff --git a/somaticseq.wdl b/somaticseq.wdl index 07103ef9..27c3fe36 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -49,6 +49,7 @@ task ParallelPaired { Int threads = 1 Int timeMinutes = 60 + String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -92,6 +93,7 @@ task ParallelPaired { runtime { cpu: threads time_minutes: timeMinutes + memory: memory docker: dockerImage } @@ -161,6 +163,7 @@ task ParallelPairedTrain { Int threads = 1 Int timeMinutes = 240 + String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -203,6 +206,7 @@ task ParallelPairedTrain { runtime { cpu: threads time_minutes: timeMinutes + memory: memory docker: dockerImage } @@ -266,6 +270,7 @@ task ParallelSingle { Int threads = 1 Int timeMinutes = 60 + String memory = "2G" String 
dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -302,6 +307,7 @@ task ParallelSingle { runtime { cpu: threads time_minutes: timeMinutes + memory: memory docker: dockerImage } @@ -355,6 +361,7 @@ task ParallelSingleTrain { Int threads = 1 Int timeMinutes = 240 + String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -390,6 +397,7 @@ task ParallelSingleTrain { runtime { cpu: threads time_minutes: timeMinutes + memory: memory docker: dockerImage } @@ -430,6 +438,7 @@ task ModifyStrelka { String outputVCFName = basename(strelkaVCF, ".gz") Int timeMinutes = 20 + String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -448,6 +457,7 @@ task ModifyStrelka { runtime { time_minutes: timeMinutes + memory: memory docker: dockerImage } diff --git a/umi-tools.wdl b/umi-tools.wdl index 86bf1314..b79817c2 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -30,6 +30,7 @@ task Extract { String? read2Output = "umi_extracted_R2.fastq.gz" Boolean threePrime = false + String memory = "20G" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:3067b520386698317fd507c413baf7f901666fd4-0" } @@ -51,6 +52,7 @@ task Extract { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } diff --git a/wisestork.wdl b/wisestork.wdl index 6be32168..8fb4b76b 100644 --- a/wisestork.wdl +++ b/wisestork.wdl @@ -31,6 +31,7 @@ task Count { Int? binSize File? binFile + String memory = "2G" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -50,6 +51,7 @@ task Count { } runtime { + memory: memory docker: dockerImage } } @@ -67,6 +69,7 @@ task GcCorrect { Int? iter Float? fracLowess + String memory = "2G" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -89,6 +92,7 @@ task GcCorrect { } runtime { + memory: memory docker: dockerImage } } @@ -143,6 +147,7 @@ task Zscore { Int? binSize File? 
binFile + String memory = "2G" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -163,6 +168,7 @@ task Zscore { } runtime { + memory: memory docker: dockerImage } } From 84cd7f692e38b4903e7945315a44265d7e16c5f2 Mon Sep 17 00:00:00 2001 From: cagaser Date: Tue, 20 Jul 2021 09:04:24 +0200 Subject: [PATCH 0856/1208] update CHANGELOG --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7cb7a436..d7bbc697 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> +version 5.0.1-dev +--------------------------- ++ Update number of tasks: add memory runtime version 5.0.0 --------------------------- From e1281833adff3aab2489d315f8e7da98e0e2fade Mon Sep 17 00:00:00 2001 From: cagaser Date: Tue, 20 Jul 2021 14:41:54 +0200 Subject: [PATCH 0857/1208] update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7bbc697..7f9df602 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> -version 5.0.1-dev +version 5.0.1 --------------------------- + Update number of tasks: add memory runtime From c9c36cf805cb70e488136609d99f601b16b1aa66 Mon Sep 17 00:00:00 2001 From: cagaser Date: Tue, 20 Jul 2021 16:39:50 +0200 Subject: [PATCH 0858/1208] update samtools.wdl somaticseq.wdl: add memory runtime --- samtools.wdl | 3 +++ somaticseq.wdl | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 46d1eb70..81b6c17d 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -26,6 +26,7 @@ task BgzipAndIndex { String outputDir String type = "vcf" + String memory = "2G" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -45,6 +46,7 @@ task BgzipAndIndex { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -54,6 +56,7 @@ task BgzipAndIndex { inputFile: {description: "The file to be compressed and indexed.", category: "required"} outputDir: {description: "The directory in which the output will be placed.", category: "required"} type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} diff --git a/somaticseq.wdl b/somaticseq.wdl index 07103ef9..2992a800 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -47,6 +47,7 @@ task ParallelPaired { File? strelkaSNV File? 
strelkaIndel + String memory = "2G" Int threads = 1 Int timeMinutes = 60 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -91,6 +92,7 @@ task ParallelPaired { runtime { cpu: threads + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -121,6 +123,7 @@ task ParallelPaired { strelkaSNV: {description: "An SNV VCF as produced by strelka.", category: "advanced"} strelkaIndel: {description: "An indel VCF as produced by somaticsniper.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + ParallelPaired timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -159,6 +162,7 @@ task ParallelPairedTrain { File? strelkaSNV File? strelkaIndel + String memory = "2G" Int threads = 1 Int timeMinutes = 240 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -202,6 +206,7 @@ task ParallelPairedTrain { runtime { cpu: threads + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -232,6 +237,7 @@ task ParallelPairedTrain { strelkaSNV: {description: "An SNV VCF as produced by strelka.", category: "advanced"} strelkaIndel: {description: "An indel VCF as produced by somaticsniper.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -264,6 +270,7 @@ task ParallelSingle { File? scalpelVCF File? 
strelkaVCF + String memory = "2G" Int threads = 1 Int timeMinutes = 60 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -301,6 +308,7 @@ task ParallelSingle { runtime { cpu: threads + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -323,6 +331,7 @@ task ParallelSingle { scalpelVCF: {description: "A VCF as produced by scalpel.", category: "advanced"} strelkaVCF: {description: "A VCF as produced by strelka.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -353,6 +362,7 @@ task ParallelSingleTrain { File? scalpelVCF File? strelkaVCF + String memory = "2G" Int threads = 1 Int timeMinutes = 240 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -389,6 +399,7 @@ task ParallelSingleTrain { runtime { cpu: threads + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -411,6 +422,7 @@ task ParallelSingleTrain { scalpelVCF: {description: "A VCF as produced by scalpel.", category: "advanced"} strelkaVCF: {description: "A VCF as produced by strelka.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -429,6 +441,7 @@ task ModifyStrelka { File strelkaVCF String outputVCFName = basename(strelkaVCF, ".gz") + String memory = "2G" Int timeMinutes = 20 String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -447,6 +460,7 @@ task ModifyStrelka { } runtime { + memory: memory time_minutes: timeMinutes docker: dockerImage } @@ -455,6 +469,7 @@ task ModifyStrelka { # inputs strelkaVCF: {description: "A vcf file as produced by strelka.", category: "required"} outputVCFName: {description: "The location the output VCF file should be written to.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 67c69d4eb336c69de999c66fadccf6c91345e0c7 Mon Sep 17 00:00:00 2001 From: cagaser Date: Tue, 20 Jul 2021 16:46:45 +0200 Subject: [PATCH 0859/1208] small fix --- somaticseq.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/somaticseq.wdl b/somaticseq.wdl index 2992a800..63f8362e 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -123,7 +123,7 @@ task ParallelPaired { strelkaSNV: {description: "An SNV VCF as produced by strelka.", category: "advanced"} strelkaIndel: {description: "An indel VCF as produced by somaticsniper.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} - ParallelPaired + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 5cb91703fb777ae35bcf6e509f7e124643339891 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 21 Jul 2021 14:18:56 +0200 Subject: [PATCH 0860/1208] update CHANGELOG --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7cb7a436..dfb81c47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> +version 5.0.1-dev +--------------------------- ++ add runtime memory to number of tasks. version 5.0.0 --------------------------- From 70cb8bdcbfa7d9384b2fd943a2686d01357854d3 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 21 Jul 2021 14:34:30 +0200 Subject: [PATCH 0861/1208] update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dfb81c47..b3dbc7f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 5.0.1-dev +version 5.0.1 --------------------------- + add runtime memory to number of tasks. 
From 8349056c8bd768e472c4178201f3241edaa7952f Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 21 Jul 2021 16:05:31 +0200 Subject: [PATCH 0862/1208] remove duplicated memory --- somaticseq.wdl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/somaticseq.wdl b/somaticseq.wdl index afe2918f..8c7fb884 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -50,7 +50,6 @@ task ParallelPaired { String memory = "2G" Int threads = 1 Int timeMinutes = 60 - String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -167,7 +166,6 @@ task ParallelPairedTrain { String memory = "2G" Int threads = 1 Int timeMinutes = 240 - String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -277,7 +275,6 @@ task ParallelSingle { String memory = "2G" Int threads = 1 Int timeMinutes = 60 - String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -371,7 +368,6 @@ task ParallelSingleTrain { String memory = "2G" Int threads = 1 Int timeMinutes = 240 - String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } @@ -451,7 +447,6 @@ task ModifyStrelka { String memory = "2G" Int timeMinutes = 20 - String memory = "2G" String dockerImage = "lethalfang/somaticseq:3.1.0" } From bc1bacf11498d2d30b85591cfccdcf71ef0966a5 Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 21 Jul 2021 16:22:35 +0200 Subject: [PATCH 0863/1208] remove duplicate memory --- samtools.wdl | 1 - somaticseq.wdl | 5 ----- 2 files changed, 6 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 54215831..81b6c17d 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -49,7 +49,6 @@ task BgzipAndIndex { memory: memory time_minutes: timeMinutes docker: dockerImage - memory: memory } parameter_meta { diff --git a/somaticseq.wdl b/somaticseq.wdl index 8c7fb884..63f8362e 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -94,7 +94,6 @@ task ParallelPaired { cpu: threads memory: memory time_minutes: timeMinutes - memory: memory docker: dockerImage } @@ -209,7 +208,6 @@ 
task ParallelPairedTrain { cpu: threads memory: memory time_minutes: timeMinutes - memory: memory docker: dockerImage } @@ -312,7 +310,6 @@ task ParallelSingle { cpu: threads memory: memory time_minutes: timeMinutes - memory: memory docker: dockerImage } @@ -404,7 +401,6 @@ task ParallelSingleTrain { cpu: threads memory: memory time_minutes: timeMinutes - memory: memory docker: dockerImage } @@ -466,7 +462,6 @@ task ModifyStrelka { runtime { memory: memory time_minutes: timeMinutes - memory: memory docker: dockerImage } From ab17de947e0509b853a60e87e80399e1ca83f826 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 Jul 2021 13:37:37 +0200 Subject: [PATCH 0864/1208] add task for peach --- peach.wdl | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 peach.wdl diff --git a/peach.wdl b/peach.wdl new file mode 100644 index 00000000..9321d6bf --- /dev/null +++ b/peach.wdl @@ -0,0 +1,77 @@ +version 1.0 + +# Copyright (c) 2021 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Peach { + input { + File transcriptTsv + File germlineVcf + File germlineVcfIndex + File tumorName + File normalName + String outputDir = "./peach" + File panelJson + + String memory = "8G" + String dockerImage = "quay.io/biowdl/peach:v1.0" + Int timeMinutes = 20 + } + + command { + peach \ + --recreate_bed \ + --transcript_tsv ~{transcriptTsv} \ + ~{germlineVcf} \ + ~{tumorName} \ + ~{normalName} \ + 1.0 \ + ~{outputDir} \ + ~{panelJson} \ + vcftools + } + + output { + File callsTsv = "~{outputDir}/~{tumorName}.peach.calls.tsv" + File filteredVcf = "~{outputDir}/~{tumorName}.peach.filtered.vcf" + File genotypeTsv = "~{outputDir}/~{tumorName}.peach.genotype.tsv" + Array[File] peachFiles = [callsTsv, filterVcf, genotypeTsv] + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + transcriptTsv: {description: "A tsv file describing transcripts.", category: "required"} + germlineVcf: {description: "The germline VCF file from hmftools' purple.", category: "required"} + germlineVcfIndex: {description: "The germline VCF's index.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + normalName: {description: "The name of the normal sample", category: "required"} + outputDir: {description: "The directory the ouput should be written to.", category: "required"} + panelJson: {description: "A JSON describing the panel.", category: "required"} + + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The 
docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file From 1648c818b856f22ed9e7c8b6443d2e9bc072eb6a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 Jul 2021 14:21:41 +0200 Subject: [PATCH 0865/1208] rename array output peach --- peach.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/peach.wdl b/peach.wdl index 9321d6bf..72c7fde6 100644 --- a/peach.wdl +++ b/peach.wdl @@ -52,7 +52,7 @@ task Peach { File callsTsv = "~{outputDir}/~{tumorName}.peach.calls.tsv" File filteredVcf = "~{outputDir}/~{tumorName}.peach.filtered.vcf" File genotypeTsv = "~{outputDir}/~{tumorName}.peach.genotype.tsv" - Array[File] peachFiles = [callsTsv, filterVcf, genotypeTsv] + Array[File] outputs = [callsTsv, filterVcf, genotypeTsv] } runtime { From dcafd29087866bfa4bc464e9fd301e8de234c138 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 Jul 2021 15:20:03 +0200 Subject: [PATCH 0866/1208] fix validation issues --- peach.wdl | 2 +- samtools.wdl | 2 -- scripts | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/peach.wdl b/peach.wdl index 72c7fde6..5e0746aa 100644 --- a/peach.wdl +++ b/peach.wdl @@ -52,7 +52,7 @@ task Peach { File callsTsv = "~{outputDir}/~{tumorName}.peach.calls.tsv" File filteredVcf = "~{outputDir}/~{tumorName}.peach.filtered.vcf" File genotypeTsv = "~{outputDir}/~{tumorName}.peach.genotype.tsv" - Array[File] outputs = [callsTsv, filterVcf, genotypeTsv] + Array[File] outputs = [callsTsv, filteredVcf, genotypeTsv] } runtime { diff --git a/samtools.wdl b/samtools.wdl index 7eb86351..81b6c17d 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -28,7 +28,6 @@ task BgzipAndIndex { String memory = "2G" Int timeMinutes = 1 + ceil(size(inputFile, "G")) - String memory = "1G" String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -59,7 +58,6 @@ task BgzipAndIndex { type: {description: "The type of file 
(eg. vcf or bed) to be compressed and indexed.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs diff --git a/scripts b/scripts index 84690a30..c31670d3 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 +Subproject commit c31670d3a9222a2feafc649cbc118c95afbc7189 From dce31f572b08b3ef1ff3209f101ec4e3e838646c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 29 Jul 2021 12:05:22 +0200 Subject: [PATCH 0867/1208] update linx version --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index a327fd0b..1e25938d 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -464,7 +464,7 @@ task Linx { String memory = "5G" String javaXmx = "4G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.15--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.16--hdfd78af_0" } command { From 09d899b85aec47bcb065cb8b584e703828d488e7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Aug 2021 13:27:41 +0200 Subject: [PATCH 0868/1208] add bedtools coverage --- bedtools.wdl | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/bedtools.wdl b/bedtools.wdl index 3dbf93cb..f8713d2e 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -66,6 +66,55 @@ task Complement { } } +task Coverage { + input { + File genomeFile + File a + File? aIndex + File b + File? 
bIndex + String outputPath = "./coverage.tsv" + + String memory = "8G" + Int timeMinutes = 120 + String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + } + + command { + bedtools coverage \ + -sorted \ + -g ~{genomeFile} \ + -a ~{a} \ + -b ~{b} \ + -d \ + > ~{outputPath} + } + + output { + File coverageTsv = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + genomeFile: {description: "A file listing the chromosomes and their lengths.", category: "required"} + a: {description: "The file containing the regions for which the coverage will be counted.", category: "required"} + aIndex: {description: "An index for the file given as `a`.", category: "common"} + b: {description: "The file in which the coverage will be counted. Likely a BAM file.", category: "required"} + bIndex: {description: "An index for the file given as `b`.", category: "common"} + outputPath: {description: "The path the ouptu will be written to.", category: "common"} + + memory: {description: "The amount of memory needed for the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + } +} + task Merge { input { File inputBed From 70cda88f96eecabb9b9a8d5f75f88515c0840a8f Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 18 Aug 2021 16:00:25 +0200 Subject: [PATCH 0869/1208] add deconstructSigs task --- deconstructsigs.wdl | 66 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 deconstructsigs.wdl diff --git a/deconstructsigs.wdl b/deconstructsigs.wdl new file mode 100644 index 00000000..ef47e3e3 --- /dev/null +++ b/deconstructsigs.wdl @@ -0,0 +1,66 @@ +# Copyright (c) 2021 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +version 1.0 + +task DeconstructSigs { + input { + File signaturesMatrix + File signaturesReference + String outputPath = "./signatures.rds" + + Int timeMinutes = 15 + String memory = "4G" + String dockerImage = "quay.io/biocontainers/r-deconstructsigs:1.9.0--r41hdfd78af_1" + } + + command { + R --no-echo << EOF + library(deconstructSigs) + tumor <- read.table("~{signaturesMatrix}", check.names=F) + ref <- data.frame(t(read.table("~{signaturesReference}", check.names=F, header=T, row.names="Type")), check.names=F) + tumor <- tumor[,colnames(ref)] + + sigs <- whichSignatures(tumor.ref=tumor, row.names(tumor), signatures.ref=ref, contexts.needed=T) + saveRDS(sigs, "~{outputPath}") + EOF + } + + output { + File signatureRDS = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + signaturesMatrix: {description: "A table containing columns represtenting mutation types (matching the types in the signatures reference) and one row with the counts for each of these types for the sample of intrest.", + category: "required"} + signaturesReference: {description: "A table describing the mutational signatures, formatted like those provided by COSMIC.", + category: "required"} + outputPath: {description: "The location the output will be written to.", category: "common"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file From 26574bf26bef2663e9a67fe99c2a241762eb4365 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 20 Aug 2021 13:43:07 +0200 Subject: [PATCH 0870/1208] update bedtools version for coverage --- bedtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bedtools.wdl b/bedtools.wdl index f8713d2e..1d956cab 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -77,7 +77,7 @@ task Coverage { String memory = "8G" Int timeMinutes = 120 - String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + String dockerImage = "quay.io/biocontainers/bedtools:2.30.0--h7d7f7ad_2" } command { From e8df466dfba91be4e2c08e9fa57607ad48936d01 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 23 Aug 2021 12:11:39 +0200 Subject: [PATCH 0871/1208] fix incorrect type --- peach.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/peach.wdl b/peach.wdl index 5e0746aa..b57842f7 100644 --- a/peach.wdl +++ b/peach.wdl @@ -25,8 +25,8 @@ task Peach { File transcriptTsv File germlineVcf File germlineVcfIndex - File tumorName - File normalName + String tumorName + String normalName String outputDir = "./peach" File panelJson From d76faa5a05528e6a74488b46a18bdfcd1a9402ea Mon Sep 17 00:00:00 2001 From: cedrick Date: Fri, 17 Sep 2021 09:55:37 +0200 Subject: [PATCH 0872/1208] update bcftools.wdk --- bcftools.wdl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 0cbfdefd..0738d156 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -290,7 +290,7 @@ task View { input { File inputFile String outputPath = "output.vcf" - + Boolean excludeUncalled = false String? exclude String? 
include String memory = "256M" @@ -304,8 +304,9 @@ task View { set -e mkdir -p "$(dirname ~{outputPath})" bcftools view \ - ~{"--include " + include} \ ~{"--exclude " + exclude} \ + ~{"--include " + include} \ + ~{true="--exclude-uncalled" false="" excludeUncalled} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ ~{inputFile} @@ -330,6 +331,8 @@ task View { outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} + excludeUncalled: {description: "Exclude sites without a called genotype (see man page for details).", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 15b12101e04df8d842f68cb5ddef7f7f8a932a9e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 24 Sep 2021 14:55:43 +0200 Subject: [PATCH 0873/1208] fix protect command with multiple doids --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 1e25938d..199d7d88 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -577,7 +577,7 @@ task Protect { -ref_genome_version ~{refGenomeVersion} \ -tumor_sample_id ~{tumorName} \ -reference_sample_id ~{normalName} \ - -primary_tumor_doids ~{sep=";" sampleDoids} \ + -primary_tumor_doids '~{sep=";" sampleDoids}' \ -output_dir ~{outputDir} \ -serve_actionability_dir ~{sub(serveActionability[0], basename(serveActionability[0]), "")} \ -doid_json ~{doidsJson} \ From a7a504e4a3589787d8c25c5ca97149598b65f572 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 24 Sep 2021 17:01:47 +0200 Subject: [PATCH 0874/1208] adjust resource settings --- gridss.wdl | 8 ++++---- hmftools.wdl | 4 ++-- peach.wdl | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index fcfed095..03193cca 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -94,7 +94,7 @@ task GRIDSS { File? 
gridssProperties Int jvmHeapSizeGb = 64 - Int threads = 4 + Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" } @@ -163,10 +163,10 @@ task GridssAnnotateVcfRepeatmasker { File gridssVcfIndex String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" - String memory = "50G" - Int threads = 4 + String memory = "25G" + Int threads = 8 String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" - Int timeMinutes = 2880 + Int timeMinutes = 1440 } command { diff --git a/hmftools.wdl b/hmftools.wdl index 199d7d88..f8b13c66 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -799,10 +799,10 @@ task Sage { String? mnvFilterEnabled File? coverageBed - Int threads = 2 + Int threads = 4 String javaXmx = "50G" String memory = "60G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 8 / threads) + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 9 / threads) String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_0" } diff --git a/peach.wdl b/peach.wdl index b57842f7..af44daec 100644 --- a/peach.wdl +++ b/peach.wdl @@ -30,9 +30,9 @@ task Peach { String outputDir = "./peach" File panelJson - String memory = "8G" + String memory = "2G" String dockerImage = "quay.io/biowdl/peach:v1.0" - Int timeMinutes = 20 + Int timeMinutes = 5 } command { From 9d5972de8bd3cb4e0766a78461a989f878f88999 Mon Sep 17 00:00:00 2001 From: cagaser Date: Thu, 30 Sep 2021 11:44:16 +0200 Subject: [PATCH 0875/1208] update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b3dbc7f6..71df5def 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.0.1 --------------------------- ++ Smoove: enable genotyping ++ Bcftools: add boolean option to remove uncalled genotypes. + add runtime memory to number of tasks. 
version 5.0.0 From 48f0c3ebf543b0c2e707c73fb00bdafe308a4395 Mon Sep 17 00:00:00 2001 From: cagaser Date: Fri, 1 Oct 2021 13:26:07 +0200 Subject: [PATCH 0876/1208] update changelog --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71df5def..a6df9307 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,6 @@ that users understand how the changes affect the new version. version 5.0.1 --------------------------- + Smoove: enable genotyping -+ Bcftools: add boolean option to remove uncalled genotypes. + add runtime memory to number of tasks. version 5.0.0 From c48f3bb7078e52bbb653848857028ddc9d43a6de Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 28 Oct 2021 12:32:49 +0200 Subject: [PATCH 0877/1208] increase memory for sambamba markdup --- sambamba.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sambamba.wdl b/sambamba.wdl index e78f50b6..4c2115e0 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -81,8 +81,8 @@ task Markdup { # 2 threads reduces wall clock time by more than 40%. Int threads = 2 # According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size. - # Added 4096 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. - Int memoryMb = 4096 + sortBufferSize + 2 * ioBufferSize + # Added 8192 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. + Int memoryMb = 8192 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
Int timeMinutes = 1 + ceil(size(inputBams, "G") * 25) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" From 0932a62d6a00e5c600fcda7c3fa3a7aec40638bb Mon Sep 17 00:00:00 2001 From: Jeremy Leipzig Date: Thu, 28 Oct 2021 08:52:01 -0600 Subject: [PATCH 0878/1208] Update chunked-scatter.wdl Older container is not producing the necessary bed file --- chunked-scatter.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index fba1af5a..66954c36 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -86,7 +86,7 @@ task ScatterRegions { String memory = "256M" Int timeMinutes = 2 - String dockerImage = "quay.io/biocontainers/chunked-scatter:0.2.0--py_0" + String dockerImage = "quay.io/biocontainers/chunked-scatter:1.0.0--py_0" } String finalSize = if defined(scatterSize) then "~{scatterSize}" else "~{scatterSizeMillions}000000" From 8224e2cb52132a7978db5760afa813d640d2bb74 Mon Sep 17 00:00:00 2001 From: Jeremy Leipzig Date: Thu, 28 Oct 2021 08:55:46 -0600 Subject: [PATCH 0879/1208] Update CHANGELOG.md --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a6df9307..6d40cd1f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> +version 5.0.2 +--------------------------- ++ bumped ScatterRegions container to 1.0.0 + version 5.0.1 --------------------------- + Smoove: enable genotyping From 9e868dbcfbd4374ef6e04fbe389bf550be67a6ba Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 8 Nov 2021 14:26:37 +0100 Subject: [PATCH 0880/1208] add img input for virusbreakend --- gridss.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 03193cca..b36d6598 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -207,6 +207,7 @@ task Virusbreakend { File bam File bamIndex File referenceFasta + File referenceImg File virusbreakendDB String outputPath = "./virusbreakend.vcf" @@ -246,6 +247,7 @@ task Virusbreakend { bam: {description: "A BAM file.", category: "required"} bamIndex: {description: "The index for the BAM file.", category: "required"} referenceFasta: {description: "The fasta of the reference genome.", category: "required"} + referenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the reference.", category: "required"} virusbreakendDB: {description: "A .tar.gz containing the virusbreakend database.", category: "required"} outputPath: {description: "The path the output should be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 0a1995df4f853799cb945a2bc8d3ac0062039efd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 9 Nov 2021 12:29:10 +0100 Subject: [PATCH 0881/1208] try version 2.11.1 for gridss --- gridss.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index b36d6598..5c267e79 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -34,7 +34,7 @@ task AnnotateInsertedSequence { Int threads = 8 String javaXmx = "8G" String memory = "9G" - String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" Int timeMinutes = 120 } @@ 
-96,7 +96,7 @@ task GRIDSS { Int jvmHeapSizeGb = 64 Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" } command { @@ -165,7 +165,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "25G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" Int timeMinutes = 1440 } @@ -213,7 +213,7 @@ task Virusbreakend { String memory = "75G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.12.0--h270b39a_1" + String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" Int timeMinutes = 180 } From 7d1f9c92406f9865e8c035a5bd19feea5a22b7ae Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 10 Nov 2021 12:46:04 +0100 Subject: [PATCH 0882/1208] upgrade gridss to 2.12.2 --- gridss.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 5c267e79..1f14e23b 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -34,7 +34,7 @@ task AnnotateInsertedSequence { Int threads = 8 String javaXmx = "8G" String memory = "9G" - String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" + String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" Int timeMinutes = 120 } @@ -96,7 +96,7 @@ task GRIDSS { Int jvmHeapSizeGb = 64 Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" + String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" } command { @@ -165,14 +165,14 @@ task GridssAnnotateVcfRepeatmasker { String memory = "25G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" + String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" Int timeMinutes = 1440 } command { 
gridss_annotate_vcf_repeatmasker \ --output ~{outputPath} \ - --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ + --jar /usr/local/share/gridss-2.12.2-0/gridss.jar \ -w . \ -t ~{threads} \ ~{gridssVcf} @@ -213,7 +213,7 @@ task Virusbreakend { String memory = "75G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.11.1--hdfd78af_1" + String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" Int timeMinutes = 180 } @@ -226,7 +226,7 @@ task Virusbreakend { --workingdir . \ --reference ~{referenceFasta} \ --db virusbreakenddb \ - --jar /usr/local/share/gridss-2.12.0-1/gridss.jar \ + --jar /usr/local/share/gridss-2.12.2-0/gridss.jar \ -t ~{threads} \ ~{bam} } From f9ed6158bfe70792d546e8e68b205f197c52b2ff Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 11 Nov 2021 10:59:46 +0100 Subject: [PATCH 0883/1208] increase memory gridss --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 1f14e23b..2e68ed88 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,7 +93,7 @@ task GRIDSS { File? blacklistBed File? gridssProperties - Int jvmHeapSizeGb = 64 + Int jvmHeapSizeGb = 85 Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" From c9657636bed7c7046e3799a0c3fca36473ae80e6 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 12 Nov 2021 10:07:54 +0100 Subject: [PATCH 0884/1208] increase gridss memory --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 2e68ed88..13596a48 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,7 +93,7 @@ task GRIDSS { File? blacklistBed File? 
gridssProperties - Int jvmHeapSizeGb = 85 + Int jvmHeapSizeGb = 185 Int threads = 8 Int timeMinutes = ceil(4320 / threads) + 10 String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" From f64bd5367fee90d51d47db7c29af13816c9fedbe Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 12 Nov 2021 16:08:53 +0100 Subject: [PATCH 0885/1208] use alternative gridss image for virusbreakend --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 13596a48..b448a2dc 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -213,7 +213,7 @@ task Virusbreakend { String memory = "75G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 180 } From 1a9a8058f3991c0b76e934837dc64f80805fc4c6 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 12 Nov 2021 21:55:59 +0100 Subject: [PATCH 0886/1208] change gridss runtime settings --- gridss.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index b448a2dc..d93f1b80 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -34,7 +34,7 @@ task AnnotateInsertedSequence { Int threads = 8 String javaXmx = "8G" String memory = "9G" - String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 120 } @@ -94,9 +94,9 @@ task GRIDSS { File? 
gridssProperties Int jvmHeapSizeGb = 185 - Int threads = 8 - Int timeMinutes = ceil(4320 / threads) + 10 - String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" + Int threads = 4 + Int timeMinutes = ceil(7200 / threads) + 180 + String dockerImage = "quay.io/biowdl/gridss:2.12.2" } command { @@ -165,7 +165,7 @@ task GridssAnnotateVcfRepeatmasker { String memory = "25G" Int threads = 8 - String dockerImage = "quay.io/biocontainers/gridss:2.12.2--h270b39a_0" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 1440 } From d3d2040093a79814a1bf0488d13a44342068c5b5 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Sat, 13 Nov 2021 16:12:52 +0100 Subject: [PATCH 0887/1208] gridss more memory --- gridss.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index d93f1b80..8c05fe61 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,7 +93,8 @@ task GRIDSS { File? blacklistBed File? gridssProperties - Int jvmHeapSizeGb = 185 + Int jvmHeapSizeGb = 200 + Int nonJvmMemoryGb = 50 Int threads = 4 Int timeMinutes = ceil(7200 / threads) + 180 String dockerImage = "quay.io/biowdl/gridss:2.12.2" @@ -126,7 +127,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb + 15}G" + memory: "~{jvmHeapSizeGb + nonJvmMemoryGb}G" time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } From e9f3c5fdf8aef7082911f6c40730264187cc6884 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 15 Nov 2021 11:55:52 +0100 Subject: [PATCH 0888/1208] make recovery sv vcf optional in purple --- hmftools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index f8b13c66..1537bce5 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -648,8 +648,8 @@ task Purple { File somaticVcf File germlineVcf File filteredSvVcf - File fullSvVcf - File fullSvVcfIndex + File? fullSvVcf + File? 
fullSvVcfIndex File referenceFasta File referenceFastaFai File referenceFastaDict @@ -676,7 +676,7 @@ task Purple { -somatic_vcf ~{somaticVcf} \ -germline_vcf ~{germlineVcf} \ -structural_vcf ~{filteredSvVcf} \ - -sv_recovery_vcf ~{fullSvVcf} \ + ~{"-sv_recovery_vcf " + fullSvVcf} \ -circos /usr/local/bin/circos \ -ref_genome ~{referenceFasta} \ -driver_catalog \ From 787ad56b36f24099ece60ae56a43af46cbbeaf00 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 26 Nov 2021 13:46:20 +0100 Subject: [PATCH 0889/1208] give dictionary and index to virusbreakend --- gridss.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 8c05fe61..b43a3837 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -208,6 +208,8 @@ task Virusbreakend { File bam File bamIndex File referenceFasta + File referenceFastaFai + File referenceFastaDict File referenceImg File virusbreakendDB String outputPath = "./virusbreakend.vcf" From 86a249825272f9bb4384f87057593047402a1a37 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Dec 2021 15:36:36 +0100 Subject: [PATCH 0890/1208] Add sampleposition in array task --- common.wdl | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/common.wdl b/common.wdl index d29ed5da..fc8dc481 100644 --- a/common.wdl +++ b/common.wdl @@ -148,6 +148,43 @@ task CreateLink { } } +task GetSamplePositionInArray { + input { + Array[String] sampleIds + String sample + + # python:3.7-slim's sha256 digest. This image is based on debian buster. + String dockerImage = "python@sha256:e0f6a4df17d5707637fa3557ab266f44dddc46ebfc82b0f1dbe725103961da4e" + } + + command <<< + python <>> + + output { + Int position = read_int(stdout()) + } + + runtime { + # 4 gigs of memory to be able to build the docker image in singularity. 
+ memory: "4G" + docker: dockerImage + } + + parameter_meta { + # inputs + sampleIds: {description: "A list of sample ids.", category: "required"} + sample: {description: "The sample for which the position is wanted.", category: "required"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + position: {description: ""} + } +} + task MapMd5 { input { Map[String,String] map From d970e6892b1e61d34c99e507fb3a62b7b04f2fc1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 6 Dec 2021 16:33:41 +0100 Subject: [PATCH 0891/1208] Require 5 minutes --- common.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/common.wdl b/common.wdl index fc8dc481..1e4fc8cb 100644 --- a/common.wdl +++ b/common.wdl @@ -172,6 +172,7 @@ task GetSamplePositionInArray { # 4 gigs of memory to be able to build the docker image in singularity. memory: "4G" docker: dockerImage + timeMinutes: 5 } parameter_meta { From c21d27ff32bdf7210dddf98a711e32192e820a82 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 14 Dec 2021 14:48:24 +0100 Subject: [PATCH 0892/1208] Add parameter_meta for macs2 --- macs2.wdl | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index 757eaf67..cbce18e9 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -26,11 +26,10 @@ task PeakCalling { Array[File]+ inputBamsIndex Array[File]+? controlBams Array[File]+? 
controlBamsIndex - String outDir + String outDir = "macs2" String sampleName Boolean nomodel = false - Int threads = 1 String memory = "8G" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } @@ -50,8 +49,21 @@ task PeakCalling { } runtime { - cpu: threads + cpu: 1 memory: memory docker: dockerImage } + parameter_meta { + inputBams: {description: "The BAM files on which to perform peak calling.", category: "required"} + inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} + controlBams: {description: "Control BAM files for the input bam files.", category: "required"} + controlBamsIndex: {description: "The indexes for the control BAM files.", category: "required"} + sampleName: {description: "Name of the sample to be analysed", category: "required"} + outDir: {description: "All output files will be written in this directory.", category: "advanced"} + nomodel: {description: "Whether or not to build the shifting model.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + } } From 24ef56348f4ca8900f639d05aa28ec25fda3fbd1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 14 Dec 2021 14:52:15 +0100 Subject: [PATCH 0893/1208] Add time minutes parameter --- macs2.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index cbce18e9..983630c5 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -29,7 +29,7 @@ task PeakCalling { String outDir = "macs2" String sampleName Boolean nomodel = false - + Int timeMinutes = 600 # Default to 10 hours String memory = "8G" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } @@ -52,6 +52,7 @@ task PeakCalling { cpu: 1 memory: memory docker: dockerImage + time_minutes: timeMinutes } parameter_meta { inputBams: {description: "The BAM files on which to perform peak calling.", category: "required"} From 17746ebbb5668b8382050105b69f33273019a512 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 14 Dec 2021 14:56:37 +0100 Subject: [PATCH 0894/1208] Update changelog --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d40cd1f..126f1ed9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> +version 5.1.0-dev +--------------------------- ++ Update parameter_meta for macs2 ++ Add sample position in array task. 
+ version 5.0.2 --------------------------- + bumped ScatterRegions container to 1.0.0 From 019cbb96a68c2fca141c955126b0ad9b97511f2a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 14 Dec 2021 16:00:30 +0100 Subject: [PATCH 0895/1208] More correct evaluation of controlBams input --- macs2.wdl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index 983630c5..eb71ac1d 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -24,8 +24,8 @@ task PeakCalling { input { Array[File]+ inputBams Array[File]+ inputBamsIndex - Array[File]+? controlBams - Array[File]+? controlBamsIndex + Array[File] controlBams + Array[File] controlBamsIndex String outDir = "macs2" String sampleName Boolean nomodel = false @@ -38,7 +38,7 @@ task PeakCalling { set -e macs2 callpeak \ --treatment ~{sep = ' ' inputBams} \ - ~{true="--control" false="" defined(controlBams)} ~{sep = ' ' controlBams} \ + ~{true="--control" false="" length(controlBams) > 0} ~{sep = ' ' controlBams} \ --outdir ~{outDir} \ --name ~{sampleName} \ ~{true='--nomodel' false='' nomodel} @@ -57,8 +57,8 @@ task PeakCalling { parameter_meta { inputBams: {description: "The BAM files on which to perform peak calling.", category: "required"} inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"} - controlBams: {description: "Control BAM files for the input bam files.", category: "required"} - controlBamsIndex: {description: "The indexes for the control BAM files.", category: "required"} + controlBams: {description: "Control BAM files for the input bam files.", category: "common"} + controlBamsIndex: {description: "The indexes for the control BAM files.", category: "common"} sampleName: {description: "Name of the sample to be analysed", category: "required"} outDir: {description: "All output files will be written in this directory.", category: "advanced"} nomodel: {description: "Whether or not to build the shifting model.", category: "advanced"} From 
9c5ebf6bb9d32d030b783ed03f329db735a92b6f Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 15 Dec 2021 15:27:24 +0100 Subject: [PATCH 0896/1208] add umiAwareMarkDuplicate --- picard.wdl | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/picard.wdl b/picard.wdl index f75fdc32..0e189a60 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1004,3 +1004,57 @@ task RenameSample { renamedVcf: {description: "New VCF with renamed sample."} } } + +task UmiAwareMarkDuplicatesWithMateCigar { + input { + File inputBam + String outputPathBam + String outputPathMetrics + String outputPathUmiMetrics + String tempdir + Boolean dedup = true + + String memory = "10G" + Int timeMinutes = 360 + String dockerImage = "quay.io/biocontainers/picard:2.25.7--hdfd78af_0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" ~{tempdir} + picard UmiAwareMarkDuplicatesWithMateCigar \ + I=~{inputBam} \ + O=~{outputPathBam} \ + M=~{outputPathMetrics} \ + UMI_METRICS_FILE=~{outputPathUmiMetrics} \ + TMP_DIR=~{tempdir} \ + REMOVE_DUPLICATES=~{dedup} \ + CREATE_INDEX=true \ + } + + output { + File outputBam = outputPathBam + File outputBamIndex = sub(outputPathBam, "\.bam$", ".bai") + File outputMetrics = outputPathMetrics + File outputUmiMetrics = outputPathUmiMetrics + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputBam: {description: "The unsorted input BAM file.", category: "required"} + outputPathBam: {description: "The location the output BAM file should be written to.", category: "required"} + outputPathMetrics: {description: "The location the output metrics file should be written to.", category: "required"} + outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} + tmpDir: {description: "Temporary directory.", category: "advanced"} + memory: {description: "The amount of memory this job will 
use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + } +} \ No newline at end of file From 010ce0ac0835f0faa1353f3f43b544c2b0ecb50c Mon Sep 17 00:00:00 2001 From: cagaser Date: Wed, 15 Dec 2021 15:27:38 +0100 Subject: [PATCH 0897/1208] add annotateBamWithUmi --- fgbio.wdl | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 fgbio.wdl diff --git a/fgbio.wdl b/fgbio.wdl new file mode 100644 index 00000000..d50906d3 --- /dev/null +++ b/fgbio.wdl @@ -0,0 +1,68 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task AnnotateBamWithUmis { + input { + File inputBam + File inputUmi + String outputPath + + String memory = "120G" + Int timeMinutes = 360 + String javaXmx="100G" + String dockerImage = "quay.io/biocontainers/fgbio:1.4.0--hdfd78af_0" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPath})" + fgbio -Xmx~{javaXmx} \ + AnnotateBamWithUmis \ + -i ~{inputBam} \ + -f ~{inputUmi} \ + -o ~{outputPath} + } + + output { + File outputBam = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM file.", category: "required"} + inputUmi: {description: "The input fastq file with UMIs.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "UMI-annotated output BAM file."} + } +} From 014d43cc204fcf1f7159717c047210ca3f008c40 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 4 Jan 2022 13:32:55 +0100 Subject: [PATCH 0898/1208] Make sure task is consistent --- picard.wdl | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/picard.wdl b/picard.wdl index 0e189a60..d8ce5ebe 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1008,11 +1008,11 @@ task RenameSample { task UmiAwareMarkDuplicatesWithMateCigar { input { File inputBam - String outputPathBam - String outputPathMetrics - String outputPathUmiMetrics - String tempdir - Boolean dedup = true + String outputPath + String outputPathMetrics = outputPath + ".metrics" + String outputPathUmiMetrics = outputPath + ".umi-metrics" + String tempdir = "temp" + Boolean removeDuplicates = true String memory = "10G" Int timeMinutes = 360 @@ -1024,17 +1024,17 @@ task UmiAwareMarkDuplicatesWithMateCigar { mkdir -p "$(dirname ~{outputPath})" ~{tempdir} picard UmiAwareMarkDuplicatesWithMateCigar \ I=~{inputBam} \ - O=~{outputPathBam} \ + O=~{outputPath} \ M=~{outputPathMetrics} \ UMI_METRICS_FILE=~{outputPathUmiMetrics} \ TMP_DIR=~{tempdir} \ - REMOVE_DUPLICATES=~{dedup} \ + REMOVE_DUPLICATES=~{removeDuplicates} \ CREATE_INDEX=true \ } output { - File outputBam = outputPathBam - File outputBamIndex = sub(outputPathBam, "\.bam$", ".bai") + File outputBam = outputPath + File outputBamIndex = sub(outputPath, "\.bam$", ".bai") File outputMetrics = outputPathMetrics File outputUmiMetrics = outputPathUmiMetrics } @@ -1048,10 +1048,11 @@ task UmiAwareMarkDuplicatesWithMateCigar { parameter_meta { # inputs inputBam: {description: "The unsorted input BAM file.", category: "required"} - outputPathBam: {description: "The location the output BAM file should be written to.", category: "required"} 
+ outputPath: {description: "The location the output BAM file should be written to.", category: "required"} outputPathMetrics: {description: "The location the output metrics file should be written to.", category: "required"} outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} - tmpDir: {description: "Temporary directory.", category: "advanced"} + removeDuplicates: {description: "Whether the duplicate reads should be removed instead of marked.", category: "common"} + tempdir: {description: "Temporary directory.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 8ccabed5e8c56d2f742d5aba829104fe8db00d2d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 4 Jan 2022 14:10:45 +0100 Subject: [PATCH 0899/1208] Allow multiple bam inputs --- picard.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/picard.wdl b/picard.wdl index d8ce5ebe..d2a6ca35 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1007,7 +1007,7 @@ task RenameSample { task UmiAwareMarkDuplicatesWithMateCigar { input { - File inputBam + Array[File] inputBams String outputPath String outputPathMetrics = outputPath + ".metrics" String outputPathUmiMetrics = outputPath + ".umi-metrics" @@ -1023,7 +1023,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { set -e mkdir -p "$(dirname ~{outputPath})" ~{tempdir} picard UmiAwareMarkDuplicatesWithMateCigar \ - I=~{inputBam} \ + INPUT=~{sep=' INPUT=' inputBams} \ O=~{outputPath} \ M=~{outputPathMetrics} \ UMI_METRICS_FILE=~{outputPathUmiMetrics} \ @@ -1047,7 +1047,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { parameter_meta { # inputs - 
inputBam: {description: "The unsorted input BAM file.", category: "required"} + inputBams: {description: "The BAM files for which the duplicate reads should be marked.", category: "required"} outputPath: {description: "The location the output BAM file should be written to.", category: "required"} outputPathMetrics: {description: "The location the output metrics file should be written to.", category: "required"} outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} From 89eaf097695f6bda12a20f0d5ce993a230a8342a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 11:11:01 +0100 Subject: [PATCH 0900/1208] Add script to extract umi from read name --- umi.wdl | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 umi.wdl diff --git a/umi.wdl b/umi.wdl new file mode 100644 index 00000000..fdf764f4 --- /dev/null +++ b/umi.wdl @@ -0,0 +1,100 @@ +version 1.0 + +# Copyright (c) 2022 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task BamReadNameToUmiTag { + + # This task processes a bam file with reads that have been extracted with + # umi-tools extract. The UMI is extracted from the read name again and put + # in the bam file again with umiTag (default RX) + input { + File inputBam + String outputPath = "output.bam" + String umiTag = "RX" + + String memory = "2G" + Int timeMinutes = 1 + ceil(size([inputBam], "G") * 10) + String dockerImage = "quay.io/biocontainers/pysam:0.17.0--py39h051187c_0" + } + String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") + command <<< + python < Tuple[str, str]: + id_and_rest = name.split(maxsplit=1) + if len(id_and_rest) == 1: + id, = id_and_rest + other_parts = "" + else: + id, other_parts = id_and_rest + underscore_index = id.rfind("_") + umi = id[underscore_index + 1:] + new_id = id[:underscore_index] + if other_parts: + return " ".join([new_id, other_parts]), umi + return new_id, umi + + def annotate_umis(in_file, out_file, bam_tag = "RX"): + in_bam = pysam.AlignmentFile(in_file, "rb") + out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) + for segment in in_bam: # type: pysam.AlignedSegment + new_name, umi = split_umi_from_name(segment.query_name) + segment.query_name = new_name + # append does not work. (Pysam is not Pythonic.) 
+ segment.tags = segment.tags + [(bam_tag, umi)] + out_bam.write(segment) + + if __name__ == "__main__": + annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}") + pysam.index("~{outputPath}", "~{bamIndexPath}", b=True) + CODE + >>> + + output { + File outputBam = outputBam + File outputBamIndex = outputBamIndex + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputBam: {description: "The input SAM file.", category: "required"} + outputPath: {description: "Output directory path + output file.", category: "common"} + umiTag: {description: "The tag used for UMIs in the output BAM file.", category: "common"} + + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "Sorted BAM file."} + outputBamIndex: {description: "Sorted BAM file index."} + } +} From 0a66c48bb5b75722d641d23c3421d2ca50c5ad21 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 11:46:00 +0100 Subject: [PATCH 0901/1208] Add umiTagName flag --- picard.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/picard.wdl b/picard.wdl index d2a6ca35..961364e4 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1013,6 +1013,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String outputPathUmiMetrics = outputPath + ".umi-metrics" String tempdir = "temp" Boolean removeDuplicates = true + String umiTagName = "RX" String memory = "10G" Int timeMinutes = 360 @@ -1026,6 +1027,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { INPUT=~{sep=' INPUT=' inputBams} \ O=~{outputPath} \ M=~{outputPathMetrics} \ + UMI_TAG_NAME=~{umiTagName} \ UMI_METRICS_FILE=~{outputPathUmiMetrics} \ 
TMP_DIR=~{tempdir} \ REMOVE_DUPLICATES=~{removeDuplicates} \ @@ -1052,6 +1054,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { outputPathMetrics: {description: "The location the output metrics file should be written to.", category: "required"} outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} removeDuplicates: {description: "Whether the duplicate reads should be removed instead of marked.", category: "common"} + umiTagName: {description: "Which tag in the BAM file holds the UMI.", category: "common"} tempdir: {description: "Temporary directory.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From d383b38d49cec511e9b6212dc1507e10ddc2fcec Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 12:26:23 +0100 Subject: [PATCH 0902/1208] Dedent overindented code --- umi.wdl | 56 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/umi.wdl b/umi.wdl index fdf764f4..7c435654 100644 --- a/umi.wdl +++ b/umi.wdl @@ -37,38 +37,38 @@ task BamReadNameToUmiTag { String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") command <<< python < Tuple[str, str]: - id_and_rest = name.split(maxsplit=1) - if len(id_and_rest) == 1: - id, = id_and_rest - other_parts = "" - else: - id, other_parts = id_and_rest - underscore_index = id.rfind("_") - umi = id[underscore_index + 1:] - new_id = id[:underscore_index] - if other_parts: - return " ".join([new_id, other_parts]), umi - return new_id, umi + def split_umi_from_name(name) -> Tuple[str, str]: + id_and_rest = name.split(maxsplit=1) + if len(id_and_rest) == 1: + id, = id_and_rest + other_parts = "" + else: + id, other_parts = id_and_rest + underscore_index = id.rfind("_") + umi = id[underscore_index + 1:] + 
new_id = id[:underscore_index] + if other_parts: + return " ".join([new_id, other_parts]), umi + return new_id, umi - def annotate_umis(in_file, out_file, bam_tag = "RX"): - in_bam = pysam.AlignmentFile(in_file, "rb") - out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) - for segment in in_bam: # type: pysam.AlignedSegment - new_name, umi = split_umi_from_name(segment.query_name) - segment.query_name = new_name - # append does not work. (Pysam is not Pythonic.) - segment.tags = segment.tags + [(bam_tag, umi)] - out_bam.write(segment) + def annotate_umis(in_file, out_file, bam_tag = "RX"): + in_bam = pysam.AlignmentFile(in_file, "rb") + out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) + for segment in in_bam: # type: pysam.AlignedSegment + new_name, umi = split_umi_from_name(segment.query_name) + segment.query_name = new_name + # append does not work. (Pysam is not Pythonic.) + segment.tags = segment.tags + [(bam_tag, umi)] + out_bam.write(segment) - if __name__ == "__main__": - annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}") - pysam.index("~{outputPath}", "~{bamIndexPath}", b=True) + if __name__ == "__main__": + annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}") + pysam.index("~{outputPath}", "~{bamIndexPath}", b=True) CODE >>> From acff4bd9fffbd5a6326b96144f2fe47c2b548a36 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 12:28:28 +0100 Subject: [PATCH 0903/1208] Also create directories --- umi.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/umi.wdl b/umi.wdl index 7c435654..360405ff 100644 --- a/umi.wdl +++ b/umi.wdl @@ -37,8 +37,9 @@ task BamReadNameToUmiTag { String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") command <<< python < Date: Tue, 11 Jan 2022 12:55:10 +0100 Subject: [PATCH 0904/1208] Correct output files --- umi.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/umi.wdl b/umi.wdl index 360405ff..2a4bc9cf 100644 --- 
a/umi.wdl +++ b/umi.wdl @@ -75,8 +75,8 @@ task BamReadNameToUmiTag { >>> output { - File outputBam = outputBam - File outputBamIndex = outputBamIndex + File outputBam = outputPath + File outputBamIndex = bamIndexPath } runtime { From 091058e29c0aba1d8c412ec21cda942e7597d23c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 13:55:03 +0100 Subject: [PATCH 0905/1208] Update changelog --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 126f1ed9..fe0667e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,11 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- -+ Update parameter_meta for macs2 ++ Add a script to subtract UMI's from the read name and add them as + a BAM tag for each BAM record. The script is in umi.BamReadNameToUmiTag. ++ Add fgbio.AnnotateBamWithUmis. ++ Add picard.UmiAwareMarkDuplicatesWithMateCigar. ++ Update parameter_meta for macs2. + Add sample position in array task. 
version 5.0.2 From 054b7a7f13891c1a85c5a4e8e596e0cfb7d5282a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 11 Jan 2022 16:36:37 +0100 Subject: [PATCH 0906/1208] Use more conventional list unpacking for clarity --- umi.wdl | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/umi.wdl b/umi.wdl index 2a4bc9cf..e149cafe 100644 --- a/umi.wdl +++ b/umi.wdl @@ -45,11 +45,9 @@ task BamReadNameToUmiTag { def split_umi_from_name(name) -> Tuple[str, str]: id_and_rest = name.split(maxsplit=1) - if len(id_and_rest) == 1: - id, = id_and_rest - other_parts = "" - else: - id, other_parts = id_and_rest + id = id_and_rest[0] + # If there was no whitespace id_and_rest will have length 1 + other_parts = id_and_rest[1] if len(id_and_rest) == 2 else "" underscore_index = id.rfind("_") umi = id[underscore_index + 1:] new_id = id[:underscore_index] From 5df62f54b036d396ad78c966e19956a47df552c3 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 14 Jan 2022 13:49:16 +0100 Subject: [PATCH 0907/1208] Add format parameter to macs2 --- macs2.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/macs2.wdl b/macs2.wdl index eb71ac1d..e17d613b 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -28,6 +28,7 @@ task PeakCalling { Array[File] controlBamsIndex String outDir = "macs2" String sampleName + String format = "AUTO" Boolean nomodel = false Int timeMinutes = 600 # Default to 10 hours String memory = "8G" @@ -41,6 +42,7 @@ task PeakCalling { ~{true="--control" false="" length(controlBams) > 0} ~{sep = ' ' controlBams} \ --outdir ~{outDir} \ --name ~{sampleName} \ + -f ~{format} \ ~{true='--nomodel' false='' nomodel} } From f05d7cb427d00a85994391b0e2829cc704bb3314 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 17 Jan 2022 09:08:56 +0100 Subject: [PATCH 0908/1208] Use set_tag call from pysam --- umi.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/umi.wdl b/umi.wdl index e149cafe..59169685 100644 --- a/umi.wdl +++ b/umi.wdl @@ 
-62,8 +62,7 @@ task BamReadNameToUmiTag { for segment in in_bam: # type: pysam.AlignedSegment new_name, umi = split_umi_from_name(segment.query_name) segment.query_name = new_name - # append does not work. (Pysam is not Pythonic.) - segment.tags = segment.tags + [(bam_tag, umi)] + segment.set_tag("RX", umi, value_type="Z") out_bam.write(segment) if __name__ == "__main__": From 28a2801941d6b56a64d1c413a4998ff220cd9899 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 17 Jan 2022 09:25:50 +0100 Subject: [PATCH 0909/1208] Use proper encoding --- umi.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/umi.wdl b/umi.wdl index 59169685..a32d646a 100644 --- a/umi.wdl +++ b/umi.wdl @@ -59,10 +59,14 @@ task BamReadNameToUmiTag { in_bam = pysam.AlignmentFile(in_file, "rb") os.makedirs(os.path.dirname(out_file), exist_ok=True) out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) + # Encode bam_tag as bytes. Otherwise pysam converts it to bytes anyway. + encoded_bam_tag = bam_tag.encode('ascii') for segment in in_bam: # type: pysam.AlignedSegment new_name, umi = split_umi_from_name(segment.query_name) segment.query_name = new_name - segment.set_tag("RX", umi, value_type="Z") + # Encode umi as ascii. Otherwise pysam encodes it to bytes anyway. + # Value type has to be a string though, otherwise pysam crashes. 
+ segment.set_tag(encoded_bam_tag, umi.encode('ascii'), value_type="Z") out_bam.write(segment) if __name__ == "__main__": From 7b2d86fef3c90983b9ca57a9aded3872756d80e3 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Jan 2022 10:52:40 +0100 Subject: [PATCH 0910/1208] Set xmx value properly for UmiAwareMarkDuplicatesWithMateCigar --- picard.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/picard.wdl b/picard.wdl index 961364e4..46b11e51 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1015,7 +1015,8 @@ task UmiAwareMarkDuplicatesWithMateCigar { Boolean removeDuplicates = true String umiTagName = "RX" - String memory = "10G" + String javaXmx = "8G" + String memory = "9G" Int timeMinutes = 360 String dockerImage = "quay.io/biocontainers/picard:2.25.7--hdfd78af_0" } @@ -1023,7 +1024,8 @@ task UmiAwareMarkDuplicatesWithMateCigar { command { set -e mkdir -p "$(dirname ~{outputPath})" ~{tempdir} - picard UmiAwareMarkDuplicatesWithMateCigar \ + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + UmiAwareMarkDuplicatesWithMateCigar \ INPUT=~{sep=' INPUT=' inputBams} \ O=~{outputPath} \ M=~{outputPathMetrics} \ From 09b97388eea432a1d0b4c37fe65f5621e13e9d0b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Jan 2022 14:18:17 +0100 Subject: [PATCH 0911/1208] Update Picard and reevaluate use of intel inflater/defaler --- picard.wdl | 99 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 67 insertions(+), 32 deletions(-) diff --git a/picard.wdl b/picard.wdl index 46b11e51..bf32c8ac 100644 --- a/picard.wdl +++ b/picard.wdl @@ -29,7 +29,7 @@ task BedToIntervalList { String javaXmx = "3G" String memory = "4G" Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -89,7 +89,7 @@ task CollectHsMetrics { # Additional * 2 because picard multiple metrics reads the # reference fasta twice. 
Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -158,7 +158,7 @@ task CollectMultipleMetrics { Int memoryMb = javaXmxMb + 512 # Additional * 2 because picard multiple metrics reads the reference fasta twice. Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -284,7 +284,7 @@ task CollectRnaSeqMetrics { String memory = "9G" # With 6 minutes per G there were several timeouts. Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -342,7 +342,7 @@ task CollectTargetedPcrMetrics { String javaXmx = "3G" String memory = "4G" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -404,7 +404,7 @@ task CollectVariantCallingMetrics { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -453,7 +453,7 @@ task CreateSequenceDictionary { String javaXmx = "2G" String memory = "3G" - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -497,13 +497,15 @@ task GatherBamFiles { String outputBamPath Boolean createMd5File = false - Int? 
compressionLevel + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater Int javaXmxMb = 1024 Int memoryMb = javaXmxMb + 512 # One minute per input gigabyte. Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -513,7 +515,9 @@ task GatherBamFiles { GatherBamFiles \ INPUT=~{sep=' INPUT=' inputBams} \ OUTPUT=~{outputBamPath} \ - ~{"COMPRESSION_LEVEL=" + compressionLevel} \ + COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ CREATE_INDEX=true \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} } @@ -536,7 +540,9 @@ task GatherBamFiles { inputBamsIndex: {description: "The indexes of the input BAM files.", category: "required"} outputBamPath: {description: "The path where the merged BAM file will be written.", caregory: "required"} createMd5File: {decription: "Whether to create an md5 file of the output BAM.", category: "advanced"} - compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. 
Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -555,10 +561,14 @@ task GatherVcfs { Array[File]+ inputVcfIndexes String outputVcfPath = "out.vcf.gz" + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater + String javaXmx = "4G" String memory = "5G" Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -566,6 +576,10 @@ task GatherVcfs { mkdir -p "$(dirname ~{outputVcfPath})" picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ GatherVcfs \ + COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ + CREATE_INDEX=true \ INPUT=~{sep=' INPUT=' inputVcfs} \ OUTPUT=~{outputVcfPath} } @@ -590,6 +604,10 @@ task GatherVcfs { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. 
False uses the optimized intel deflater.", category: "advanced"} + # outputs outputVcf: {description: "Multiple VCF files gathered into one file."} } @@ -601,14 +619,11 @@ task MarkDuplicates { Array[File]+ inputBams String outputBamPath String metricsPath - Int compressionLevel = 1 Boolean createMd5File = false - Boolean useJdkInflater = true # Slightly faster than the intel one. - # Better results for compression level 1 (much smaller). - # Higher compression levels similar to intel deflater. - # NOTE: this might change in the future when the intel - # deflater is updated! - Boolean useJdkDeflater = true + + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater # The program default for READ_NAME_REGEX is appropriate in nearly every case. # Sometimes we wish to supply "null" in order to turn off optical duplicate detection. @@ -622,7 +637,7 @@ task MarkDuplicates { String memoryMb = javaXmxMb + 512 Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } # Task is assuming query-sorted input so that the Secondary and Supplementary reads get @@ -638,6 +653,8 @@ task MarkDuplicates { OUTPUT=~{outputBamPath} \ METRICS_FILE=~{metricsPath} \ COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ VALIDATION_STRINGENCY=SILENT \ ~{"READ_NAME_REGEX=" + read_name_regex} \ OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \ @@ -668,9 +685,9 @@ task MarkDuplicates { outputBamPath: {description: "The location where the ouptut BAM file should be written.", category: "required"} metricsPath: {description: "The location where the output metrics file should be written.", category: "required"} compressionLevel: {description: "The compression level 
at which the BAM files are written.", category: "advanced"} - createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"} useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} + createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"} read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"} javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"} memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"} @@ -692,16 +709,20 @@ task MergeVCFs { Array[File]+ inputVCFsIndexes String outputVcfPath Int compressionLevel = 1 - Boolean useJdkInflater = true # Slightly faster than the intel one. + Boolean useJdkInflater = false # Better results for compression level 1 (much smaller). # Higher compression levels similar to intel deflater. # NOTE: this might change in the future when the intel deflater is updated! - Boolean useJdkDeflater = true + # Second NOTE: No it did not change. Only the fastest algorithm with + # worse compression is wrapped in the intel GKL. Instead of using + # one of the slightly slower but better compressing alternatives from ISA-L. + # (Which are also faster than zlib.) 
+ Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "4G" String memory = "5G" Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } # Using MergeVcfs instead of GatherVcfs so we can create indices. @@ -757,7 +778,7 @@ task SamToFastq { String javaXmx = "16G" # High memory default to avoid crashes. String memory = "17G" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" File? noneFile } @@ -818,7 +839,7 @@ task ScatterIntervalList { String javaXmx = "3G" String memory = "4G" - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -853,13 +874,15 @@ task SortSam { Boolean createMd5File = false Int maxRecordsInRam = 500000 Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater # Default ram of 4 GB. Using 125001.0 to prevent an answer of # 4.000000001 which gets rounded to 5. 
# GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -872,6 +895,8 @@ task SortSam { SORT_ORDER=~{true="queryname" false="coordinate" sortByName} \ CREATE_INDEX=true \ COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ VALIDATION_STRINGENCY=SILENT \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} @@ -896,7 +921,9 @@ task SortSam { sortByName: {description: "Sort the output file by name, default is position.", category: "advanced"} createMd5File: {description: "Whether to create an MD5 digest for any BAM or FASTQ files created.", category: "advanced"} maxRecordsInRam: {description: "This will specify the number of records stored in RAM before spilling to disk.", category: "advanced"} - compressionLevel: {description: "Compression level for all compressed files created.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} XmxGb: {description: "The maximum memory available to picard SortSam. 
Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -917,7 +944,7 @@ task SortVcf { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -967,7 +994,7 @@ task RenameSample { String javaXmx = "8G" String memory = "9G" Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.23.8--0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -1014,11 +1041,13 @@ task UmiAwareMarkDuplicatesWithMateCigar { String tempdir = "temp" Boolean removeDuplicates = true String umiTagName = "RX" - + Int compressionLevel = 1 + Boolean useJdkInflater = false + Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "8G" String memory = "9G" Int timeMinutes = 360 - String dockerImage = "quay.io/biocontainers/picard:2.25.7--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } command { @@ -1034,6 +1063,9 @@ task UmiAwareMarkDuplicatesWithMateCigar { TMP_DIR=~{tempdir} \ REMOVE_DUPLICATES=~{removeDuplicates} \ CREATE_INDEX=true \ + COMPRESSION_LEVEL=~{compressionLevel} \ + USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} } output { @@ -1058,6 +1090,9 @@ task UmiAwareMarkDuplicatesWithMateCigar { removeDuplicates: {description: "Whether the duplicate reads should be removed instead of marked.", category: "common"} umiTagName: 
{description: "Which tag in the BAM file holds the UMI.", category: "common"} tempdir: {description: "Temporary directory.", category: "advanced"} + compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} + useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From a0933e34c55d4bed26510e0fd09fe013441898c1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Jan 2022 14:38:42 +0100 Subject: [PATCH 0912/1208] Add option to assume sort order --- picard.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index bf32c8ac..144c7782 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1038,6 +1038,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String outputPath String outputPathMetrics = outputPath + ".metrics" String outputPathUmiMetrics = outputPath + ".umi-metrics" + String? 
assumeSortOrder String tempdir = "temp" Boolean removeDuplicates = true String umiTagName = "RX" @@ -1065,7 +1066,8 @@ task UmiAwareMarkDuplicatesWithMateCigar { CREATE_INDEX=true \ COMPRESSION_LEVEL=~{compressionLevel} \ USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ - USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} + USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \ + ~{"ASSUME_SORT_ORDER=" + assumeSortOrder} } output { @@ -1089,6 +1091,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"} removeDuplicates: {description: "Whether the duplicate reads should be removed instead of marked.", category: "common"} umiTagName: {description: "Which tag in the BAM file holds the UMI.", category: "common"} + assumeSortOrder: {description: "Assume a certain sort order even though the header might say otherwise.", category: "common"} tempdir: {description: "Temporary directory.", category: "advanced"} compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} From 58682093853cf6e62304d7797f3f268587187669 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Jan 2022 15:42:12 +0100 Subject: [PATCH 0913/1208] Have more records in ram --- picard.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/picard.wdl b/picard.wdl index 144c7782..e81cd4e3 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1038,6 +1038,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String outputPath String outputPathMetrics = outputPath + ".metrics" String outputPathUmiMetrics = outputPath + ".umi-metrics" + Int maxRecordsInRam = 3000000 # Default is 500_000 but that will lead to very small files on disk. String? 
assumeSortOrder String tempdir = "temp" Boolean removeDuplicates = true @@ -1063,6 +1064,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { UMI_METRICS_FILE=~{outputPathUmiMetrics} \ TMP_DIR=~{tempdir} \ REMOVE_DUPLICATES=~{removeDuplicates} \ + MAX_RECORDS_IN_RAM=~{maxRecordsInRam} \ CREATE_INDEX=true \ COMPRESSION_LEVEL=~{compressionLevel} \ USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ From 89a0324ab9467ab79528ce3908701d7b230b2822 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 19 Jan 2022 12:59:00 +0100 Subject: [PATCH 0914/1208] increase resources GRIDSS --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index b43a3837..7d6a1ebf 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,10 +93,10 @@ task GRIDSS { File? blacklistBed File? gridssProperties - Int jvmHeapSizeGb = 200 + Int jvmHeapSizeGb = 300 Int nonJvmMemoryGb = 50 Int threads = 4 - Int timeMinutes = ceil(7200 / threads) + 180 + Int timeMinutes = ceil(7200 / threads) + 1800 String dockerImage = "quay.io/biowdl/gridss:2.12.2" } From 99215fdd9834f39569e5672b9daf5b010a777abc Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 26 Jan 2022 12:56:23 +0100 Subject: [PATCH 0915/1208] update scripts and changelog --- CHANGELOG.md | 37 +++++++++++++++++++++++++++++++++++++ bcftools.wdl | 4 ++-- scripts | 2 +- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 126f1ed9..6c0db947 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,43 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Added a task for SnpEff. ++ Adjusted runtime settings for sambamba Markdup. ++ Added a task for sambamba Flagstat. ++ Added a task for Picard CollectWgsMetrics. ++ Added a task for Peach. 
++ Added tasks for HMFtools: + + Amber + + Cobalt + + Cuppa + + CuppaChart + + GripssApplicationKt + + GripssHardFilterApplicationKt + + HealthChecker + + Linx + + Protect + + Purple + + Sage + + VirusInterpreter ++ Added a task for VirusBreakend. ++ Added a task for GridssAnnotateVcfRepeatmasker. ++ Bumped GRIDSS version to 2.12.2. ++ Adjusted GRIDSS runtime settings. ++ Added optional inputs to GRIDSS: + + blacklistBed + + gridssProperties ++ Added a task for GRIDSS AnnotateInsertedSequence. ++ Added a task for ExtractSigPredictHRD. ++ Added a task for DeconstructSigs. ++ Added option useSoftclippingForSupplementary (default false) to + BWA mem. ++ Adjusted BWA mem runtime settings. ++ Added a task for bedtools coverage. ++ Added a task for bcftools filter. ++ Adjusted runtime settings for bcftools annotate. ++ Added optional inputs to bcftools annotate: + + inputFileIndex + + annsFileIndex + Update parameter_meta for macs2 + Add sample position in array task. diff --git a/bcftools.wdl b/bcftools.wdl index 13ce36be..88d97cd0 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -44,8 +44,8 @@ task Annotate { String? regions File? regionsFile File? renameChrs - File? samplesFile - + File? 
samplesFile + Int threads = 0 String memory = "4G" Int timeMinutes = 60 + ceil(size(inputFile, "G")) diff --git a/scripts b/scripts index c31670d3..84690a30 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit c31670d3a9222a2feafc649cbc118c95afbc7189 +Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 From bf4c1a3e8ab1bbd73a8d7a3fe29a15ac8ad69153 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 26 Jan 2022 16:24:09 +0100 Subject: [PATCH 0916/1208] adress comments --- hmftools.wdl | 108 +++++++++++++++++++++++++-------------------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 1537bce5..0b4ba6d0 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -22,9 +22,9 @@ version 1.0 task Amber { input { - String normalName - File normalBam - File normalBamIndex + String referenceName + File referenceBam + File referenceBamIndex String tumorName File tumorBam File tumorBamIndex @@ -43,8 +43,8 @@ task Amber { command { AMBER -Xmx~{javaXmx} \ - -reference ~{normalName} \ - -reference_bam ~{normalBam} \ + -reference ~{referenceName} \ + -reference_bam ~{referenceBam} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ -output_dir ~{outputDir} \ @@ -63,8 +63,8 @@ task Amber { File tumorContaminationVcfIndex = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz.tbi" File tumorContaminationTsv = "~{outputDir}/~{tumorName}.amber.contamination.tsv" File tumorQc = "~{outputDir}/~{tumorName}.amber.qc" - File normalSnpVcf = "~{outputDir}/~{normalName}.amber.snp.vcf.gz" - File normalSnpVcfIndex = "~{outputDir}/~{normalName}.amber.snp.vcf.gz.tbi" + File normalSnpVcf = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz" + File normalSnpVcfIndex = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz.tbi" Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex, tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, normalSnpVcf, normalSnpVcfIndex] @@ -78,9 +78,9 @@ 
task Amber { } parameter_meta { - normalName: {description: "the name of the normal sample.", category: "required"} - normalBam: {description: "The normal BAM file.", category: "required"} - normalBamIndex: {description: "The index for the normal BAM file.", category: "required"} + referenceName: {description: "the name of the normal sample.", category: "required"} + referenceBam: {description: "The normal BAM file.", category: "required"} + referenceBamIndex: {description: "The index for the normal BAM file.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} tumorBam: {description: "The tumor BAM file.", category: "required"} tumorBamIndex: {description: "The index for the tumor BAM file.", category: "required"} @@ -102,9 +102,9 @@ task Amber { task Cobalt { input { - String normalName - File normalBam - File normalBamIndex + String referenceName + File referenceBam + File referenceBamIndex String tumorName File tumorBam File tumorBamIndex @@ -120,8 +120,8 @@ task Cobalt { command { COBALT -Xmx~{javaXmx} \ - -reference ~{normalName} \ - -reference_bam ~{normalBam} \ + -reference ~{referenceName} \ + -reference_bam ~{referenceBam} \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ -output_dir ~{outputDir} \ @@ -131,9 +131,9 @@ task Cobalt { output { File version = "~{outputDir}/cobalt.version" - File normalGcMedianTsv = "~{outputDir}/~{normalName}.cobalt.gc.median.tsv" - File normalRationMedianTsv = "~{outputDir}/~{normalName}.cobalt.ratio.median.tsv" - File normalRationPcf = "~{outputDir}/~{normalName}.cobalt.ratio.pcf" + File normalGcMedianTsv = "~{outputDir}/~{referenceName}.cobalt.gc.median.tsv" + File normalRationMedianTsv = "~{outputDir}/~{referenceName}.cobalt.ratio.median.tsv" + File normalRationPcf = "~{outputDir}/~{referenceName}.cobalt.ratio.pcf" File tumorGcMedianTsv = "~{outputDir}/~{tumorName}.cobalt.gc.median.tsv" File tumorRatioPcf = "~{outputDir}/~{tumorName}.cobalt.ratio.pcf" File tumorRatioTsv = 
"~{outputDir}/~{tumorName}.cobalt.ratio.tsv" @@ -150,9 +150,9 @@ task Cobalt { } parameter_meta { - normalName: {description: "the name of the normal sample.", category: "required"} - normalBam: {description: "The normal BAM file.", category: "required"} - normalBamIndex: {description: "The index for the normal BAM file.", category: "required"} + referenceName: {description: "the name of the normal sample.", category: "required"} + referenceBam: {description: "The normal BAM file.", category: "required"} + referenceBamIndex: {description: "The index for the normal BAM file.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} tumorBam: {description: "The tumor BAM file.", category: "required"} tumorBamIndex: {description: "The index for the tumor BAM file.", category: "required"} @@ -279,7 +279,7 @@ task GripssApplicationKt { File inputVcf String outputPath = "gripss.vcf.gz" String tumorName - String normalName + String referenceName File referenceFasta File referenceFastaFai File referenceFastaDict @@ -287,8 +287,8 @@ task GripssApplicationKt { File breakendPon File breakpointPon - String memory = "33G" - String javaXmx = "32G" + String memory = "32G" + String javaXmx = "31G" Int timeMinutes = 45 String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" } @@ -298,7 +298,7 @@ task GripssApplicationKt { -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssApplicationKt \ -tumor ~{tumorName} \ - -reference ~{normalName} \ + -reference ~{referenceName} \ -ref_genome ~{referenceFasta} \ -breakpoint_hotspot ~{breakpointHotspot} \ -breakend_pon ~{breakendPon} \ @@ -383,9 +383,9 @@ task GripssHardFilterApplicationKt { task HealthChecker { input { String outputDir = "." 
- String normalName - File normalFlagstats - File normalMetrics + String referenceName + File referenceFlagstats + File referenceMetrics String tumorName File tumorFlagstats File tumorMetrics @@ -401,9 +401,9 @@ task HealthChecker { set -e mkdir -p ~{outputDir} health-checker -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ - -reference ~{normalName} \ - -ref_flagstat_file ~{normalFlagstats} \ - -ref_wgs_metrics_file ~{normalMetrics} \ + -reference ~{referenceName} \ + -ref_flagstat_file ~{referenceFlagstats} \ + -ref_wgs_metrics_file ~{referenceMetrics} \ -tumor ~{tumorName} \ -tum_flagstat_file ~{tumorFlagstats} \ -tum_wgs_metrics_file ~{tumorMetrics} \ @@ -425,9 +425,9 @@ task HealthChecker { parameter_meta { outputDir: {description: "The path the output will be written to.", category:"required"} - normalName: {description: "The name of the normal sample.", category: "required"} - normalFlagstats: {description: "The flagstats for the normal sample.", category: "required"} - normalMetrics: {description: "The picard WGS metrics for the normal sample.", category: "required"} + referenceName: {description: "The name of the normal sample.", category: "required"} + referenceFlagstats: {description: "The flagstats for the normal sample.", category: "required"} + referenceMetrics: {description: "The picard WGS metrics for the normal sample.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} tumorFlagstats: {description: "The flagstats for the tumor sample.", category: "required"} tumorMetrics: {description: "The picard WGS metrics for the tumor sample.", category: "required"} @@ -546,11 +546,11 @@ task Protect { input { String refGenomeVersion String tumorName - String normalName + String referenceName Array[String]+ sampleDoids String outputDir = "." 
Array[File]+ serveActionability - File doidsJson + File doidJson File purplePurity File purpleQc File purpleDriverCatalogSomatic @@ -576,11 +576,11 @@ task Protect { protect -Xmx~{javaXmx} \ -ref_genome_version ~{refGenomeVersion} \ -tumor_sample_id ~{tumorName} \ - -reference_sample_id ~{normalName} \ + -reference_sample_id ~{referenceName} \ -primary_tumor_doids '~{sep=";" sampleDoids}' \ -output_dir ~{outputDir} \ -serve_actionability_dir ~{sub(serveActionability[0], basename(serveActionability[0]), "")} \ - -doid_json ~{doidsJson} \ + -doid_json ~{doidJson} \ -purple_purity_tsv ~{purplePurity} \ -purple_qc_file ~{purpleQc} \ -purple_somatic_driver_catalog_tsv ~{purpleDriverCatalogSomatic} \ @@ -608,11 +608,11 @@ task Protect { parameter_meta { refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} - normalName: {description: "The name of the normal sample.", category: "required"} + referenceName: {description: "The name of the normal sample.", category: "required"} sampleDoids: {description: "The DOIDs (Human Disease Ontology) for the primary tumor.", category: "required"} outputDir: {description: "The directory the outputs will be written to.", category: "required"} serveActionability: {description: "The actionability files generated by hmftools' serve.", category: "required"} - doidsJson: {description: "A json with the DOID (Human Disease Ontology) tree.", category: "required"} + doidJson: {description: "A json with the DOID (Human Disease Ontology) tree.", category: "required"} purplePurity: {description: "The purity file generated by purple.", category: "required"} purpleQc: {description: "The QC file generated by purple.", category: "required"} purpleDriverCatalogSomatic: {description: "The somatic driver catalog generated by purple.", category: "required"} @@ -639,7 +639,7 @@ task Protect { task 
Purple { input { - String normalName + String referenceName String tumorName String outputDir = "./purple" Array[File]+ amberOutput @@ -667,7 +667,7 @@ task Purple { command { PURPLE -Xmx~{javaXmx} \ - -reference ~{normalName} \ + -reference ~{referenceName} \ -tumor ~{tumorName} \ -output_dir ~{outputDir} \ -amber ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ @@ -713,7 +713,7 @@ task Purple { File somaticClonalityPlot = "~{outputDir}/plot/~{tumorName}.somatic.clonality.png" File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" File purpleVersion = "~{outputDir}/purple.version" - File circosNormalRatio = "~{outputDir}/circos/~{normalName}.ratio.circos" + File circosNormalRatio = "~{outputDir}/circos/~{referenceName}.ratio.circos" File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" @@ -744,7 +744,7 @@ task Purple { } parameter_meta { - normalName: {description: "the name of the normal sample.", category: "required"} + referenceName: {description: "the name of the normal sample.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} outputDir: {description: "The path to the output directory.", category: "common"} amberOutput: {description: "The output files of hmftools amber.", category: "required"} @@ -787,9 +787,9 @@ task Sage { Boolean panelOnly = false String outputPath = "./sage.vcf.gz" - String? normalName - File? normalBam - File? normalBamIndex + String? referenceName + File? referenceBam + File? referenceBamIndex Int? hotspotMinTumorQual Int? panelMinTumorQual Int? 
hotspotMaxGermlineVaf @@ -801,8 +801,8 @@ task Sage { Int threads = 4 String javaXmx = "50G" - String memory = "60G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, normalBam]), "G") * 9 / threads) + String memory = "51G" + Int timeMinutes = 1 + ceil(size(select_all([tumorBam, referenceBam]), "G") * 9 / threads) String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_0" } @@ -810,8 +810,8 @@ task Sage { SAGE -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -tumor ~{tumorName} \ -tumor_bam ~{tumorBam} \ - ~{"-reference " + normalName} \ - ~{"-reference_bam " + normalBam} \ + ~{"-reference " + referenceName} \ + ~{"-reference_bam " + referenceBam} \ -ref_genome ~{referenceFasta} \ -hotspots ~{hotspots} \ -panel_bed ~{panelBed} \ @@ -848,9 +848,9 @@ task Sage { tumorName: {description: "The name of the tumor sample.", category: "required"} tumorBam: {description: "The BAM file for the tumor sample.", category: "required"} tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"} - normalName: {description: "The name of the normal/reference sample.", category: "common"} - normalBam: {description: "The BAM file for the normal sample.", category: "common"} - normalBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} + referenceName: {description: "The name of the normal/reference sample.", category: "common"} + referenceBam: {description: "The BAM file for the normal sample.", category: "common"} + referenceBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} From fb91a02460b22501cc1c57dc381a486a29b01fbd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 27 Jan 2022 12:01:28 +0100 Subject: [PATCH 0917/1208] update 
healthchecker --- hmftools.wdl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 0b4ba6d0..76620e3c 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -408,13 +408,16 @@ task HealthChecker { -tum_flagstat_file ~{tumorFlagstats} \ -tum_wgs_metrics_file ~{tumorMetrics} \ -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ - -output_dir ~{outputDir} + -output_dir ~{outputDir} + test -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' && echo 'true' > '~{outputDir}/succeeded' + test -e '~{outputDir}/~{tumorName}.HealthCheckFailed' && echo 'false' > '~{outputDir}/succeeded' } - output { - File? healthCheckSucceeded = "~{outputDir}/~{tumorName}.HealthCheckSucceeded" - File? healthCheckFailed = "~{outputDir}/~{tumorName}.HealthCheckFailed" + Boolean succeeded = read_boolean("result") + File outputFile = if succeeded + then "~{outputDir}/~{tumorName}.HealthCheckSucceeded" + else "~{outputDir}/~{tumorName}.HealthCheckFailed" } runtime { From f234b0e8f46192d248e564f22bcd88912b890576 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 28 Jan 2022 14:42:42 +0100 Subject: [PATCH 0918/1208] add missing parameter_meta --- gridss.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 7d6a1ebf..d3d251a5 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -146,7 +146,8 @@ task GRIDSS { gridssProperties: {description: "A properties file for gridss.", category: "advanced"} threads: {description: "The number of the threads to use.", category: "advanced"} - jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling",category: "advanced"} + jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling", category: "advanced"} + nonJvmMemoryGb: {description: "The amount of memory in Gb to be requested besides JVM memory.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", 
category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From b3b79f62d4a538642318c0316080f9a098ca4b48 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Feb 2022 14:24:45 +0100 Subject: [PATCH 0919/1208] add a task for Pave --- hmftools.wdl | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 76620e3c..1dbfd5de 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -545,6 +545,79 @@ task Linx { } } +task Pave { + input { + String outputDir = "./" + String sampleName + File vcfFile + File vcfFileIndex + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File refGenomeVersion + File driverGenePanel + #The following should be in the same directory. + File geneDataCsv + File proteinFeaturesCsv + File transExonDataCsv + File transSpliceDataCsv + + Int timeMinutes = 50 + String javaXmx = "8G" + String memory = "9G" + String dockerImage = "quay.io/biowdl/pave:v1.0" + } + + command { + set -e + mkdir -p ~{outputDir} + pave -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -sample ~{sampleName} \ + -vcf_file ~{vcfFile} \ + -output_dir ~{outputDir} \ + -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -ref_genome ~{referenceFasta} \ + -ref_genome_version ~{refGenomeVersion} \ + -driver_gene_panel ~{driverGenePanel} + } + + output { + File outputVcf = "~{outputDir}/~{sub(basename(geneDataCsv), 'vcf.gz$', 'pave.vcf.gz')}" + File outputVcfIndex = "~{outputVcf}.tbi" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + sampleName: {description: "The name of the sample.", category: "required"} + vcfFile: {description: "The input VCF file.", category: 
"required"} + vcfFileIndex: {description: "The index for the input vcf file.", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"HG19\" or \"HG38\".", category: "required"} + driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} + #The following should be in the same directory. + geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} + transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Protect { input { String refGenomeVersion From 3ffa051fd2be4edb4fbc466836c9da782e68be27 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Feb 2022 17:04:00 +0100 Subject: [PATCH 0920/1208] add task for gripss 2.0 --- hmftools.wdl | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 77 insertions(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 1dbfd5de..c0c835b5 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -274,7 +274,79 @@ task CuppaChart { } } +task Gripss { + input { + File referenceFasta + File referenceFastaFai + File referenceFastaDict + File knownFusionPairBedpe + File breakendPon + File breakpointPon + String referenceName + String tumorName + File vcf + File vcfIndex + String outputDir = "./" + + String memory = "17G" + String javaXmx = "16G" + Int timeMinutes = 50 + String dockerImage = "quay.io/biocontainers/hmftools-gripss:2.0--hdfd78af_0" + } + + command { + set -e + mkdir -p ~{outputDir} + gripss -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -ref_genome ~{referenceFasta} \ + -known_hotspot_file ~{knownFusionPairBedpe} \ + -pon_sgl_file ~{breakendPon} \ + -pon_sv_file ~{breakpointPon} \ + -reference ~{referenceName} \ + -sample ~{tumorName} \ + -vcf ~{vcf} \ + -output_dir ~{outputDir} \ + -output_id somatic + } + + output { + File fullVcf = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz" + File fullVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz.tbi" + File filteredVcf = "~{outputDir}/~{tumorName}.gripss.somatic.filtered.vcf.gz" + File filteredVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.filtered.vcf.gz.tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaDict: {description: "The sequence 
dictionary associated with the reference fasta file.", + category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + knownFusionPairBedpe: {description: "Equivalent to the `-known_hotspot_file` option.", category: "required"} + breakendPon: {description: "Equivalent to the `-pon_sgl_file` option.", category: "required"} + breakpointPon: {description: "Equivalent to the `-pon_sv_file` option.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + referenceName: {description: "The name of the normal sample.", category: "required"} + vcf: {description: "The input VCF.", category: "required"} + vcfIndex: {description: "The index for the input VCF.", category: "required"} + outputDir: {description: "The path the output will be written to.", category:"required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GripssApplicationKt { + # Obsolete input { File inputVcf String outputPath = "gripss.vcf.gz" @@ -322,13 +394,15 @@ task GripssApplicationKt { parameter_meta { inputVcf: {description: "The input VCF.", category: "required"} outputPath: {description: "The path where th eoutput VCF will be written.", category: "common"} + referenceName: {description: "The name of the normal sample.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"} breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"} - breakpointPon: {description: "Equivalent to the `breakpoint_pon` option.", category: "required"} + breakpointPon: {description: "Equivalent to the `-breakpoint_pon` option.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} @@ -339,6 +413,7 @@ task GripssApplicationKt { } task GripssHardFilterApplicationKt { + # Obsolete input { File inputVcf String outputPath = "gripss_hard_filter.vcf.gz" @@ -724,6 +799,7 @@ task Purple { File somaticVcf File germlineVcf File filteredSvVcf + File filteredSvVcfIndex File? fullSvVcf File? 
fullSvVcfIndex File referenceFasta From 22a880cdd2223034ebb80fcdb1006b2bd3fe81c7 Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 11 Feb 2022 11:52:10 +0100 Subject: [PATCH 0921/1208] update purple to 3.2 --- hmftools.wdl | 54 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index c0c835b5..caafa440 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -65,8 +65,8 @@ task Amber { File tumorQc = "~{outputDir}/~{tumorName}.amber.qc" File normalSnpVcf = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz" File normalSnpVcfIndex = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz.tbi" - Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex, - tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, + Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex, + tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc, normalSnpVcf, normalSnpVcfIndex] } @@ -110,7 +110,7 @@ task Cobalt { File tumorBamIndex String outputDir = "./cobalt" File gcProfile - + Int threads = 1 String memory = "5G" String javaXmx = "4G" @@ -174,7 +174,7 @@ task Cuppa { Array[File]+ purpleOutput String sampleName Array[String]+ categories = ["DNA"] - Array[File]+ referenceData + Array[File]+ referenceData File purpleSvVcf File purpleSvVcfIndex File purpleSomaticVcf @@ -244,7 +244,7 @@ task CuppaChart { } command { - set -e + set -e mkdir -p ~{outputDir} cuppa-chart \ -sample ~{sampleName} \ @@ -429,7 +429,7 @@ task GripssHardFilterApplicationKt { -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \ com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \ -input_vcf ~{inputVcf} \ - -output_vcf ~{outputPath} + -output_vcf ~{outputPath} } output { @@ -490,7 +490,7 @@ task HealthChecker { output { Boolean succeeded = read_boolean("result") - File outputFile = if succeeded + File outputFile = if 
succeeded then "~{outputDir}/~{tumorName}.HealthCheckSucceeded" else "~{outputDir}/~{tumorName}.HealthCheckFailed" } @@ -675,10 +675,9 @@ task Pave { referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"} - referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"HG19\" or \"HG38\".", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} - #The following should be in the same directory. geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} @@ -757,7 +756,7 @@ task Protect { } parameter_meta { - refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. 
Either \"37\" or \"38\".", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} referenceName: {description: "The name of the normal sample.", category: "required"} sampleDoids: {description: "The DOIDs (Human Disease Ontology) for the primary tumor.", category: "required"} @@ -800,41 +799,47 @@ task Purple { File germlineVcf File filteredSvVcf File filteredSvVcfIndex - File? fullSvVcf - File? fullSvVcfIndex + File fullSvVcf + File fullSvVcfIndex File referenceFasta File referenceFastaFai File referenceFastaDict File driverGenePanel File somaticHotspots File germlineHotspots - + #The following should be in the same directory. + File geneDataCsv + File proteinFeaturesCsv + File transExonDataCsv + File transSpliceDataCsv + Int threads = 1 Int timeMinutes = 30 String memory = "9G" String javaXmx = "8G" - # clone of quay.io/biocontainers/hmftools-purple:3.1--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6' - String dockerImage = "quay.io/biowdl/hmftools-purple:3.1" + # clone of quay.io/biocontainers/hmftools-purple:3.2--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6' + String dockerImage = "quay.io/biowdl/hmftools-purple:3.2" } command { PURPLE -Xmx~{javaXmx} \ -reference ~{referenceName} \ + -germline_vcf ~{germlineVcf} \ + -germline_hotspots ~{germlineHotspots} \ -tumor ~{tumorName} \ -output_dir ~{outputDir} \ -amber ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \ -cobalt ~{sub(cobaltOutput[0], basename(cobaltOutput[0]), "")} \ -gc_profile ~{gcProfile} \ -somatic_vcf ~{somaticVcf} \ - -germline_vcf ~{germlineVcf} \ -structural_vcf ~{filteredSvVcf} \ - ~{"-sv_recovery_vcf " + fullSvVcf} \ + -sv_recovery_vcf ~{fullSvVcf} \ -circos /usr/local/bin/circos \ -ref_genome ~{referenceFasta} \ - -driver_catalog \ - -driver_gene_panel ~{driverGenePanel} \ + -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -run_drivers \ -somatic_hotspots 
~{somaticHotspots} \ - -germline_hotspots ~{germlineHotspots} \ + -driver_gene_panel ~{driverGenePanel} \ -threads ~{threads} } @@ -877,8 +882,8 @@ task Purple { File circosMap = "~{outputDir}/circos/~{tumorName}.map.circos" File circosSnp = "~{outputDir}/circos/~{tumorName}.snp.circos" Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, - purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, - purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, + purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, + purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv] Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, @@ -913,6 +918,11 @@ task Purple { driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} somaticHotspots: {description: "A vcf file with hotspot somatic variant sites.", category: "required"} germlineHotspots: {description: "A vcf file with hotspot germline variant sites.", category: "required"} + geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} + transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} + transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, 
`proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"} + threads: {description: "The number of threads the program will use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 0d7909255421e4e7b30cfcd51e68da1530221427 Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 11 Feb 2022 12:41:34 +0100 Subject: [PATCH 0922/1208] update linx to 1.17 --- hmftools.wdl | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index caafa440..810685bf 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -529,8 +529,6 @@ task Linx { String outputDir = "./linx" File fragileSiteCsv File lineElementCsv - File replicationOriginsBed - File viralHostsCsv File knownFusionCsv File driverGenePanel #The following should be in the same directory. @@ -539,10 +537,10 @@ task Linx { File transExonDataCsv File transSpliceDataCsv - String memory = "5G" - String javaXmx = "4G" + String memory = "9G" + String javaXmx = "8G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.16--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.17--hdfd78af_0" } command { @@ -554,9 +552,7 @@ task Linx { -output_dir ~{outputDir} \ -fragile_site_file ~{fragileSiteCsv} \ -line_element_file ~{lineElementCsv} \ - -replication_origins_file ~{replicationOriginsBed} \ - -viral_hosts_file ~{viralHostsCsv} \ - -gene_transcripts_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ + -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \ -check_fusions \ -known_fusion_file ~{knownFusionCsv} \ -check_drivers \ @@ -598,12 +594,10 @@ task Linx { svVcf: {description: "A VCF file containing structural variants, produced using GRIDSS, annotated for viral insertions and postprocessed with GRIPSS.", category: "required"} svVcfIndex: {description: "Index for the structural variants VCf file.", category: "required"} purpleOutput: {description: 
"The files produced by PURPLE.", category: "required"} - refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"HG19\" or \"HG38\".", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} outputDir: {description: "The directory the outputs will be written to.", category: "required"} fragileSiteCsv: {description: "A list of known fragile sites.", category: "required"} lineElementCsv: {description: "A list of known LINE source regions.", category: "required"} - replicationOriginsBed: {description: "Replication timing input in BED format with replication timing as the 4th column.", category: "required"} - viralHostsCsv: {description: "A list of the viruses which were used for annotation of the GRIDSS results.", category: "required"} knownFusionCsv: {description: "A CSV file describing known fusions.", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} From d6bfc449dfc6979511e746a52f6fddf0e30e7853 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 12:47:06 +0100 Subject: [PATCH 0923/1208] Speed up CI by using conda caching and only checking changed files Squashed commit of the following: commit 7fa743cc028b8e2c86bde49244834ee13c13e95b Author: Ruben Vorderman Date: Fri Feb 11 12:34:33 2022 +0100 Add comment about activate environment commit 2de7802e03f90cd6e26b3d8287fcb0c6b8b81d11 Author: Ruben Vorderman Date: Fri Feb 11 12:26:38 2022 +0100 Invalidate cache commit 8ca394d41361acf2511249e3e29688baf0705004 Author: Ruben Vorderman Date: Fri Feb 11 12:26:27 2022 +0100 Consolidate steps commit 31d09c6f0e86d4625bfa3a6e94a7ced910c7410c Author: Ruben 
Vorderman Date: Fri Feb 11 11:27:53 2022 +0100 Use correct path for caching commit 7e1374ed323bb38d674da09d7270def4a2192d00 Author: Ruben Vorderman Date: Fri Feb 11 11:22:10 2022 +0100 Do not cache conda packages commit deffd8a0776e15a4df58a1398fcbcb8b0f1430f0 Author: Ruben Vorderman Date: Fri Feb 11 11:20:59 2022 +0100 Remove unnecessary whitespace commit 8e97bcd4dfd8ee459a23f1931465875c0a41fd49 Author: Ruben Vorderman Date: Fri Feb 11 11:20:03 2022 +0100 Remove debugging task commit 8338cd4b843245d781d7028f1f1acad45c8c7d0d Author: Ruben Vorderman Date: Fri Feb 11 11:17:18 2022 +0100 Try to change path commit 6a75baa36eee340d7a6d766c89163e960a6203b0 Author: Ruben Vorderman Date: Fri Feb 11 11:12:18 2022 +0100 Delete path line in current github env commit cbbb9fe67cb796a010c01760ca2e05986f979ced Author: Ruben Vorderman Date: Fri Feb 11 11:05:50 2022 +0100 Properly activate commit 671568b7c8d79a5141429068a32b72814110b361 Author: Ruben Vorderman Date: Fri Feb 11 10:59:46 2022 +0100 Also printenv commit 4c8945e8d5305753482538389ddc8af892f493f9 Author: Ruben Vorderman Date: Fri Feb 11 10:56:45 2022 +0100 Manual activate commit a925c53a99836e81eb0e2b21075356370906c641 Author: Ruben Vorderman Date: Fri Feb 11 10:53:10 2022 +0100 Reset cache number commit 645ed2b4504d067ea1b26a0922943ef3d5c34622 Author: Ruben Vorderman Date: Fri Feb 11 10:51:09 2022 +0100 Activate environment path commit 5852d29fb538b80f06a738677e7ae271c6c57fa3 Author: Ruben Vorderman Date: Fri Feb 11 10:31:07 2022 +0100 Proper setting for cache commit 83f14a939d662d628ca47dc7b82bbc114f164541 Author: Ruben Vorderman Date: Fri Feb 11 10:03:45 2022 +0100 List environments commit 59267fbba267c0b1726733e390ff471d7012cefa Author: Ruben Vorderman Date: Fri Feb 11 10:01:58 2022 +0100 Activate environment manually commit 0a4d2cd5644407308fcc78356a8aef55de86c0c6 Author: Ruben Vorderman Date: Fri Feb 11 09:57:32 2022 +0100 List environments commit 0bc8fa939eb35a6eb352bb58b1235efecd34056f Author: Ruben Vorderman 
Date: Fri Feb 11 09:52:02 2022 +0100 Add mambaforge comment commit 719d92a0b5245be891d1b5c0eb38d8048abdc5a1 Author: Ruben Vorderman Date: Fri Feb 11 09:44:18 2022 +0100 Use normal conda, since environment is cached commit e5efbb75109f40cfa8b7b33280ec9707a31970d1 Author: Ruben Vorderman Date: Fri Feb 11 09:38:21 2022 +0100 Also cache environments.txt commit 4fa66afb6606ceeb7be577df9f20704d96fc3af0 Author: Ruben Vorderman Date: Fri Feb 11 09:34:46 2022 +0100 Check home commit 2ac42e42829141650585780d27f39d06ebaf8f75 Author: Ruben Vorderman Date: Wed Feb 9 17:00:27 2022 +0100 Add an annoying but effective manual check commit 78d88eae8cb3d1ca44709ce90bcffeb7c5786c1b Author: Ruben Vorderman Date: Wed Feb 9 16:54:29 2022 +0100 Cache correct path commit c05c94561785b1d5e198588dc210313014f3913d Author: Ruben Vorderman Date: Wed Feb 9 16:45:51 2022 +0100 Rename workflow commit 1c67f010c589c1c1fb407ac32e8ed74afdb3ddfd Author: Ruben Vorderman Date: Wed Feb 9 16:45:05 2022 +0100 Use correct quotes commit 7f9d2e559697e9d9d1f6df3514c8269612e7bcee Author: Ruben Vorderman Date: Wed Feb 9 16:42:25 2022 +0100 Only check changed wdl files commit 0e2a15b38e206fdb96d2d8b225999d6e5c9e6e73 Author: Ruben Vorderman Date: Wed Feb 9 16:34:35 2022 +0100 remove v parameter commit 89348dde8a84cd1d935999255c64428c99db7042 Author: Ruben Vorderman Date: Wed Feb 9 16:19:02 2022 +0100 Remove newline commit 752b8cb4a8407908348d8424fdc4b89d3219fdad Author: Ruben Vorderman Date: Wed Feb 9 16:17:33 2022 +0100 Git fetch develop commit 9216a3f846268ba00d0fe922055536b06dc975b3 Author: Ruben Vorderman Date: Wed Feb 9 15:53:13 2022 +0100 Specifically check origin commit b54c140de4fc0bf31d7c95384831aedb253f35a3 Author: Ruben Vorderman Date: Wed Feb 9 15:44:50 2022 +0100 Only chek files that are different from the base with womtool validate commit d963818753272aa18311d3d29276c3db6241e85d Author: Ruben Vorderman Date: Wed Feb 9 15:33:50 2022 +0100 Correctly use data commit 
8113bfdd2e1feda6047e13da79885a3131c000e6 Author: Ruben Vorderman Date: Wed Feb 9 15:32:48 2022 +0100 Set correct env cache param commit 4f7af2ed0365887be9147954290c4b807673afdd Author: Ruben Vorderman Date: Wed Feb 9 15:30:23 2022 +0100 Add lint-evnironment commit b026b5a8a77ea131b229a50cb28e0d301915cfb8 Author: Ruben Vorderman Date: Wed Feb 9 15:28:50 2022 +0100 Use mamba env update commit 41fda1a9f52d56578a76f8bf185db86da2128a0e Author: Ruben Vorderman Date: Wed Feb 9 15:25:21 2022 +0100 Use cache commit fd1a64261bea956b6b31a26f5eaa38ce4a63121c Author: Ruben Vorderman Date: Wed Feb 9 14:31:17 2022 +0100 Add missing done statement commit 4a64eb43535f48e0558ba6c5dc408178784ef207 Merge: a36a227 f234b0e Author: Ruben Vorderman Date: Wed Feb 9 14:23:48 2022 +0100 Merge branch 'develop' into BIOWDL-583 commit a36a2274116732bc8e3229a267fe35ee4d61e7da Author: Ruben Vorderman Date: Wed Feb 9 14:23:26 2022 +0100 Implement all checks in lint.yml directly commit 391bb0de9619e75293599a1be1d24322fd466f4c Author: Ruben Vorderman Date: Wed Feb 9 14:11:33 2022 +0100 Use a separate lint file commit 832a131cee403ec0ac7d983d6e82fd567ce1b246 Author: Ruben Vorderman Date: Tue Dec 14 16:32:30 2021 +0100 Use mamba-forge and mamba to install dependencies --- .github/PULL_REQUEST_TEMPLATE.md | 1 + .github/lint-environment.yml | 9 ++++ .github/workflows/ci.yml | 30 ----------- .github/workflows/lint.yml | 93 ++++++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+), 30 deletions(-) create mode 100644 .github/lint-environment.yml delete mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/lint.yml diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 3b4ec9ac..372071ee 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -2,3 +2,4 @@ - [ ] Pull request details were added to CHANGELOG.md. - [ ] Documentation was updated (if required). - [ ] `parameter_meta` was added/updated (if required). 
+- [ ] Submodule branches are on develop or a tagged commit. diff --git a/.github/lint-environment.yml b/.github/lint-environment.yml new file mode 100644 index 00000000..63b538fc --- /dev/null +++ b/.github/lint-environment.yml @@ -0,0 +1,9 @@ +name: biowdl-lint +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - cromwell + - wdl-aid + - miniwdl diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 78566111..00000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: Continuous integration - -on: - pull_request: - paths_ignore: - - "docs/**" - -defaults: - run: - # This is needed for miniconda, see: - # https://github.com/marketplace/actions/setup-miniconda#important - shell: bash -l {0} - -jobs: - lint: - runs-on: ubuntu-latest - name: Womtool validate and submodule up to date. - steps: - - uses: actions/checkout@v2.3.4 - with: - submodules: recursive - - name: install miniconda - uses: conda-incubator/setup-miniconda@v2.0.1 - with: - channels: conda-forge,bioconda,defaults - # Conda-incubator uses 'test' environment by default. 
- - name: install requirements - run: conda install -n test cromwell miniwdl wdl-aid - - name: run linting - run: bash scripts/biowdl_lint.sh diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..e6edbbab --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,93 @@ +name: Linting + +on: + pull_request: + paths_ignore: + - "docs/**" + +defaults: + run: + # This is needed for miniconda, see: + # https://github.com/marketplace/actions/setup-miniconda#important + shell: bash -l {0} + +jobs: + lint: + runs-on: ubuntu-latest + name: Linting checks + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + + - name: Set cache date + run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV + + - name: Cache conda environment + uses: actions/cache@v2.1.7 + env: + # Increase this value to manually invalidate the cache + CACHE_NUMBER: 0 + with: + path: /usr/share/miniconda/envs/biowdl-lint + key: + ${{runner.os}}-biowdl-lint-${{ env.CACHE_NUMBER }}-${{env.DATE}}-${{ hashFiles('.github/lint-environment.yml') }} + id: env_cache + + # Use the builtin conda. This is the fastest installation. It may not be + # the fastest for resolving, but the package cache mitigates that problem. + # Since this installs fastest, it is fastest for all runs where a cache + # hit occurs. + - name: install miniconda + uses: conda-incubator/setup-miniconda@v2.1.1 + with: + channels: conda-forge,bioconda,defaults + channel-priority: strict + auto-activate-base: false + use-only-tar-bz2: true # Needed for proper caching according to the documentation. + # activate-environment is broken! This always seems to create a new environment. + # Activation is therefore done separately. 
+ + - name: Create test environment if no cache is present + run: conda env create -n biowdl-lint -f .github/lint-environment.yml + if: steps.env_cache.outputs.cache-hit != 'true' + + - name: Activate test environment + # The new PATH should be passed to the environment, otherwise it won't register. + run: | + conda activate biowdl-lint + echo "PATH=$PATH" >> $GITHUB_ENV + + - name: Fetch develop branch for comparisons + run: git fetch --depth=1 origin develop + + - name: run womtool validate + # Only check files that have changed from the base reference. + # Womtool validate checks very slowly, so this saves a lot of time. + run: | + bash -c " + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + womtool validate $WDL_FILE + done + " + - name: run miniwdl check + run: bash -c 'miniwdl check $(git ls-files *.wdl)' + + - name: Check copyright headers + run: | + bash -c ' + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" + done + ' + - name: Check parameter_meta for inputs + run: | + bash -c " + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || + if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr + then + exit 1 + fi + done + " From 54337a3c99596e48149d0d2522cc79c0a7b379e9 Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 11 Feb 2022 13:38:34 +0100 Subject: [PATCH 0924/1208] update peach to 1.5 --- peach.wdl | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/peach.wdl b/peach.wdl index af44daec..6a5770f4 100644 --- a/peach.wdl +++ b/peach.wdl @@ -22,7 +22,6 @@ version 1.0 task Peach { input { - File transcriptTsv File germlineVcf File germlineVcfIndex String tumorName @@ -31,28 +30,26 @@ task Peach { File panelJson String memory = 
"2G" - String dockerImage = "quay.io/biowdl/peach:v1.0" + String dockerImage = "quay.io/biowdl/peach:v1.5" Int timeMinutes = 5 } command { + set -e + mkdir -p ~{outputDir} peach \ - --recreate_bed \ - --transcript_tsv ~{transcriptTsv} \ - ~{germlineVcf} \ - ~{tumorName} \ - ~{normalName} \ - 1.0 \ - ~{outputDir} \ - ~{panelJson} \ - vcftools + -vcf ~{germlineVcf} \ + --sample_t_id ~{tumorName} \ + --sample_r_id ~{normalName} \ + --tool_version 1.5 \ + --outputDir ~{outputDir} \ + --panel } output { File callsTsv = "~{outputDir}/~{tumorName}.peach.calls.tsv" - File filteredVcf = "~{outputDir}/~{tumorName}.peach.filtered.vcf" File genotypeTsv = "~{outputDir}/~{tumorName}.peach.genotype.tsv" - Array[File] outputs = [callsTsv, filteredVcf, genotypeTsv] + Array[File] outputs = [callsTsv, genotypeTsv] } runtime { @@ -62,7 +59,6 @@ task Peach { } parameter_meta { - transcriptTsv: {description: "A tsv file describing transcripts.", category: "required"} germlineVcf: {description: "The germline VCF file from hmftools' purple.", category: "required"} germlineVcfIndex: {description: "The germline VCF's index.", category: "required"} tumorName: {description: "The name of the tumor sample.", category: "required"} @@ -74,4 +70,4 @@ task Peach { memory: {description: "The amount of memory available to the job.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } -} \ No newline at end of file +} From adb8a68ce8fff78613ee95451db821363b74353b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 16:42:36 +0100 Subject: [PATCH 0925/1208] Debug task --- .github/workflows/lint.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index e6edbbab..622e0581 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -66,6 +66,7 @@ jobs: # Womtool validate checks very slowly, so this saves a lot of time. run: | bash -c " + set -x for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do womtool validate $WDL_FILE done From 37faa1b46883bb93c6e926141d6145b3ead9fafd Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 16:50:07 +0100 Subject: [PATCH 0926/1208] Use heredoc script --- .github/workflows/lint.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 622e0581..7eb6fe5d 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -65,25 +65,25 @@ jobs: # Only check files that have changed from the base reference. # Womtool validate checks very slowly, so this saves a lot of time. 
run: | - bash -c " + bash <<- SCRIPT set -x for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do womtool validate $WDL_FILE done - " + SCRIPT - name: run miniwdl check run: bash -c 'miniwdl check $(git ls-files *.wdl)' - name: Check copyright headers run: | - bash -c ' + bash <<- SCRIPT for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" done - ' + SCRIPT - name: Check parameter_meta for inputs run: | - bash -c " + bash <<- SCRIPT for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr @@ -91,4 +91,4 @@ jobs: exit 1 fi done - " + SCRIPT From 7d8cadf598e9359e6ea6d9822fe63210f026acfe Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 17:06:44 +0100 Subject: [PATCH 0927/1208] Use always upload cache --- .github/workflows/lint.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 7eb6fe5d..11bf7a40 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -24,7 +24,8 @@ jobs: run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV - name: Cache conda environment - uses: actions/cache@v2.1.7 + # Use an always upload cache to prevent solving conda environment again and again on failing linting. 
+ uses: pat-s/always-upload-cache@v2.1.5 env: # Increase this value to manually invalidate the cache CACHE_NUMBER: 0 From eba0865e6865217ed34de9e04ac0f4c1b86f9435 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 17:11:34 +0100 Subject: [PATCH 0928/1208] Run stuff directly in bash --- .github/workflows/lint.yml | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 11bf7a40..61e3d99f 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -66,30 +66,24 @@ jobs: # Only check files that have changed from the base reference. # Womtool validate checks very slowly, so this saves a lot of time. run: | - bash <<- SCRIPT - set -x - for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do - womtool validate $WDL_FILE - done - SCRIPT + set -x + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + womtool validate $WDL_FILE + done - name: run miniwdl check run: bash -c 'miniwdl check $(git ls-files *.wdl)' - name: Check copyright headers run: | - bash <<- SCRIPT - for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do - grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" - done - SCRIPT + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" + done - name: Check parameter_meta for inputs run: | - bash <<- SCRIPT - for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do - wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || - if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr - then - exit 1 - fi - done - SCRIPT + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); 
do + wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || + if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr + then + exit 1 + fi + done From e72270755a25b5259f99d6e1855bf10926a2dc5d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Feb 2022 17:14:27 +0100 Subject: [PATCH 0929/1208] Use set -x to better see what happens --- .github/workflows/lint.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 61e3d99f..7ef19e58 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -71,15 +71,19 @@ jobs: womtool validate $WDL_FILE done - name: run miniwdl check - run: bash -c 'miniwdl check $(git ls-files *.wdl)' + run: | + set -x + bash -c 'miniwdl check $(git ls-files *.wdl)' - name: Check copyright headers run: | + set -x for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" done - name: Check parameter_meta for inputs run: | + set -x for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr From 868f3617f22d28ae6855ed8c5d75fd76c967a5db Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 14 Feb 2022 10:51:20 +0100 Subject: [PATCH 0930/1208] Add format parameter to parameter_meta --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index e17d613b..2afe3bbe 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -67,6 +67,6 @@ task PeakCalling { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - + format: {description: "Which format to use. Use BAMPE for paired-end reads.", category: "common"} } } From 0f6d75c76ed78cc1847acc732fd78ca44b2646a6 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 14 Feb 2022 17:04:49 +0100 Subject: [PATCH 0931/1208] fix some issues --- hmftools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index c0c835b5..27badc9b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -312,8 +312,8 @@ task Gripss { output { File fullVcf = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz" File fullVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz.tbi" - File filteredVcf = "~{outputDir}/~{tumorName}.gripss.somatic.filtered.vcf.gz" - File filteredVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.filtered.vcf.gz.tbi" + File filteredVcf = "~{outputDir}/~{tumorName}.gripss.filtered.somatic.vcf.gz" + File filteredVcfIndex = "~{outputDir}/~{tumorName}.gripss.filtered.somatic.vcf.gz.tbi" } runtime { @@ -629,7 +629,7 @@ task Pave { File referenceFasta File referenceFastaFai File referenceFastaDict - File refGenomeVersion + String refGenomeVersion File driverGenePanel #The following should be in the same directory. 
File geneDataCsv From b72d2fcff910a8a7cf3c1103f90bcf2974b75b4c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 09:16:17 +0100 Subject: [PATCH 0932/1208] fix Pave output --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index b349038d..36909ee4 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -652,7 +652,7 @@ task Pave { output { File outputVcf = "~{outputDir}/~{sub(basename(geneDataCsv), 'vcf.gz$', 'pave.vcf.gz')}" - File outputVcfIndex = "~{outputVcf}.tbi" + File outputVcfIndex = "~{outputDir}/~{sub(basename(geneDataCsv), 'vcf.gz$', 'pave.vcf.gz.tbi')}" } runtime { From 0554cfe785f39b9e1ebfef4a2dda7450a4ed749b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 09:36:33 +0100 Subject: [PATCH 0933/1208] fix copy paste error --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 36909ee4..c9745b57 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -651,8 +651,8 @@ task Pave { } output { - File outputVcf = "~{outputDir}/~{sub(basename(geneDataCsv), 'vcf.gz$', 'pave.vcf.gz')}" - File outputVcfIndex = "~{outputDir}/~{sub(basename(geneDataCsv), 'vcf.gz$', 'pave.vcf.gz.tbi')}" + File outputVcf = "~{outputDir}/~{sub(basename(vcfFile), 'vcf.gz$', 'pave.vcf.gz')}" + File outputVcfIndex = "~{outputDir}/~{sub(basename(vcfFile), 'vcf.gz$', 'pave.vcf.gz.tbi')}" } runtime { From 92d964d52ea3d64f7f927f6b41933098c4ec3678 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 12:33:02 +0100 Subject: [PATCH 0934/1208] fix purple outputs --- hmftools.wdl | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index c9745b57..2015c125 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -838,11 +838,13 @@ task Purple { } output { - File driverCatalogSomaticTsv = "~{outputDir}/~{tumorName}.driver.catalog.somatic.tsv" File driverCatalogGermlineTsv = 
"~{outputDir}/~{tumorName}.driver.catalog.germline.tsv" + File driverCatalogSomaticTsv = "~{outputDir}/~{tumorName}.driver.catalog.somatic.tsv" File purpleCnvGeneTsv = "~{outputDir}/~{tumorName}.purple.cnv.gene.tsv" - File purpleCnvGermlineTsv = "~{outputDir}/~{tumorName}.purple.cnv.germline.tsv" File purpleCnvSomaticTsv = "~{outputDir}/~{tumorName}.purple.cnv.somatic.tsv" + File purpleGermlineDeletionTsv = "~{outputDir}/~{tumorName}.purple.germline.deletion.tsv" + File purpleGermlineVcf = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz" + File purpleGermlineVcfIndex = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz.tbi" File purplePurityRangeTsv = "~{outputDir}/~{tumorName}.purple.purity.range.tsv" File purplePurityTsv = "~{outputDir}/~{tumorName}.purple.purity.tsv" File purpleQc = "~{outputDir}/~{tumorName}.purple.qc" @@ -851,10 +853,9 @@ task Purple { File purpleSomaticHistTsv = "~{outputDir}/~{tumorName}.purple.somatic.hist.tsv" File purpleSomaticVcf = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz" File purpleSomaticVcfIndex = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz.tbi" - File purpleGermlineVcf = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz" - File purpleGermlineVcfIndex = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz.tbi" File purpleSvVcf = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz" File purpleSvVcfIndex = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz.tbi" + File purpleVersion = "~{outputDir}/purple.version" File circosPlot = "~{outputDir}/plot/~{tumorName}.circos.png" File copynumberPlot = "~{outputDir}/plot/~{tumorName}.copynumber.png" File inputPlot = "~{outputDir}/plot/~{tumorName}.input.png" @@ -863,19 +864,19 @@ task Purple { File segmentPlot = "~{outputDir}/plot/~{tumorName}.segment.png" File somaticClonalityPlot = "~{outputDir}/plot/~{tumorName}.somatic.clonality.png" File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" - File purpleVersion = "~{outputDir}/purple.version" + File somaticRainfallPlot = 
"~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File circosNormalRatio = "~{outputDir}/circos/~{referenceName}.ratio.circos" - File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" - File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" - File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" - File circosTumorRatio = "~{outputDir}/circos/~{tumorName}.ratio.circos" - File circosGaps = "~{outputDir}/circos/gaps.txt" File circosBaf = "~{outputDir}/circos/~{tumorName}.baf.circos" + File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" File circosCnv = "~{outputDir}/circos/~{tumorName}.cnv.circos" + File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos" File circosInputConf = "~{outputDir}/circos/~{tumorName}.input.conf" + File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos" File circosMap = "~{outputDir}/circos/~{tumorName}.map.circos" + File circosTumorRatio = "~{outputDir}/circos/~{tumorName}.ratio.circos" File circosSnp = "~{outputDir}/circos/~{tumorName}.snp.circos" - Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvGermlineTsv, + File circosGaps = "~{outputDir}/circos/gaps.txt" + Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv, purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc, purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, From d554e60c08dee3597680cb18d9eee67201aba5ac Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 13:16:33 +0100 Subject: [PATCH 0935/1208] fix peach command --- peach.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/peach.wdl b/peach.wdl index 6a5770f4..bd8375d7 100644 --- a/peach.wdl +++ b/peach.wdl @@ -43,7 +43,7 @@ task Peach { --sample_r_id ~{normalName} \ --tool_version 1.5 \ --outputDir ~{outputDir} \ - --panel + --panel ~{panelJson} } output { From 
54f323f52f7ac0d0fbbab1f893b5f8583d504791 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Feb 2022 13:20:54 +0100 Subject: [PATCH 0936/1208] fix some typos --- peach.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/peach.wdl b/peach.wdl index bd8375d7..d1bc17f8 100644 --- a/peach.wdl +++ b/peach.wdl @@ -38,11 +38,11 @@ task Peach { set -e mkdir -p ~{outputDir} peach \ - -vcf ~{germlineVcf} \ + --vcf ~{germlineVcf} \ --sample_t_id ~{tumorName} \ --sample_r_id ~{normalName} \ --tool_version 1.5 \ - --outputDir ~{outputDir} \ + --outputdir ~{outputDir} \ --panel ~{panelJson} } From c675c91fbc91f932c6f5018986d025993611f8a3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 24 Feb 2022 15:14:35 +0100 Subject: [PATCH 0937/1208] fix linx output and health-checker command --- hmftools.wdl | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 2015c125..3ab203fb 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -484,8 +484,14 @@ task HealthChecker { -tum_wgs_metrics_file ~{tumorMetrics} \ -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ -output_dir ~{outputDir} - test -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' && echo 'true' > '~{outputDir}/succeeded' - test -e '~{outputDir}/~{tumorName}.HealthCheckFailed' && echo 'false' > '~{outputDir}/succeeded' + if [ -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' ] + then + echo 'true' > '~{outputDir}/succeeded' + fi + if [ -e '~{outputDir}/~{tumorName}.HealthCheckFailed' ] + then + echo 'false' > '~{outputDir}/succeeded' + fi } output { @@ -531,6 +537,7 @@ task Linx { File lineElementCsv File knownFusionCsv File driverGenePanel + Boolean writeAllVisFusions = false #The following should be in the same directory. 
File geneDataCsv File proteinFeaturesCsv @@ -540,7 +547,7 @@ task Linx { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 10 - String dockerImage = "quay.io/biocontainers/hmftools-linx:1.17--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" } command { @@ -558,7 +565,8 @@ task Linx { -check_drivers \ -driver_gene_panel ~{driverGenePanel} \ -chaining_sv_limit 0 \ - -write_vis_data + -write_vis_data \ + ~{if writeAllVisFusions then "-write_all_vis_fusions" else ""} } output { @@ -569,7 +577,6 @@ task Linx { File linxFusion = "~{outputDir}/~{sampleName}.linx.fusion.tsv" File linxLinks = "~{outputDir}/~{sampleName}.linx.links.tsv" File linxSvs = "~{outputDir}/~{sampleName}.linx.svs.tsv" - File linxViralInserts = "~{outputDir}/~{sampleName}.linx.viral_inserts.tsv" File linxVisCopyNumber = "~{outputDir}/~{sampleName}.linx.vis_copy_number.tsv" File linxVisFusion = "~{outputDir}/~{sampleName}.linx.vis_fusion.tsv" File linxVisGeneExon = "~{outputDir}/~{sampleName}.linx.vis_gene_exon.tsv" @@ -578,9 +585,9 @@ task Linx { File linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv" File linxVersion = "~{outputDir}/linx.version" Array[File] outputs = [driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion, - linxLinks, linxSvs, linxViralInserts, linxVisCopyNumber, - linxVisFusion, linxVisGeneExon, linxVisProteinDomain, - linxVisSegments, linxVisSvData, linxVersion] + linxLinks, linxSvs, linxVisCopyNumber, linxVisFusion, + linxVisGeneExon, linxVisProteinDomain, linxVisSegments, linxVisSvData, + linxVersion] } runtime { @@ -600,6 +607,7 @@ task Linx { lineElementCsv: {description: "A list of known LINE source regions.", category: "required"} knownFusionCsv: {description: "A CSV file describing known fusions.", category: "required"} driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} + writeAllVisFusions: {description: "Equivalent to the -write_all_vis_fusions 
flag.", category: "advanced"} geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} From 5e821d51571d91727357e324cc9283eafce5e427 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 24 Feb 2022 16:26:29 +0100 Subject: [PATCH 0938/1208] fix health checker output --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 3ab203fb..9a3bd437 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -495,7 +495,7 @@ task HealthChecker { } output { - Boolean succeeded = read_boolean("result") + Boolean succeeded = read_boolean("succeeded") File outputFile = if succeeded then "~{outputDir}/~{tumorName}.HealthCheckSucceeded" else "~{outputDir}/~{tumorName}.HealthCheckFailed" From f2cc5cc02fb5ed2376969ff745ce0d6741fc32ff Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Feb 2022 11:43:22 +0100 Subject: [PATCH 0939/1208] add LinxVisualisations --- hmftools.wdl | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index 9a3bd437..c852b520 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -622,6 +622,61 @@ task Linx { } } +task LinxVisualisations { + input { + String outputDir = "./linx_visualisation" + String sample + String refGenomeVersion + Array[File]+ linxOutput + Boolean plotReportable = true + + String memory = "9G" + String javaXmx = "8G" + Int timeMinutes = 10 + String dockerImage = 
"quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" + } + + command { + set -e + mkdir -p ~{outputDir} + java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -cp /usr/local/share/hmftools-linx-1.18-0/sv-linx.jar \ + com.hartwig.hmftools.linx.visualiser.SvVisualiser \ + -sample ~{sample} \ + -ref_genome_version ~{refGenomeVersion} \ + -circos /usr/local/bin/circos \ + -vis_file_dir ~{sub(linxOutput[0], basename(linxOutput[0]), "")} \ + -data_out ~{outputDir}/circos \ + -plot_out ~{outputDir}/plot \ + ~{if plotReportable then "-plot_reportable" else ""} + } + + output { + + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + outputDir: {description: "The directory the outputs will be written to.", category: "required"} + sample: {description: "The sample's name.", category: "required"} + refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"} + linxOutput: {description: "The directory containing the linx output.", category: "required"} + plotReportable: {description: "Equivalent to the -plot_reportable flag.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Pave { input { String outputDir = "./" From 8fcd2e2598fbc340abdda2b3a3d56dae04cb6bdf Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Feb 2022 12:41:51 +0100 Subject: [PATCH 0940/1208] add linx visualisation output --- hmftools.wdl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index c852b520..c1a824c6 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -632,7 +632,7 @@ task LinxVisualisations { String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 10 + Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" } @@ -647,12 +647,13 @@ task LinxVisualisations { -circos /usr/local/bin/circos \ -vis_file_dir ~{sub(linxOutput[0], basename(linxOutput[0]), "")} \ -data_out ~{outputDir}/circos \ - -plot_out ~{outputDir}/plot \ + -plot_out ~{outputDir}/plots \ ~{if plotReportable then "-plot_reportable" else ""} } output { - + Array[File] circos = glob("~{outputDir}/circos/*") + Array[File] plots = glob("~{outputDir}/plots/*" } runtime { From 97c9681b4d10a9fc5d7c2b930df9e69cba85d07c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Feb 2022 12:50:28 +0100 Subject: [PATCH 0941/1208] typo --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index c1a824c6..7f739311 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -653,7 +653,7 @@ task LinxVisualisations { output { Array[File] circos = glob("~{outputDir}/circos/*") - Array[File] plots = glob("~{outputDir}/plots/*" + Array[File] plots = glob("~{outputDir}/plots/*") } runtime { From 2467174555e85c5b4cf819018afd44a8b5f24af8 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Feb 2022 14:19:08 +0100 Subject: [PATCH 0942/1208] update virus-interpreter to 1.2 --- hmftools.wdl | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 
deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 7f739311..65187f44 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1099,25 +1099,29 @@ task Sage { task VirusInterpreter { input { String sampleId + File purplePurityTsv + File prupleQcFile + File tumorSampleWgsMetricsFile File virusBreakendTsv File taxonomyDbTsv - File virusInterpretationTsv - File virusBlacklistTsv + File virusReportingDbTsv String outputDir = "." String memory = "3G" String javaXmx = "2G" Int timeMinutes = 15 - String dockerImage = "quay.io/biowdl/virus-interpreter:1.0" + String dockerImage = "quay.io/biowdl/virus-interpreter:1.2" } command { - virus-interpreter -Xmx~{javaXmx} \ + virus-interpreter -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ -sample_id ~{sampleId} \ + -purple_purity_tsv ~{purplePurityTsv} \ + -purple_qc_file ~{prupleQcFile} \ + -tumor_sample_wgs_metrics_file ~{tumorSampleWgsMetricsFile} \ -virus_breakend_tsv ~{virusBreakendTsv} \ -taxonomy_db_tsv ~{taxonomyDbTsv} \ - -virus_interpretation_tsv ~{virusInterpretationTsv} \ - -virus_blacklist_tsv ~{virusBlacklistTsv} \ + -virus_reporting_db_tsv ~{virusReportingDbTsv} \ -output_dir ~{outputDir} } @@ -1133,10 +1137,12 @@ task VirusInterpreter { parameter_meta { sampleId: {description: "The name of the sample.", category: "required"} + purplePurityTsv: {description: "The purity file produced by purple.", category: "required"} + prupleQcFile: {description: "The QC file produced by purple.", category: "required"} + tumorSampleWgsMetricsFile: {description: "The picard WGS metrics file for this sample.", category: "required"} virusBreakendTsv: {description: "The TSV output from virusbreakend.", category: "required"} taxonomyDbTsv: {description: "A taxonomy database tsv.", category: "required"} - virusInterpretationTsv: {description: "A virus interpretation tsv.", category: "required"} - virusBlacklistTsv: {description: "A virus blacklist tsv.", category: "required"} + virusReportingDbTsv: {description: "A virus reporting tsv.", 
category: "required"} outputDir: {description: "The directory the output will be written to.", category: "required"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", From b76866a2fbe5c23961f63dfa6b68697cf3c23126 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 1 Mar 2022 15:37:19 +0100 Subject: [PATCH 0943/1208] update protect to 2.0 --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 65187f44..da9c6fd5 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -632,7 +632,7 @@ task LinxVisualisations { String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 60 + Int timeMinutes = 1440 #FIXME String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" } @@ -777,7 +777,7 @@ task Protect { String memory = "9G" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biowdl/protect:v1.4" + String dockerImage = "quay.io/biowdl/protect:v2.0" } command { From 513e64560afa2a532a791289e5ef77a90006aa50 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 2 Mar 2022 10:23:07 +0100 Subject: [PATCH 0944/1208] fix health-checker --- hmftools.wdl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 76620e3c..27b31bca 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -409,13 +409,19 @@ task HealthChecker { -tum_wgs_metrics_file ~{tumorMetrics} \ -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \ -output_dir ~{outputDir} - test -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' && echo 'true' > '~{outputDir}/succeeded' - test -e '~{outputDir}/~{tumorName}.HealthCheckFailed' && echo 'false' > '~{outputDir}/succeeded' + if [ -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' ] + then + echo 'true' > '~{outputDir}/succeeded' + fi + if [ -e 
'~{outputDir}/~{tumorName}.HealthCheckFailed' ] + then + echo 'false' > '~{outputDir}/succeeded' + fi } output { - Boolean succeeded = read_boolean("result") - File outputFile = if succeeded + Boolean succeeded = read_boolean("succeeded") + File outputFile = if succeeded then "~{outputDir}/~{tumorName}.HealthCheckSucceeded" else "~{outputDir}/~{tumorName}.HealthCheckFailed" } From 652735023d7a71738b0ccea450e4fedd27e41830 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 2 Mar 2022 12:42:00 +0100 Subject: [PATCH 0945/1208] update cuppa to 1.6 --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index da9c6fd5..277c8dd4 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -184,7 +184,7 @@ task Cuppa { String javaXmx = "4G" String memory = "5G" Int timeMinutes = 10 - String dockerImage = "quay.io/biowdl/cuppa:1.4" + String dockerImage = "quay.io/biowdl/cuppa:1.6" } command { @@ -240,7 +240,7 @@ task CuppaChart { String memory = "4G" Int timeMinutes = 5 - String dockerImage = "quay.io/biowdl/cuppa:1.4" + String dockerImage = "quay.io/biowdl/cuppa:1.6" } command { From d5294222e69c6e793ea0d13e448e67b9482e5a10 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 3 Mar 2022 15:50:52 +0100 Subject: [PATCH 0946/1208] add orange, cupGenerateReport and (hopefully) fix sage plots --- hmftools.wdl | 212 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 208 insertions(+), 4 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 277c8dd4..75fd2d19 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -168,6 +168,62 @@ task Cobalt { } } +task CupGenerateReport { + input { + String sampleName + File cupData + String outputDir = "./cuppa" + + String memory = "5G" + Int timeMinutes = 10 + String dockerImage = "quay.io/biowdl/cuppa:1.6" + } + + # This script writes to the directory that the input is located in. 
+ # Giving the input directly will cause the script to write in the + # locallized input dir, which may cause issues with write permissions + # in certain execution engines or backends. We, therefore, make links + # to a working directory, and give that directory as input instead. + # We can't just use the outputDir directly. This could be an + # absolute path in which case the linking might fail due to name + # collisions. Outputs are copied to the given output dir afterwards. + command { + set -e + mkdir -p ./workdir ~{outputDir} + ln -s -t workdir ~{sep=" " cupData} + CupGenerateReport \ + ~{sampleName} \ + workdir + mv -t ~{outputDir} \ + ./workdir/~{sampleName}.cup.report.summry.png \ + ./workdir/~{sampleName}.cup.report.features.png \ + ./workdir/~{sampleName}_cup.report.pdf + } + + output { + File summaryPng = "~{outputDir}/~{sampleName}.cup.report.summry.png" + File featuresPng = "~{outputDir}/~{sampleName}.cup.report.features.png" + File reportPdf = "~{outputDir}/~{sampleName}_cup.report.pdf" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + sampleName: {description: "The sample id.", category: "required"} + cupData: {description: "The output produced by cuppa.", category: "required"} + outputDir: {description: "The directory the ouput will be placed in.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Cuppa { input { Array[File]+ linxOutput @@ -632,7 +688,7 @@ task LinxVisualisations { String memory = "9G" String javaXmx = "8G" - Int timeMinutes = 1440 #FIXME + Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" } @@ -678,6 +734,151 @@ task LinxVisualisations { } } +task Orange { + input { + String outputDir = "./orange" + File doidJson + Array[String] sampleDoids + String tumorName + String referenceName + File referenceMetrics + File tumorMetrics + File referenceFlagstats + File tumorFlagstats + File sageGermlineGeneCoverageTsv + File sageSomaticRefSampleBqrPlot + File sageSomaticTumorSampleBqrPlot + File purpleGeneCopyNumberTsv + File purpleGermlineDriverCatalogTsv + File purpleGermlineVariantVcf + File purpleGermlineVariantVcfIndex + Array[File]+ purplePlots + File purplePurityTsv + File purpleQcFile + File purpleSomaticDriverCatalogTsv + File purpleSomaticVariantVcf + File purpleSomaticVariantVcfIndex + File linxFusionTsv + File linxBreakendTsv + File linxDriverCatalogTsv + File linxDriverTsv + Array[File]+ linxPlots + File cuppaResultCsv + File cuppaSummaryPlot + File cuppaFeaturePlot + File chordPredictionTxt + File peachGenotypeTsv + File protectEvidenceTsv + File annotatedVirusTsv + #File pipelineVersionFile + File cohortMappingTsv + File cohortPercentilesTsv + + String memory = "17G" + String javaXmx = "16G" + Int timeMinutes = 1440 #FIXME + String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" + } + + command { + set -e + mkdir -p ~{outputDir} + orange -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + -output_dir ~{outputDir} \ + -doid_json ~{doidJson} \ + -primary_tumor_doids '~{sep=";" sampleDoids}' \ + -max_evidence_level C \ + -tumor_sample_id ~{tumorName} \ + -reference_sample_id ~{referenceName} \ + -ref_sample_wgs_metrics_file ~{referenceMetrics} \ + 
-tumor_sample_wgs_metrics_file ~{tumorMetrics} \ + -ref_sample_flagstat_file ~{referenceFlagstats} \ + -tumor_sample_flagstat_file ~{tumorFlagstats} \ + -sage_germline_gene_coverage_tsv ~{sageGermlineGeneCoverageTsv} \ + -sage_somatic_ref_sample_bqr_plot ~{sageSomaticRefSampleBqrPlot} \ + -sage_somatic_tumor_sample_bqr_plot ~{sageSomaticTumorSampleBqrPlot} \ + -purple_gene_copy_number_tsv ~{purpleGeneCopyNumberTsv} \ + -purple_germline_driver_catalog_tsv ~{purpleGermlineDriverCatalogTsv} \ + -purple_germline_variant_vcf ~{purpleGermlineVariantVcf} \ + -purple_plot_directory ~{sub(purplePlots[0], basename(purplePlots[0]), "")} \ + -purple_purity_tsv ~{purplePurityTsv} \ + -purple_qc_file ~{purpleQcFile} \ + -purple_somatic_driver_catalog_tsv ~{purpleSomaticDriverCatalogTsv} \ + -purple_somatic_variant_vcf ~{purpleSomaticVariantVcf} \ + -linx_fusion_tsv ~{linxFusionTsv} \ + -linx_breakend_tsv ~{linxBreakendTsv} \ + -linx_driver_catalog_tsv ~{linxDriverCatalogTsv} \ + -linx_driver_tsv ~{linxDriverTsv} \ + -linx_plot_directory ~{sub(linxPlots[0], basename(linxPlots[0]), "")} \ + -cuppa_result_csv ~{cuppaResultCsv} \ + -cuppa_summary_plot ~{cuppaSummaryPlot} \ + -cuppa_feature_plot ~{cuppaFeaturePlot} \ + -chord_prediction_txt ~{chordPredictionTxt} \ + -peach_genotype_tsv ~{peachGenotypeTsv} \ + -protect_evidence_tsv ~{protectEvidenceTsv} \ + -annotated_virus_tsv ~{annotatedVirusTsv} \ + -cohort_mapping_tsv ~{cohortMappingTsv} \ + -cohort_percentiles_tsv ~{cohortPercentilesTsv} + } + #TODO may need to be added: -pipeline_version_file ~{pipelineVersionFile} + + output { + File orangeJson = "~{outputDir}/~{tumorName}.orange.json" + File orangePdf = "~{outputDir}/~{tumorName}.orange.pdf" + } + + runtime { + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + memory: memory + } + + parameter_meta { + outputDir: {description: "The directory the outputs will be written to.", category: "common"} + doidJson: {description: "A json with the DOID (Human Disease 
Ontology) tree.", category: "required"} + sampleDoids: {description: "The DOIDs (Human Disease Ontology) for the primary tumor.", category: "required"} + tumorName: {description: "The name of the tumor sample.", category: "required"} + referenceName: {description: "The name of the normal sample.", category: "required"} + referenceMetrics: {description: "The picard WGS metrics for the normal sample.", category: "required"} + tumorMetrics: {description: "The picard WGS metrics for the tumor sample.", category: "required"} + referenceFlagstats: {description: "The flagstats for the normal sample.", category: "required"} + tumorFlagstats: {description: "The flagstats for the tumor sample.", category: "required"} + sageGermlineGeneCoverageTsv: {description: "Gene coverage file produced by the germline sage run.", category: "required"} + sageSomaticRefSampleBqrPlot: {description: "The reference bqr plot produced by the somatic sage run.", category: "required"} + sageSomaticTumorSampleBqrPlot: {description: "The reference bqr plot produced by the somatic sage run.", category: "required"} + purpleGeneCopyNumberTsv: {description: "Copy number tsv produced by purple.", category: "required"} + purpleGermlineDriverCatalogTsv: {description: "Germline driver catalog produced by purple.", category: "required"} + purpleGermlineVariantVcf: {description: "Germline variant vcf produced by purple.", category: "required"} + purplePlots: {description: "The plots generated by purple.", category: "required"} + purplePurityTsv: {description: "The purity file produced by purple.", category: "required"} + purpleQcFile: {description: "The qc file produced by purple.", category: "required"} + purpleSomaticDriverCatalogTsv: {description: "Somatic driver catalog produced by purple.", category: "required"} + purpleSomaticVariantVcf: {description: "Somatic variant vcf produced by purple.", category: "required"} + linxFusionTsv: {description: "The fusions tsv produced by linx.", category: 
"required"} + linxBreakendTsv: {description: "The breakend tsv produced by linx.", category: "required"} + linxDriverCatalogTsv: {description: "The driver catalog produced by linx.", category: "required"} + linxDriverTsv: {description: "The driver tsv produced by linx.", category: "required"} + linxPlots: {description: "The plots generated by linx.", category: "required"} + cuppaResultCsv: {description: "The cuppa results csv.", category: "required"} + cuppaSummaryPlot: {description: "The cuppa summary plot.", category: "required"} + cuppaFeaturePlot: {description: "The cuppa feature plot.", category: "required"} + chordPredictionTxt: {description: "Chord prediction results.", category: "required"} + peachGenotypeTsv: {description: "Genotype tsv produced by peach.", category: "required"} + protectEvidenceTsv: {description: "Evidence tsv produced by protect.", category: "required"} + annotatedVirusTsv: {description: "Annotated virus tsv produced by virus-interpreter.", category: "required"} + #pipelineVersionFile: {description: "", category: "required"} + cohortMappingTsv: {description: "Cohort mapping file from the HMFTools resources.", category: "required"} + cohortPercentilesTsv: {description: "Cohort percentile file from the HMFTools resources.", category: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Pave { input { String outputDir = "./" @@ -1024,7 +1225,7 @@ task Sage { String javaXmx = "50G" String memory = "51G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, referenceBam]), "G") * 9 / threads) - String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } command { @@ -1054,8 +1255,11 @@ task Sage { output { File outputVcf = outputPath File outputVcfIndex = outputPath + ".tbi" - # There is some plots as well, but in the current container the labels in the plots are just series of `□`s. - # This seems to be a systemic issue with R generated plots in biocontainers... + File? referenceSageBqrPng = "~{referenceName}.sage.bqr.png" + File? referenceSageBqrTsv = "~{referenceName}.sage.bqr.tsv" + File tumorSageBqrPng = "~{tumorName}.sage.bqr.png" + File tumorSageBqrTsv = "~{tumorName}.sage.bqr.tsv" + File sageGeneCoverageTsv = "~{tumorName}.sage.gene.coverage.tsv" } runtime { From 960aa3cf0a713b6d7870b33c529e22b98b711aea Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 4 Mar 2022 15:06:49 +0100 Subject: [PATCH 0947/1208] Slightly less records in RAM --- picard.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/picard.wdl b/picard.wdl index e81cd4e3..436369d7 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1038,7 +1038,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String outputPath String outputPathMetrics = outputPath + ".metrics" String outputPathUmiMetrics = outputPath + ".umi-metrics" - Int maxRecordsInRam = 3000000 # Default is 500_000 but that will lead to very small files on disk. + Int maxRecordsInRam = 1500000 # Default is 500_000 but that will lead to very small files on disk. String? 
assumeSortOrder String tempdir = "temp" Boolean removeDuplicates = true From cf0b105cdf0a2ad7a2c1354857c281c18150a36b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 7 Mar 2022 10:32:35 +0100 Subject: [PATCH 0948/1208] Add missing whitespace. Co-authored-by: Davy Cats --- umi.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/umi.wdl b/umi.wdl index a32d646a..0dc5c55e 100644 --- a/umi.wdl +++ b/umi.wdl @@ -34,7 +34,9 @@ task BamReadNameToUmiTag { Int timeMinutes = 1 + ceil(size([inputBam], "G") * 10) String dockerImage = "quay.io/biocontainers/pysam:0.17.0--py39h051187c_0" } + String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai") + command <<< python < Date: Mon, 7 Mar 2022 12:15:51 +0100 Subject: [PATCH 0949/1208] Add parameter_meta for useSoftclippingforSupplementary --- bwa.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/bwa.wdl b/bwa.wdl index 1cb170b7..373de628 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -94,6 +94,7 @@ task Mem { outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"} sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"} usePostalt: {description: "Whether to use the postalt script from bwa kit."} + useSoftclippingForSupplementary: {description: "Use soft-clipping for supplementary alignments instead of hard-clipping", category: "common"} sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"} compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"} readgroup: {description: "A readgroup identifier.", category: "common"} From b070d3efbfcbd41ca3545a2eec0e5bd1a6dc2a3d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 7 Mar 2022 12:19:13 +0100 Subject: [PATCH 0950/1208] Add parameter_meta for Picard UmiAwareMarkDuplicatesWithMateCigar --- picard.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/picard.wdl b/picard.wdl index b6d9fadf..eea8d42f 100644 --- a/picard.wdl +++ b/picard.wdl @@ -1160,8 +1160,10 @@ task UmiAwareMarkDuplicatesWithMateCigar { assumeSortOrder: {description: "Assume a certain sort order even though the header might say otherwise.", category: "common"} tempdir: {description: "Temporary directory.", category: "advanced"} compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"} + maxRecordsInRam: {description: "This will specify the number of records stored in RAM before spilling to disk.", category: "advanced"} useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"} useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 8ccfb0e0d3b3e31ad5aa08fc527ecaa46e77c589 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 7 Mar 2022 13:46:17 +0100 Subject: [PATCH 0951/1208] fix CupGenerateReport --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 75fd2d19..d9dea387 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -193,7 +193,7 @@ task CupGenerateReport { ln -s -t workdir ~{sep=" " cupData} CupGenerateReport \ ~{sampleName} \ - workdir + workdir/ mv -t ~{outputDir} \ ./workdir/~{sampleName}.cup.report.summry.png \ ./workdir/~{sampleName}.cup.report.features.png \ From 799811db76b369b057aa54555e08c3025c6905a0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 8 Mar 2022 10:38:40 +0100 Subject: [PATCH 0952/1208] fix cupGenerateReport --- hmftools.wdl | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index d9dea387..2e294ecd 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -195,15 +195,19 @@ task CupGenerateReport { ~{sampleName} \ workdir/ mv -t ~{outputDir} \ - ./workdir/~{sampleName}.cup.report.summry.png \ - ./workdir/~{sampleName}.cup.report.features.png \ - ./workdir/~{sampleName}_cup.report.pdf + ./workdir/~{sampleName}.cup.report.summary.png \ + ./workdir/~{sampleName}_cup_report.pdf + if [ -f ./workdir/~{sampleName}.cup.report.features.png ] + then + mv -t ~{outputDir} \ + ./workdir/~{sampleName}.cup.report.features.png + fi } output { - File summaryPng = "~{outputDir}/~{sampleName}.cup.report.summry.png" - File featuresPng = "~{outputDir}/~{sampleName}.cup.report.features.png" - File reportPdf = "~{outputDir}/~{sampleName}_cup.report.pdf" + File summaryPng = "~{outputDir}/~{sampleName}.cup.report.summary.png" + File? 
featuresPng = "~{outputDir}/~{sampleName}.cup.report.features.png" + File reportPdf = "~{outputDir}/~{sampleName}_cup_report.pdf" } runtime { @@ -765,7 +769,7 @@ task Orange { Array[File]+ linxPlots File cuppaResultCsv File cuppaSummaryPlot - File cuppaFeaturePlot + File? cuppaFeaturePlot File chordPredictionTxt File peachGenotypeTsv File protectEvidenceTsv @@ -812,7 +816,7 @@ task Orange { -linx_plot_directory ~{sub(linxPlots[0], basename(linxPlots[0]), "")} \ -cuppa_result_csv ~{cuppaResultCsv} \ -cuppa_summary_plot ~{cuppaSummaryPlot} \ - -cuppa_feature_plot ~{cuppaFeaturePlot} \ + ~{"-cuppa_feature_plot " + cuppaFeaturePlot} \ -chord_prediction_txt ~{chordPredictionTxt} \ -peach_genotype_tsv ~{peachGenotypeTsv} \ -protect_evidence_tsv ~{protectEvidenceTsv} \ @@ -861,7 +865,7 @@ task Orange { linxPlots: {description: "The plots generated by linx.", category: "required"} cuppaResultCsv: {description: "The cuppa results csv.", category: "required"} cuppaSummaryPlot: {description: "The cuppa summary plot.", category: "required"} - cuppaFeaturePlot: {description: "The cuppa feature plot.", category: "required"} + cuppaFeaturePlot: {description: "The cuppa feature plot.", category: "common"} chordPredictionTxt: {description: "Chord prediction results.", category: "required"} peachGenotypeTsv: {description: "Genotype tsv produced by peach.", category: "required"} protectEvidenceTsv: {description: "Evidence tsv produced by protect.", category: "required"} From 5ae1f6de5c3c4efe38a792e3be1104bbacacea3b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 8 Mar 2022 10:47:30 +0100 Subject: [PATCH 0953/1208] fix copy-paste error (orange docker image) --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 2e294ecd..34941059 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -781,7 +781,7 @@ task Orange { String memory = "17G" String javaXmx = "16G" Int timeMinutes = 1440 #FIXME - String dockerImage = 
"quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" + String dockerImage = "quay.io/quay.io/biowdl/orange:v1.6" } command { From 54d70a6b508f4a8360ce995a4bda5f6094225826 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 8 Mar 2022 10:55:02 +0100 Subject: [PATCH 0954/1208] fix copy-paste error --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 34941059..5a480f93 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -781,7 +781,7 @@ task Orange { String memory = "17G" String javaXmx = "16G" Int timeMinutes = 1440 #FIXME - String dockerImage = "quay.io/quay.io/biowdl/orange:v1.6" + String dockerImage = "quay.io/biowdl/orange:v1.6" } command { From 9ca13a0a999ff874d041d26c4860c8c07edbe92d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Mar 2022 10:16:59 +0100 Subject: [PATCH 0955/1208] Remove duplicate options for markduplicates --- picard.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/picard.wdl b/picard.wdl index eea8d42f..3d835829 100644 --- a/picard.wdl +++ b/picard.wdl @@ -726,8 +726,6 @@ task MarkDuplicates { CREATE_INDEX=true \ ADD_PG_TAG_TO_READS=false \ CREATE_MD5_FILE=~{true="true" false="false" createMd5File} \ - USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \ - USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} } output { From ac55982a7acf3c06460ae0b8ac2c394865eeaa4c Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 18 Mar 2022 12:46:53 +0100 Subject: [PATCH 0956/1208] run tabix if vcf index is missing in gridss --- gridss.wdl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index d3d251a5..92d7df1e 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -116,6 +116,12 @@ task GRIDSS { ~{normalBam} \ ~{tumorBam} samtools index ~{outputPrefix}_assembly.bam ~{outputPrefix}_assembly.bai + + # For some reason the VCF index is sometimes missing + if [ ! 
-e ~{outputPrefix}.vcf.gz.tbi ] + then + tabix ~{outputPrefix}.vcf.gz + fi } output { From 173bb2e6547c1fa4ee20ec5da98368522e18b887 Mon Sep 17 00:00:00 2001 From: dcats Date: Fri, 18 Mar 2022 12:49:26 +0100 Subject: [PATCH 0957/1208] update changelog --- CHANGELOG.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c0db947..b028b60a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ The GRIDSS task will now run tabix separately if GRIDSS doesn't + produce a vcf index. + Added a task for SnpEff. + Adjusted runtime settings for sambamba Markdup. + Added a task for sambamba Flagstat. @@ -28,7 +30,7 @@ version 5.1.0-dev + Sage + VirusInterpreter + Added a task for VirusBreakend. -+ Added a task for GridssAnnotateVcfRepeatmasker. ++ Added a task for GridssAnnotateVcfRepeatmasker. + Bumped GRIDSS version to 2.12.2. + Adjusted GRIDSS runtime settings. + Added optional inputs to GRIDSS: @@ -147,7 +149,7 @@ version 4.0.0 + Picard MergeVcf now uses compression level 1 by default. + bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The number of threads is now related to the number of threads on the aligner. - Using more threads reduces the chance of the samtools sort pipe getting + Using more threads reduces the chance of the samtools sort pipe getting blocked if it's full. + Renamed a few inputs in centrifuge.wdl, isoseq3.wdl, talon.wdl, transcriptclean.wdl to be more descriptive. 
From 1c02ce1ea5464c11491f9dc67802ab71cb46dbcb Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 8 Apr 2022 12:44:01 +0200 Subject: [PATCH 0958/1208] add task for sv type annotation of gridss results --- gridss.wdl | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 92d7df1e..f771ebe4 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -79,6 +79,69 @@ task AnnotateInsertedSequence { } } +task AnnotateSvType { + input { + File gridssVcf + File gridssVcfIndex + String outputPath = "./gridss.svtyped.vcf" + + String memory = "32G" + String dockerImage = "quay.io/biocontainers/bioconductor-structuralvariantannotation:1.10.0--r41hdfd78af_0" + Int timeMinutes = 240 + } + + # Based on https://github.com/PapenfussLab/gridss/issues/74 + command <<< + set -e + mkdir -p "$(dirname ~{outputPath})" + R --vanilla << EOF + library(VariantAnnotation) + library(StructuralVariantAnnotation) + + vcf_path <- "~{gridssVcf}" + out_path <- "~{outputPath}" + + # Simple SV type classifier + simpleEventType <- function(gr) { + return(ifelse(seqnames(gr) != seqnames(partner(gr)), "BND", # inter-chromosomosal + ifelse(gr$insLen >= abs(gr$svLen) * 0.7, "INS", + ifelse(strand(gr) == strand(partner(gr)), "INV", + ifelse(xor(start(gr) < start(partner(gr)), strand(gr) == "-"), "DEL", + "DUP"))))) + } + + header <- scanVcfHeader(vcf_path) + vcf <- readVcf(vcf_path, seqinfo(header)) + gr <- breakpointRanges(vcf) + svtype <- simpleEventType(gr) + info(vcf[gr$sourceId])$SVTYPE <- svtype + writeVcf(vcf, out_path) + EOF + >>> + + output { + File vcf = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + gridssVcf: {description: "The VCF produced by GRIDSS.", category: "required"} + gridssVcfIndex: {description: "The index for the VCF produced by GRIDSS.", category: "required"} + outputPath: {description: "The path the output should be 
written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GRIDSS { input { File tumorBam From 87bb3c4f2104cb3c8a020aa0abfb7f5a4faa387a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 8 Apr 2022 13:32:48 +0200 Subject: [PATCH 0959/1208] copy paste error --- gridss.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index f771ebe4..b38f344e 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -134,8 +134,6 @@ task AnnotateSvType { gridssVcfIndex: {description: "The index for the VCF produced by GRIDSS.", category: "required"} outputPath: {description: "The path the output should be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 1b4238c66c6150e57e128086d16d6939a1198406 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 8 Apr 2022 14:14:33 +0200 Subject: [PATCH 0960/1208] typo --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index b38f344e..00705392 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -79,7 +79,7 @@ task AnnotateInsertedSequence { } } -task AnnotateSvType { +task AnnotateSvTypes { input { File gridssVcf File gridssVcfIndex From bd153caa313e5fad73d2716813f7eb02c36b963c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Jun 2022 14:56:45 +0200 Subject: [PATCH 0961/1208] adjust gridss threads --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index d3d251a5..b118af9d 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -95,7 +95,7 @@ task GRIDSS { Int jvmHeapSizeGb = 300 Int nonJvmMemoryGb = 50 - Int threads = 4 + Int threads = 16 Int timeMinutes = ceil(7200 / threads) + 1800 String dockerImage = "quay.io/biowdl/gridss:2.12.2" } From 873ece6f64e85bea10c28754f3260de155cc8d80 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 17 Jun 2022 14:59:35 +0200 Subject: [PATCH 0962/1208] adjust some runtime settings --- bedtools.wdl | 2 +- gridss.wdl | 10 +++++----- hmftools.wdl | 8 ++++---- sambamba.wdl | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/bedtools.wdl b/bedtools.wdl index 1d956cab..80a281d6 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -76,7 +76,7 @@ task Coverage { String outputPath = "./coverage.tsv" String memory = "8G" - Int timeMinutes = 120 + Int timeMinutes = 320 String dockerImage = "quay.io/biocontainers/bedtools:2.30.0--h7d7f7ad_2" } diff --git a/gridss.wdl b/gridss.wdl index b118af9d..c1a41a25 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -93,9 +93,9 @@ task GRIDSS { File? blacklistBed File? 
gridssProperties - Int jvmHeapSizeGb = 300 - Int nonJvmMemoryGb = 50 - Int threads = 16 + Int jvmHeapSizeGb = 64 + Int nonJvmMemoryGb = 10 + Int threads = 12 Int timeMinutes = ceil(7200 / threads) + 1800 String dockerImage = "quay.io/biowdl/gridss:2.12.2" } @@ -216,9 +216,9 @@ task Virusbreakend { String outputPath = "./virusbreakend.vcf" String memory = "75G" - Int threads = 8 + Int threads = 12 String dockerImage = "quay.io/biowdl/gridss:2.12.2" - Int timeMinutes = 180 + Int timeMinutes = 320 } command { diff --git a/hmftools.wdl b/hmftools.wdl index 5a480f93..ef6355c4 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -780,7 +780,7 @@ task Orange { String memory = "17G" String javaXmx = "16G" - Int timeMinutes = 1440 #FIXME + Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/orange:v1.6" } @@ -1225,9 +1225,9 @@ task Sage { String? mnvFilterEnabled File? coverageBed - Int threads = 4 - String javaXmx = "50G" - String memory = "51G" + Int threads = 32 + String javaXmx = "120G" + String memory = "121G" Int timeMinutes = 1 + ceil(size(select_all([tumorBam, referenceBam]), "G") * 9 / threads) String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } diff --git a/sambamba.wdl b/sambamba.wdl index 4c2115e0..6696668a 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -28,7 +28,7 @@ task Flagstat { Int threads = 2 String memory = "8G" - Int timeMinutes = 120 + Int timeMinutes = 320 String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } From af5cf337f77dff48e4526e1da9ca6688a1fbe56c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 22 Jun 2022 12:48:03 +0200 Subject: [PATCH 0963/1208] adjust sage memory and time --- hmftools.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index ef6355c4..6c6ef045 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1226,9 +1226,9 @@ task Sage { File? 
coverageBed Int threads = 32 - String javaXmx = "120G" - String memory = "121G" - Int timeMinutes = 1 + ceil(size(select_all([tumorBam, referenceBam]), "G") * 9 / threads) + String javaXmx = "8G" + String memory = "9G" + Int timeMinutes = 720 String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } From 4608518f1afa3159658731aaac2dbfc32bedd8b8 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 23 Jun 2022 11:09:25 +0200 Subject: [PATCH 0964/1208] increase sage memory --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 6c6ef045..32bc24fd 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1226,8 +1226,8 @@ task Sage { File? coverageBed Int threads = 32 - String javaXmx = "8G" - String memory = "9G" + String javaXmx = "16G" + String memory = "20G" Int timeMinutes = 720 String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } From 01aa41d21addca2002f1269ba41e165c33e9e03e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 30 Jun 2022 14:09:40 +0200 Subject: [PATCH 0965/1208] fix heredoc --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 00705392..0e8fd434 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -94,7 +94,7 @@ task AnnotateSvTypes { command <<< set -e mkdir -p "$(dirname ~{outputPath})" - R --vanilla << EOF + R --vanilla << "EOF" library(VariantAnnotation) library(StructuralVariantAnnotation) @@ -115,7 +115,7 @@ task AnnotateSvTypes { gr <- breakpointRanges(vcf) svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype - writeVcf(vcf, out_path) + writeVcf(vcf, out_path, index=T) EOF >>> From 39af0ad74c6296b2f9aa536ecb2ba123a156670e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 30 Jun 2022 14:12:14 +0200 Subject: [PATCH 0966/1208] fix output name --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 0e8fd434..d0428e59 100644 
--- a/gridss.wdl +++ b/gridss.wdl @@ -83,7 +83,7 @@ task AnnotateSvTypes { input { File gridssVcf File gridssVcfIndex - String outputPath = "./gridss.svtyped.vcf" + String outputPath = "./gridss.svtyped.vcf.bgz" String memory = "32G" String dockerImage = "quay.io/biocontainers/bioconductor-structuralvariantannotation:1.10.0--r41hdfd78af_0" From 4e2a09e11c36a69b84451c44bf70c50825d67746 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 30 Jun 2022 15:57:39 +0200 Subject: [PATCH 0967/1208] detect if compressed --- gridss.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index d0428e59..c12c24d6 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -90,6 +90,8 @@ task AnnotateSvTypes { Int timeMinutes = 240 } + String index = if sub(outputPath, "\\.bgz", "") != outputPath then "T" else "F" + # Based on https://github.com/PapenfussLab/gridss/issues/74 command <<< set -e @@ -115,7 +117,7 @@ task AnnotateSvTypes { gr <- breakpointRanges(vcf) svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype - writeVcf(vcf, out_path, index=T) + writeVcf(vcf, out_path, index=~{index}) EOF >>> From 358c946dc86024324455193032d53873b8361d33 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 30 Jun 2022 16:36:09 +0200 Subject: [PATCH 0968/1208] fix duoble .bgz and and index to output --- gridss.wdl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index c12c24d6..38daa029 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -90,7 +90,9 @@ task AnnotateSvTypes { Int timeMinutes = 240 } - String index = if sub(outputPath, "\\.bgz", "") != outputPath then "T" else "F" + String effectiveOutputPath = sub(outputPath, "\\.bgz", "") + String index = if effectiveOutputPath != outputPath then "T" else "F" + # Based on https://github.com/PapenfussLab/gridss/issues/74 command <<< @@ -101,7 +103,7 @@ task AnnotateSvTypes { library(StructuralVariantAnnotation) vcf_path <- "~{gridssVcf}" - out_path <- 
"~{outputPath}" + out_path <- "~{effectiveOutputPath}" # Simple SV type classifier simpleEventType <- function(gr) { @@ -123,6 +125,7 @@ task AnnotateSvTypes { output { File vcf = outputPath + File? vcfIndex = outputPath + ".tbi" } runtime { From 760f89e95596cb55ef2b78c27bb61c85cadedcc2 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 1 Jul 2022 10:13:48 +0200 Subject: [PATCH 0969/1208] give bcftools sort more time --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 88d97cd0..589cddea 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -187,7 +187,7 @@ task Sort { String tmpDir = "./sorting-tmp" String memory = "256M" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) + Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 5 String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 8e7ca0ce64ef97b3ba7859b245377294754edbd0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 1 Jul 2022 14:07:19 +0200 Subject: [PATCH 0970/1208] increase memory for bcftools sort --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 589cddea..2bf1c732 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -186,7 +186,7 @@ task Sort { String outputPath = "output.vcf.gz" String tmpDir = "./sorting-tmp" - String memory = "256M" + String memory = "5G" Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 5 String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 3af704d65bf0ced2b0a76e049e1019031e2d1941 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 4 Jul 2022 13:04:22 +0200 Subject: [PATCH 0971/1208] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71309ae8..986582dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. 
--> version 5.1.0-dev --------------------------- ++ Added a task to add SVTYPE annotations to GRIDSS results + (`AnnotateSvTypes`). + The GRIDSS task will now run tabix separately if GRIDSS doesn't produce a vcf index. + Add a script to subtract UMI's from the read name and add them as From 0f3cb30df3276150f6b168ebfc43ed596d9f140b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 4 Jul 2022 16:10:59 +0200 Subject: [PATCH 0972/1208] Add GT to gridss results in AnnotateSvTypes --- gridss.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 38daa029..35e41d21 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -119,6 +119,8 @@ task AnnotateSvTypes { gr <- breakpointRanges(vcf) svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype + # GRIDSS doesn't supply a GT, so we estimate GT based on AF (assuming CN of 2, might be inaccurate) + geno(vcf)$GT <- ifelse(geno(vcf)$AF > 0.75, "1/1", ifelse(geno(vcf)$AF < 0.25, "0/0", "0/1")) writeVcf(vcf, out_path, index=~{index}) EOF >>> From cbd6de84edb3776aef10e774f2d15f8c29902490 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 18 Jul 2022 13:20:33 +0200 Subject: [PATCH 0973/1208] fix typo in star GenomeGenerate parameter_meta --- star.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/star.wdl b/star.wdl index aa1fd608..6a123c86 100644 --- a/star.wdl +++ b/star.wdl @@ -78,7 +78,7 @@ task GenomeGenerate { parameter_meta { # inputs - genomeDir: {description:"The directory the STAR index should be written to.", categroy: "common"} + genomeDir: {description:"The directory the STAR index should be written to.", category: "common"} referenceFasta: {description: "The reference Fasta file.", category: "required"} referenceGtf: {description: "The reference GTF file.", category: "common"} sjdbOverhang: {description: "Equivalent to STAR's `--sjdbOverhang` option.", category: "advanced"} From 9625c84b6749aa6b93f933d8a9bf307231dd73e7 Mon Sep 17 00:00:00 2001 From: DavyCats Date: 
Mon, 18 Jul 2022 15:12:44 +0200 Subject: [PATCH 0974/1208] update changelog --- CHANGELOG.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 986582dd..afd115c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,19 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Added a task for CupGenerateReport. ++ Updated Cuppa to version 1.6. ++ Added a task for Gripss. ++ Fixed the HealthChecker task's determination of the `succeeded` output + value. ++ Updated Linx to version 1.18. ++ Added a task for LinxVisualization. ++ Added a task for HMFtools Orange. ++ Added a task for HMFtools Pave. ++ Updated Purple to version 3.2. ++ Added plot and table outputs of Sage to task outputs. ++ Updated virus-interpreter to version 1.2. ++ Updated Peach to version 1.5. + Added a task to add SVTYPE annotations to GRIDSS results (`AnnotateSvTypes`). + The GRIDSS task will now run tabix separately if GRIDSS doesn't From 743e4e0615aa3568f391e65b3fc064e188a6f12e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 18 Jul 2022 15:35:42 +0200 Subject: [PATCH 0975/1208] fix linting issue --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 5a480f93..628e2f9b 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -190,7 +190,7 @@ task CupGenerateReport { command { set -e mkdir -p ./workdir ~{outputDir} - ln -s -t workdir ~{sep=" " cupData} + ln -s -t workdir ~{cupData} CupGenerateReport \ ~{sampleName} \ workdir/ From e996b7930959027c31a1f7a2fd4683692a13a8a3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 2 Aug 2022 10:00:49 +0200 Subject: [PATCH 0976/1208] increase time for cobalt --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 32bc24fd..a59b3897 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -114,7 +114,7 @@ task Cobalt { Int threads = 1 String 
memory = "5G" String javaXmx = "4G" - Int timeMinutes = 240 + Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0" } From e43bf3e4364a919cd3b380c58bb347d6be3a8069 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 2 Aug 2022 10:38:48 +0200 Subject: [PATCH 0977/1208] update changelog --- CHANGELOG.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index afd115c8..f750b212 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,22 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Bedtools coverage's timeMinutes now defaults to `320`. ++ Gridss' runtime attribute defaults were changed to: + + jvmHeapSizeGb: `64` + + nonJvmMemoryGb: `10` + + threads: `12` ++ Virusbreakend's runtime attribute defaults were changed to: + + threads: `12` + + timeMinutes: `320` ++ Cobalt's timeMinutes now defaults to `480`. ++ Orange's timeMinutes now defaults to 10. ++ Sage's runtime attributes were changed to: + + threads: `32` + + javaXmx: `"16G"` + + memory: `"20G"` + + timeMinutes: `720` ++ Sambamba's runtimeMinutes nor defaults to `320`. + Added a task for CupGenerateReport. + Updated Cuppa to version 1.6. + Added a task for Gripss. From 24cc6213026dbe1de017ebeabc2de7fbfad912ae Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 8 Aug 2022 11:11:48 +0200 Subject: [PATCH 0978/1208] make purple's somaticRainfallPlot output optional --- CHANGELOG.md | 2 ++ hmftools.wdl | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f750b212..be0e5a7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Purple's `somaticRainfallPlot` output is now optional and included in + the `plots` output as well. + Bedtools coverage's timeMinutes now defaults to `320`. 
+ Gridss' runtime attribute defaults were changed to: + jvmHeapSizeGb: `64` diff --git a/hmftools.wdl b/hmftools.wdl index 1542bdfc..f878181a 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1133,7 +1133,7 @@ task Purple { File segmentPlot = "~{outputDir}/plot/~{tumorName}.segment.png" File somaticClonalityPlot = "~{outputDir}/plot/~{tumorName}.somatic.clonality.png" File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png" - File somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" + File? somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png" File circosNormalRatio = "~{outputDir}/circos/~{referenceName}.ratio.circos" File circosBaf = "~{outputDir}/circos/~{tumorName}.baf.circos" File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf" @@ -1150,8 +1150,8 @@ task Purple { purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv, purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex, purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv] - Array[File] plots = [circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, - segmentPlot, somaticClonalityPlot, somaticPlot] + Array[File] plots = select_all([circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot, + segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot]) Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink, circosTumorRatio, circosGaps, circosBaf, circosCnv, circosInputConf, circosMap, circosSnp] From 8993b5c662428a0bcdc5d2fd4806812b061db529 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 22 Aug 2022 14:25:59 +0200 Subject: [PATCH 0979/1208] Use gebibytes instead of gigabytes --- CPAT.wdl | 4 +-- bam2fastx.wdl | 4 +-- bcftools.wdl | 10 +++---- bedtools.wdl | 16 +++++------ biowdl.wdl | 2 +- bowtie.wdl | 2 +- bwa-mem2.wdl | 4 +-- bwa.wdl | 6 ++-- ccs.wdl | 2 +- centrifuge.wdl | 10 +++---- chunked-scatter.wdl | 4 +-- clever.wdl | 4 +-- 
collect-columns.wdl | 2 +- common.wdl | 20 +++++++------- cutadapt.wdl | 2 +- deconstructsigs.wdl | 2 +- deepvariant.wdl | 2 +- delly.wdl | 2 +- duphold.wdl | 2 +- extractSigPredictHRD.wdl | 2 +- fastqc.wdl | 2 +- fastqsplitter.wdl | 2 +- fgbio.wdl | 2 +- flash.wdl | 2 +- gatk.wdl | 56 ++++++++++++++++++------------------- gffcompare.wdl | 4 +-- gffread.wdl | 4 +-- gridss.wdl | 10 +++---- hisat2.wdl | 2 +- hmftools.wdl | 34 +++++++++++------------ htseq.wdl | 4 +-- isoseq3.wdl | 2 +- lima.wdl | 2 +- macs2.wdl | 2 +- manta.wdl | 4 +-- minimap2.wdl | 4 +-- multiqc.wdl | 6 ++-- nanopack.wdl | 4 +-- pacbio.wdl | 4 +-- pbbam.wdl | 2 +- pbmm2.wdl | 2 +- peach.wdl | 2 +- picard.wdl | 60 ++++++++++++++++++++-------------------- prepareShiny.wdl | 4 +-- rtg.wdl | 8 +++--- sambamba.wdl | 10 +++---- samtools.wdl | 34 +++++++++++------------ scripts | 2 +- smoove.wdl | 2 +- snpeff.wdl | 2 +- somaticseq.wdl | 10 +++---- spades.wdl | 2 +- star.wdl | 10 +++---- strelka.wdl | 4 +-- stringtie.wdl | 4 +-- survivor.wdl | 2 +- talon.wdl | 20 +++++++------- transcriptclean.wdl | 6 ++-- umi-tools.wdl | 6 ++-- umi.wdl | 4 +-- unicycler.wdl | 2 +- vardict.wdl | 2 +- vt.wdl | 2 +- whatshap.wdl | 6 ++-- wisestork.wdl | 8 +++--- 65 files changed, 234 insertions(+), 234 deletions(-) diff --git a/CPAT.wdl b/CPAT.wdl index e6cef3ea..b96ea0d7 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -34,8 +34,8 @@ task CPAT { Array[String]? startCodons Array[String]? stopCodons - String memory = "4G" - Int timeMinutes = 10 + ceil(size(gene, "G") * 30) + String memory = "4GiB" + Int timeMinutes = 10 + ceil(size(gene, "GiB") * 30) String dockerImage = "quay.io/biocontainers/cpat:3.0.4--py39hcbe4a3b_0" } diff --git a/bam2fastx.wdl b/bam2fastx.wdl index 0bdccca8..62827fd9 100644 --- a/bam2fastx.wdl +++ b/bam2fastx.wdl @@ -30,7 +30,7 @@ task Bam2Fasta { String? 
seqIdPrefix - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } @@ -98,7 +98,7 @@ task Bam2Fastq { String? seqIdPrefix - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" } diff --git a/bcftools.wdl b/bcftools.wdl index 2bf1c732..726d2e37 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -47,7 +47,7 @@ task Annotate { File? samplesFile Int threads = 0 - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 60 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -138,7 +138,7 @@ task Filter { String? softFilter String outputPath = "./filtered.vcf.gz" - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 1 + ceil(size(vcf, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -186,7 +186,7 @@ task Sort { String outputPath = "output.vcf.gz" String tmpDir = "./sorting-tmp" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 5 String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -261,7 +261,7 @@ task Stats { String? userTsTv Int threads = 0 - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 1 + 2* ceil(size(select_all([inputVcf, compareVcf]), "G")) # TODO: Estimate, 2 minutes per GB, refine later. String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -350,7 +350,7 @@ task View { String? exclude String? 
include - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } diff --git a/bedtools.wdl b/bedtools.wdl index 80a281d6..fe18ede6 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -26,7 +26,7 @@ task Complement { File inputBed String outputBed = basename(inputBed, "\.bed") + ".complement.bed" - String memory = "~{512 + ceil(size([inputBed, faidx], "M"))}M" + String memory = "~{512 + ceil(size([inputBed, faidx], "MiB"))}MiB" Int timeMinutes = 1 + ceil(size([inputBed, faidx], "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -75,7 +75,7 @@ task Coverage { File? bIndex String outputPath = "./coverage.tsv" - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 320 String dockerImage = "quay.io/biocontainers/bedtools:2.30.0--h7d7f7ad_2" } @@ -120,7 +120,7 @@ task Merge { File inputBed String outputBed = "merged.bed" - String memory = "~{512 + ceil(size(inputBed, "M"))}M" + String memory = "~{512 + ceil(size(inputBed, "MiB"))}MiB" Int timeMinutes = 1 + ceil(size(inputBed, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -159,7 +159,7 @@ task MergeBedFiles { Array[File]+ bedFiles String outputBed = "merged.bed" - String memory = "~{512 + ceil(size(bedFiles, "M"))}M" + String memory = "~{512 + ceil(size(bedFiles, "MiB"))}MiB" Int timeMinutes = 1 + ceil(size(bedFiles, "G")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -207,8 +207,8 @@ task Sort { File? genome File? faidx - String memory = "~{512 + ceil(size(inputBed, "M"))}M" - Int timeMinutes = 1 + ceil(size(inputBed, "G")) + String memory = "~{512 + ceil(size(inputBed, "MiB"))}MiB" + Int timeMinutes = 1 + ceil(size(inputBed, "GiB")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } @@ -267,8 +267,8 @@ task Intersect { File? 
faidx # Giving a faidx file will set the sorted option. - String memory = "~{512 + ceil(size([regionsA, regionsB], "M"))}M" - Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "G")) + String memory = "~{512 + ceil(size([regionsA, regionsB], "MiB"))}MiB" + Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "GiB")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" } diff --git a/biowdl.wdl b/biowdl.wdl index dead8303..f891618e 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -32,7 +32,7 @@ task InputConverter { Boolean checkFileMd5sums=false Boolean old=false - String memory = "128M" + String memory = "128MiB" Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.3.0--pyhdfd78af_0" } diff --git a/bowtie.wdl b/bowtie.wdl index 87210dcd..7e817594 100644 --- a/bowtie.wdl +++ b/bowtie.wdl @@ -37,7 +37,7 @@ task Bowtie { String picardXmx = "4G" Int threads = 1 - String memory = "~{5 + ceil(size(indexFiles, "G"))}G" + String memory = "~{5 + ceil(size(indexFiles, "GiB"))}GiB" Int timeMinutes = 1 + ceil(size(flatten([readsUpstream, readsDownstream]), "G") * 300 / threads) # Image contains bowtie=1.2.2 and picard=2.9.2 String dockerImage = "quay.io/biocontainers/mulled-v2-bfe71839265127576d3cd749c056e7b168308d56:1d8bec77b352cdcf3e9ff3d20af238b33ed96eae-0" diff --git a/bwa-mem2.wdl b/bwa-mem2.wdl index 4566e68c..b3db0ad1 100644 --- a/bwa-mem2.wdl +++ b/bwa-mem2.wdl @@ -36,7 +36,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads) + Int timeMinutes = 1 + ceil(size([read1, read2], "GiB") * 220 / threads) # Contains bwa-mem2 2.0 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-6a15c99309c82b345497d24489bee67bbb76c2f6:1c9c3227b9bf825a8dc9726a25701aa23c0b1f12-0" } @@ -84,7 +84,7 @@ task Mem { # One extra thread for bwa-postalt + samtools is not needed. 
# These only use 5-10% of compute power and not always simultaneously. cpu: threads - memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" + memory: "~{select_first([memoryGb, estimatedMemoryGb])}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/bwa.wdl b/bwa.wdl index 373de628..d4f4495a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -37,7 +37,7 @@ task Mem { Int threads = 4 Int? memoryGb - Int timeMinutes = 10 + ceil(size([read1, read2], "G") * 300 / threads) + Int timeMinutes = 10 + ceil(size([read1, read2], "GiB") * 300 / threads) # Contains bwa 0.7.17 bwakit 0.7.17.dev1 and samtools 1.10. String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0" } @@ -48,7 +48,7 @@ task Mem { Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0) Int totalSortThreads = select_first([sortThreads, estimatedSortThreads]) # BWA needs slightly more memory than the size of the index files (~10%). Add a margin for safety here. - Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "G") * 2) + sortMemoryPerThreadGb * totalSortThreads + Int estimatedMemoryGb = 10 + ceil(size(bwaIndex.indexFiles, "GiB") * 2) + sortMemoryPerThreadGb * totalSortThreads # The bwa postalt script is out commented as soon as usePostalt = false. # This hack was tested with bash, dash and ash. It seems that comments in between pipes work for all of them. @@ -81,7 +81,7 @@ task Mem { # One extra thread for bwa-postalt + samtools is not needed. # These only use 5-10% of compute power and not always simultaneously. cpu: threads - memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" + memory: "~{select_first([memoryGb, estimatedMemoryGb])}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/ccs.wdl b/ccs.wdl index 29f1a7f9..27db15ab 100644 --- a/ccs.wdl +++ b/ccs.wdl @@ -42,7 +42,7 @@ task CCS { String? 
chunkString Int threads = 2 - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/pbccs:6.0.0--h9ee0642_2" } diff --git a/centrifuge.wdl b/centrifuge.wdl index 07dc7f85..757af239 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -36,7 +36,7 @@ task Build { File? sizeTable Int threads = 5 - String memory = "20G" + String memory = "20GiB" Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -109,7 +109,7 @@ task Classify { String? excludeTaxIDs Int threads = 4 - String memory = "16G" + String memory = "16GiB" Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -186,7 +186,7 @@ task Inspect { Int? across - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -245,7 +245,7 @@ task KReport { Int? minimumScore Int? minimumLength - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/centrifuge:1.0.4_beta--he513fc3_5" } @@ -303,7 +303,7 @@ task KTimportTaxonomy { File inputFile String outputPrefix - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 1 String dockerImage = "biocontainers/krona:v2.7.1_cv1" } diff --git a/chunked-scatter.wdl b/chunked-scatter.wdl index 66954c36..af24b139 100644 --- a/chunked-scatter.wdl +++ b/chunked-scatter.wdl @@ -30,7 +30,7 @@ task ChunkedScatter { Int? overlap Int? minimumBasesPerFile - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/chunked-scatter:1.0.0--py_0" } @@ -84,7 +84,7 @@ task ScatterRegions { Int? 
scatterSize - String memory = "256M" + String memory = "256MiB" Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/chunked-scatter:1.0.0--py_0" } diff --git a/clever.wdl b/clever.wdl index 186be514..791a0ba1 100644 --- a/clever.wdl +++ b/clever.wdl @@ -34,7 +34,7 @@ task Mateclever { Int maxOffset = 150 Int threads = 10 - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 600 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } @@ -94,7 +94,7 @@ task Prediction { String outputPath = "./clever" Int threads = 10 - String memory = "55G" + String memory = "55GiB" Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } diff --git a/collect-columns.wdl b/collect-columns.wdl index 3d65c7e7..03ccb6f7 100644 --- a/collect-columns.wdl +++ b/collect-columns.wdl @@ -62,7 +62,7 @@ task CollectColumns { } runtime { - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/common.wdl b/common.wdl index 1e4fc8cb..1ce2895f 100644 --- a/common.wdl +++ b/common.wdl @@ -25,7 +25,7 @@ task AppendToStringArray { Array[String] array String string - String memory = "1G" + String memory = "1GiB" } command { @@ -51,7 +51,7 @@ task CheckFileMD5 { # By default cromwell expects /bin/bash to be present in the container. # The 'bash' container does not fill this requirement. (It is in /usr/local/bin/bash) # Use a stable version of debian:stretch-slim for this. (Smaller than ubuntu) - String memory = "1G" + String memory = "1GiB" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -75,7 +75,7 @@ task ConcatenateTextFiles { Boolean unzip = false Boolean zip = false - String memory = "1G" + String memory = "1GiB" } # When input and output is both compressed decompression is not needed. 
@@ -104,7 +104,7 @@ task Copy { Boolean recursive = false # Version not that important as long as it is stable. - String memory = "1G" + String memory = "1GiB" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -132,7 +132,7 @@ task CreateLink { String inputFile String outputPath - String memory = "1G" + String memory = "1GiB" } command { @@ -170,7 +170,7 @@ task GetSamplePositionInArray { runtime { # 4 gigs of memory to be able to build the docker image in singularity. - memory: "4G" + memory: "4GiB" docker: dockerImage timeMinutes: 5 } @@ -190,7 +190,7 @@ task MapMd5 { input { Map[String,String] map - String memory = "1G" + String memory = "1GiB" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -214,7 +214,7 @@ task StringArrayMd5 { input { Array[String] stringArray - String memory = "1G" + String memory = "1GiB" String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -238,7 +238,7 @@ task TextToFile { String text String outputFile = "out.txt" - String memory = "1G" + String memory = "1GiB" Int timeMinutes = 1 String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa" } @@ -274,7 +274,7 @@ task YamlToJson { File yaml String outputJson = basename(yaml, "\.ya?ml$") + ".json" - String memory = "128M" + String memory = "128MiB" Int timeMinutes = 1 # biowdl-input-converter has python and pyyaml. String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.3.0--pyhdfd78af_0" diff --git a/cutadapt.wdl b/cutadapt.wdl index b49a95d4..9a67692c 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -83,7 +83,7 @@ task Cutadapt { Boolean? 
noZeroCap Int cores = 4 - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" } diff --git a/deconstructsigs.wdl b/deconstructsigs.wdl index ef47e3e3..c44bf9c0 100644 --- a/deconstructsigs.wdl +++ b/deconstructsigs.wdl @@ -27,7 +27,7 @@ task DeconstructSigs { String outputPath = "./signatures.rds" Int timeMinutes = 15 - String memory = "4G" + String memory = "4GiB" String dockerImage = "quay.io/biocontainers/r-deconstructsigs:1.9.0--r41hdfd78af_1" } diff --git a/deepvariant.wdl b/deepvariant.wdl index 28aee813..25d05bd9 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -37,7 +37,7 @@ task RunDeepVariant { String? sampleName Boolean? VCFStatsReport = true - String memory = "3G" + String memory = "3GiB" Int timeMinutes = 5000 String dockerImage = "google/deepvariant:1.0.0" } diff --git a/delly.wdl b/delly.wdl index bf00ed36..7333c5ff 100644 --- a/delly.wdl +++ b/delly.wdl @@ -28,7 +28,7 @@ task CallSV { File referenceFastaFai String outputPath = "./delly/delly.bcf" - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 300 String dockerImage = "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" } diff --git a/duphold.wdl b/duphold.wdl index 80fe31d2..0426da56 100644 --- a/duphold.wdl +++ b/duphold.wdl @@ -30,7 +30,7 @@ task Duphold { String sample String outputPath = "./duphold.vcf" - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/duphold:0.2.1--h516909a_1" } diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl index 2b5d9781..1520b608 100644 --- a/extractSigPredictHRD.wdl +++ b/extractSigPredictHRD.wdl @@ -30,7 +30,7 @@ task ExtractSigPredictHRD { File svVcfIndex Boolean hg38 = false - String memory = "3G" + String memory = "3GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/chord-mutsigextractor:2.00_1.14" } diff --git 
a/fastqc.wdl b/fastqc.wdl index 3a07db4e..d821e531 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -45,7 +45,7 @@ task Fastqc { # weird edge case fastq's. String javaXmx="1750M" Int threads = 1 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" diff --git a/fastqsplitter.wdl b/fastqsplitter.wdl index 25a50954..4a02697c 100644 --- a/fastqsplitter.wdl +++ b/fastqsplitter.wdl @@ -63,7 +63,7 @@ task Fastqsplitter { runtime { cpu: cores - memory: "~{memory}G" + memory: "~{memory}GiB" docker: dockerImage } } diff --git a/fgbio.wdl b/fgbio.wdl index d50906d3..15fb0ea4 100644 --- a/fgbio.wdl +++ b/fgbio.wdl @@ -26,7 +26,7 @@ task AnnotateBamWithUmis { File inputUmi String outputPath - String memory = "120G" + String memory = "120GiB" Int timeMinutes = 360 String javaXmx="100G" String dockerImage = "quay.io/biocontainers/fgbio:1.4.0--hdfd78af_0" diff --git a/flash.wdl b/flash.wdl index c4554c50..7b50e0d7 100644 --- a/flash.wdl +++ b/flash.wdl @@ -34,7 +34,7 @@ task Flash { Int? maxOverlap Int threads = 2 - String memory = "2G" + String memory = "2GiB" } command { diff --git a/gatk.wdl b/gatk.wdl index 5cf7c673..0b93efe6 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -34,7 +34,7 @@ task AnnotateIntervals { File? segmentalDuplicationTrack String javaXmx = "2G" - String memory = "3G" + String memory = "3GiB" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -129,7 +129,7 @@ task ApplyBQSR { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -197,7 +197,7 @@ task BaseRecalibrator { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -232,7 +232,7 @@ task CalculateContamination { File? 
normalPileups String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -279,7 +279,7 @@ task CallCopyRatioSegments { File copyRatioSegments String javaXmx = "2G" - String memory = "3G" + String memory = "3GiB" Int timeMinutes = 2 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -332,7 +332,7 @@ task CollectAllelicCounts { File? commonVariantSitesIndex String javaXmx = "10G" - String memory = "11G" + String memory = "11GiB" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -390,7 +390,7 @@ task CollectReadCounts { String intervalMergingRule = "OVERLAPPING_ONLY" String javaXmx = "7G" - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 1 + ceil(size(inputBam, "G") * 5) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -449,7 +449,7 @@ task CombineGVCFs { File referenceFastaFai String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 1 + ceil(size(gvcfFiles, "G") * 8) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -509,7 +509,7 @@ task CombineVariants { String outputPath String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 180 String dockerImage = "broadinstitute/gatk3:3.8-1" } @@ -579,7 +579,7 @@ task CreateReadCountPanelOfNormals { File? annotatedIntervals String javaXmx = "7G" - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 5 # The biocontainer causes a spark related error for some reason. String dockerImage = "broadinstitute/gatk:4.1.8.0" @@ -629,7 +629,7 @@ task DenoiseReadCounts { File? 
annotatedIntervals String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -690,7 +690,7 @@ task FilterMutectCalls { File? artifactPriors String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -775,7 +775,7 @@ task GatherBqsrReports { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -805,7 +805,7 @@ task GenomicsDBImport { String? tmpDir String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 180 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -866,7 +866,7 @@ task GenotypeGVCFs { File? pedigree String javaXmx = "6G" - String memory = "7G" + String memory = "7GiB" Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. 
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -932,7 +932,7 @@ task GetPileupSummaries { String outputPrefix String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1032,7 +1032,7 @@ task HaplotypeCaller { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -1073,7 +1073,7 @@ task LearnReadOrientationModel { Array[File]+ f1r2TarGz String javaXmx = "12G" - String memory = "13G" + String memory = "13GiB" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1114,7 +1114,7 @@ task MergeStats { Array[File]+ stats String javaXmx = "14G" - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1162,7 +1162,7 @@ task ModelSegments { File? normalAllelicCounts String javaXmx = "10G" - String memory = "11G" + String memory = "11GiB" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1250,7 +1250,7 @@ task MuTect2 { File? panelOfNormalsIndex String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1325,7 +1325,7 @@ task PlotDenoisedCopyRatios { Int? minimumContigLength String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.8.0" } @@ -1393,7 +1393,7 @@ task PlotModeledSegments { Int? minimumContigLength String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 2 String dockerImage = "broadinstitute/gatk:4.1.8.0" } @@ -1454,7 +1454,7 @@ task PreprocessIntervals { File? 
intervals String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 6) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1516,7 +1516,7 @@ task SelectVariants { String? selectTypeToInclude String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1576,7 +1576,7 @@ task SplitNCigarReads { Array[File] intervals = [] String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used. String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } @@ -1645,7 +1645,7 @@ task VariantEval { File? dbsnpVCFIndex String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" # TODO: Refine estimate. For now 4 minutes per GB of input. Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs, select_all([referenceFasta, dbsnpVCF])]), "G") * 20) String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" @@ -1722,7 +1722,7 @@ task VariantFiltration { Array[File] intervals = [] String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 120 String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0" } diff --git a/gffcompare.wdl b/gffcompare.wdl index d06602bc..fe1db0a8 100644 --- a/gffcompare.wdl +++ b/gffcompare.wdl @@ -46,8 +46,8 @@ task GffCompare { Int? maxDistanceGroupingTranscriptStartSites String? namePrefix - String memory = "4G" - Int timeMinutes = 1 + ceil(size(inputGtfFiles, "G") * 30) + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(inputGtfFiles, "GiB") * 30) String dockerImage = "quay.io/biocontainers/gffcompare:0.10.6--h2d50403_0" # This workaround only works in the input section. 
diff --git a/gffread.wdl b/gffread.wdl index a04540f5..26a2773c 100644 --- a/gffread.wdl +++ b/gffread.wdl @@ -32,8 +32,8 @@ task GffRead { String? proteinFastaPath String? filteredGffPath - String memory = "4G" - Int timeMinutes = 1 + ceil(size(inputGff, "G") * 10) + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(inputGff, "GiB") * 10) String dockerImage = "quay.io/biocontainers/gffread:0.9.12--0" } diff --git a/gridss.wdl b/gridss.wdl index add3c08f..cfe53751 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -33,7 +33,7 @@ task AnnotateInsertedSequence { Int threads = 8 String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 120 } @@ -85,7 +85,7 @@ task AnnotateSvTypes { File gridssVcfIndex String outputPath = "./gridss.svtyped.vcf.bgz" - String memory = "32G" + String memory = "32GiB" String dockerImage = "quay.io/biocontainers/bioconductor-structuralvariantannotation:1.10.0--r41hdfd78af_0" Int timeMinutes = 240 } @@ -201,7 +201,7 @@ task GRIDSS { runtime { cpu: threads - memory: "~{jvmHeapSizeGb + nonJvmMemoryGb}G" + memory: "~{jvmHeapSizeGb + nonJvmMemoryGb}GiB" time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage } @@ -239,7 +239,7 @@ task GridssAnnotateVcfRepeatmasker { File gridssVcfIndex String outputPath = "./gridss.repeatmasker_annotated.vcf.gz" - String memory = "25G" + String memory = "25GiB" Int threads = 8 String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 1440 @@ -289,7 +289,7 @@ task Virusbreakend { File virusbreakendDB String outputPath = "./virusbreakend.vcf" - String memory = "75G" + String memory = "75GiB" Int threads = 12 String dockerImage = "quay.io/biowdl/gridss:2.12.2" Int timeMinutes = 320 diff --git a/hisat2.wdl b/hisat2.wdl index a2c0777c..50fabc9d 100644 --- a/hisat2.wdl +++ b/hisat2.wdl @@ -82,7 +82,7 @@ task Hisat2 { runtime { cpu: threads - memory: "~{select_first([memoryGb, estimatedMemoryGb])}G" + memory: 
"~{select_first([memoryGb, estimatedMemoryGb])}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/hmftools.wdl b/hmftools.wdl index f878181a..26ab4e4a 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,7 +35,7 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = "70G" + String memory = "70GiB" String javaXmx = "64G" Int timeMinutes = 240 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" @@ -112,7 +112,7 @@ task Cobalt { File gcProfile Int threads = 1 - String memory = "5G" + String memory = "5GiB" String javaXmx = "4G" Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0" @@ -174,7 +174,7 @@ task CupGenerateReport { File cupData String outputDir = "./cuppa" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/cuppa:1.6" } @@ -242,7 +242,7 @@ task Cuppa { String outputDir = "./cuppa" String javaXmx = "4G" - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/cuppa:1.6" } @@ -298,7 +298,7 @@ task CuppaChart { File cupData String outputDir = "./cuppa" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 5 String dockerImage = "quay.io/biowdl/cuppa:1.6" } @@ -348,7 +348,7 @@ task Gripss { File vcfIndex String outputDir = "./" - String memory = "17G" + String memory = "17GiB" String javaXmx = "16G" Int timeMinutes = 50 String dockerImage = "quay.io/biocontainers/hmftools-gripss:2.0--hdfd78af_0" @@ -419,7 +419,7 @@ task GripssApplicationKt { File breakendPon File breakpointPon - String memory = "32G" + String memory = "32GiB" String javaXmx = "31G" Int timeMinutes = 45 String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" @@ -478,7 +478,7 @@ task GripssHardFilterApplicationKt { File inputVcf String outputPath = "gripss_hard_filter.vcf.gz" - String memory = "3G" + String memory = "3GiB" String javaXmx = "2G" Int timeMinutes = 15 String 
dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0" @@ -527,7 +527,7 @@ task HealthChecker { Array[File]+ purpleOutput String javaXmx = "2G" - String memory = "1G" + String memory = "3GiB" Int timeMinutes = 1 String dockerImage = "quay.io/biowdl/health-checker:3.2" } @@ -604,7 +604,7 @@ task Linx { File transExonDataCsv File transSpliceDataCsv - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" @@ -690,7 +690,7 @@ task LinxVisualisations { Array[File]+ linxOutput Boolean plotReportable = true - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" @@ -778,7 +778,7 @@ task Orange { File cohortMappingTsv File cohortPercentilesTsv - String memory = "17G" + String memory = "17GiB" String javaXmx = "16G" Int timeMinutes = 10 String dockerImage = "quay.io/biowdl/orange:v1.6" @@ -902,7 +902,7 @@ task Pave { Int timeMinutes = 50 String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" String dockerImage = "quay.io/biowdl/pave:v1.0" } @@ -979,7 +979,7 @@ task Protect { File chordPrediction File annotatedVirus - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 String dockerImage = "quay.io/biowdl/protect:v2.0" @@ -1078,7 +1078,7 @@ task Purple { Int threads = 1 Int timeMinutes = 30 - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" # clone of quay.io/biocontainers/hmftools-purple:3.2--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6' String dockerImage = "quay.io/biowdl/hmftools-purple:3.2" @@ -1227,7 +1227,7 @@ task Sage { Int threads = 32 String javaXmx = "16G" - String memory = "20G" + String memory = "20GiB" Int timeMinutes = 720 String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1" } @@ -1315,7 +1315,7 @@ 
task VirusInterpreter { File virusReportingDbTsv String outputDir = "." - String memory = "3G" + String memory = "3GiB" String javaXmx = "2G" Int timeMinutes = 15 String dockerImage = "quay.io/biowdl/virus-interpreter:1.2" diff --git a/htseq.wdl b/htseq.wdl index 76d3bb83..92bc4423 100644 --- a/htseq.wdl +++ b/htseq.wdl @@ -33,8 +33,8 @@ task HTSeqCount { String? idattr Int nprocesses = 1 - String memory = "8G" - Int timeMinutes = 1440 #10 + ceil(size(inputBams, "G") * 60) FIXME + String memory = "8GiB" + Int timeMinutes = 1440 #10 + ceil(size(inputBams, "GiB") * 60) FIXME String dockerImage = "quay.io/biocontainers/htseq:0.12.4--py37hb3f55d8_0" } diff --git a/isoseq3.wdl b/isoseq3.wdl index aacbfc60..77f19f80 100644 --- a/isoseq3.wdl +++ b/isoseq3.wdl @@ -32,7 +32,7 @@ task Refine { String outputNamePrefix Int threads = 2 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/isoseq3:3.4.0--0" } diff --git a/lima.wdl b/lima.wdl index 6b87ad4f..eece2b3f 100644 --- a/lima.wdl +++ b/lima.wdl @@ -49,7 +49,7 @@ task Lima { String outputPrefix Int threads = 2 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/lima:2.2.0--h9ee0642_0" } diff --git a/macs2.wdl b/macs2.wdl index 2afe3bbe..e6a011ad 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -31,7 +31,7 @@ task PeakCalling { String format = "AUTO" Boolean nomodel = false Int timeMinutes = 600 # Default to 10 hours - String memory = "8G" + String memory = "8GiB" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } diff --git a/manta.wdl b/manta.wdl index 1c949af2..6804f304 100644 --- a/manta.wdl +++ b/manta.wdl @@ -60,7 +60,7 @@ task Germline { runtime { cpu: cores - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" docker: dockerImage time_minutes: timeMinutes } @@ -138,7 +138,7 @@ task Somatic { runtime { cpu: cores - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" docker: dockerImage 
time_minutes: timeMinutes } diff --git a/minimap2.wdl b/minimap2.wdl index 50ff4db3..96cc7734 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -31,7 +31,7 @@ task Indexing { Int? splitIndex Int cores = 1 - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } @@ -98,7 +98,7 @@ task Mapping { String? howToFindGTAG Int cores = 4 - String memory = "30G" + String memory = "30GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } diff --git a/multiqc.wdl b/multiqc.wdl index a1662937..21fc8a7d 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -57,11 +57,11 @@ task MultiQC { String? clConfig String? memory - Int timeMinutes = 10 + ceil(size(reports, "G") * 8) + Int timeMinutes = 10 + ceil(size(reports, "GiB") * 8) String dockerImage = "quay.io/biocontainers/multiqc:1.9--py_1" } - Int memoryGb = 2 + ceil(size(reports, "G")) + Int memoryGb = 2 + ceil(size(reports, "GiB")) # This is where the reports end up. It does not need to be changed by the # user. It is full of symbolic links, so it is not of any use to the user @@ -139,7 +139,7 @@ task MultiQC { } runtime { - memory: select_first([memory, "~{memoryGb}G"]) + memory: select_first([memory, "~{memoryGb}GiB"]) time_minutes: timeMinutes docker: dockerImage } diff --git a/nanopack.wdl b/nanopack.wdl index e4c94a43..bd3f433e 100644 --- a/nanopack.wdl +++ b/nanopack.wdl @@ -40,7 +40,7 @@ task NanoPlot { String? readType Int threads = 2 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/nanoplot:1.38.0--pyhdfd78af_0" } @@ -130,7 +130,7 @@ task NanoQc { Int? 
minLength - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 15 String dockerImage = "quay.io/biocontainers/nanoqc:0.9.4--py_0" } diff --git a/pacbio.wdl b/pacbio.wdl index b21c69bc..dcf0f69e 100644 --- a/pacbio.wdl +++ b/pacbio.wdl @@ -25,7 +25,7 @@ task mergePacBio { Array[File]+ reports String outputPathMergedReport - String memory = "4G" + String memory = "4GiB" String dockerImage = "quay.io/redmar_van_den_berg/pacbio-merge:0.2" } @@ -62,7 +62,7 @@ task ccsChunks { input { Int chunkCount - String memory = "4G" + String memory = "4GiB" String dockerImage = "python:3.7-slim" } diff --git a/pbbam.wdl b/pbbam.wdl index ae64b87c..d5cafed6 100644 --- a/pbbam.wdl +++ b/pbbam.wdl @@ -26,7 +26,7 @@ task Index { String? outputBamPath - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) String dockerImage = "quay.io/biocontainers/pbbam:1.6.0--h5b7e6e0_0" } diff --git a/pbmm2.wdl b/pbmm2.wdl index 5fda1c87..ea7c05df 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -29,7 +29,7 @@ task Mapping { File queryFile Int cores = 4 - String memory = "30G" + String memory = "30GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 2000 / cores) String dockerImage = "quay.io/biocontainers/pbmm2:1.3.0--h56fc30b_1" } diff --git a/peach.wdl b/peach.wdl index d1bc17f8..7da029d0 100644 --- a/peach.wdl +++ b/peach.wdl @@ -29,7 +29,7 @@ task Peach { String outputDir = "./peach" File panelJson - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biowdl/peach:v1.5" Int timeMinutes = 5 } diff --git a/picard.wdl b/picard.wdl index 3d835829..f762ecdd 100644 --- a/picard.wdl +++ b/picard.wdl @@ -27,7 +27,7 @@ task BedToIntervalList { String outputPath = "regions.interval_list" String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 5 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -88,7 +88,7 @@ task CollectHsMetrics { Int memoryMb = javaXmxMb + 512 # 
Additional * 2 because picard multiple metrics reads the # reference fasta twice. - Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(referenceFasta, "GiB") * 3 * 2) + ceil(size(inputBam, "GiB") * 6) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -109,7 +109,7 @@ task CollectHsMetrics { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -157,7 +157,7 @@ task CollectMultipleMetrics { Int javaXmxMb = 3072 Int memoryMb = javaXmxMb + 512 # Additional * 2 because picard multiple metrics reads the reference fasta twice. - Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 3 * 2) + ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(referenceFasta, "GiB") * 3 * 2) + ceil(size(inputBam, "GiB") * 6) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -223,7 +223,7 @@ task CollectMultipleMetrics { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -281,9 +281,9 @@ task CollectRnaSeqMetrics { String strandSpecificity = "NONE" String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" # With 6 minutes per G there were several timeouts. 
- Int timeMinutes = 1 + ceil(size(inputBam, "G") * 12) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 12) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -340,8 +340,8 @@ task CollectTargetedPcrMetrics { String basename String javaXmx = "3G" - String memory = "4G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 6) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -402,7 +402,7 @@ task CollectVariantCallingMetrics { String basename String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -459,9 +459,9 @@ task CollectWgsMetrics { Int? minimumBaseQuality Int? coverageCap - String memory = "5G" + String memory = "5GiB" String javaXmx = "4G" - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 6) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 6) String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" } @@ -516,7 +516,7 @@ task CreateSequenceDictionary { String outputDir String javaXmx = "2G" - String memory = "3G" + String memory = "3GiB" String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -568,7 +568,7 @@ task GatherBamFiles { Int javaXmxMb = 1024 Int memoryMb = javaXmxMb + 512 # One minute per input gigabyte. 
- Int timeMinutes = 1 + ceil(size(inputBams, "G") * 1) + Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 1) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -593,7 +593,7 @@ task GatherBamFiles { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -630,8 +630,8 @@ task GatherVcfs { Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "4G" - String memory = "5G" - Int timeMinutes = 1 + ceil(size(inputVcfs, "G") * 2) + String memory = "5GiB" + Int timeMinutes = 1 + ceil(size(inputVcfs, "GiB") * 2) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -700,7 +700,7 @@ task MarkDuplicates { Int javaXmxMb = 6656 # 6.5G String memoryMb = javaXmxMb + 512 - Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) + Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 8) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -736,7 +736,7 @@ task MarkDuplicates { } runtime { - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -782,8 +782,8 @@ task MergeVCFs { Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "4G" - String memory = "5G" - Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2 + String memory = "5GiB" + Int timeMinutes = 1 + ceil(size(inputVCFs, "GiB")) * 2 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -838,7 +838,7 @@ task SamToFastq { Boolean paired = true String javaXmx = "16G" # High memory default to avoid crashes. 
- String memory = "17G" + String memory = "17GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" @@ -900,7 +900,7 @@ task ScatterIntervalList { Int scatter_count String javaXmx = "3G" - String memory = "4G" + String memory = "4GiB" String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -943,7 +943,7 @@ task SortSam { # 4.000000001 which gets rounded to 5. # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -971,7 +971,7 @@ task SortSam { runtime { cpu: 1 - memory: "~{1 + XmxGb}G" + memory: "~{1 + XmxGb}GiB" time_minutes: timeMinutes docker: dockerImage } @@ -1004,8 +1004,8 @@ task SortVcf { File? 
dict String javaXmx = "8G" - String memory = "9G" - Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5) + String memory = "9GiB" + Int timeMinutes = 1 + ceil(size(vcfFiles, "GiB") * 5) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -1054,8 +1054,8 @@ task RenameSample { String newSampleName String javaXmx = "8G" - String memory = "9G" - Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2) + String memory = "9GiB" + Int timeMinutes = 1 + ceil(size(inputVcf, "GiB") * 2) String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } @@ -1109,7 +1109,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { Boolean useJdkInflater = false Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater String javaXmx = "8G" - String memory = "9G" + String memory = "9GiB" Int timeMinutes = 360 String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" } diff --git a/prepareShiny.wdl b/prepareShiny.wdl index d669e2d1..28910743 100644 --- a/prepareShiny.wdl +++ b/prepareShiny.wdl @@ -25,7 +25,7 @@ task CreateDesignMatrix { File countTable String shinyDir = "." - String memory = "5G" + String memory = "5GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" } @@ -67,7 +67,7 @@ task CreateAnnotation { File referenceGtfFile String shinyDir = "." 
- String memory = "5G" + String memory = "5GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0" } diff --git a/rtg.wdl b/rtg.wdl index 0e86ce3f..3e9dab9b 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -27,8 +27,8 @@ task Format { String outputPath = "seq_data.sdf" String rtgMem = "8G" - String memory = "9G" - Int timeMinutes = 1 + ceil(size(inputFiles) * 2) + String memory = "9GiB" + Int timeMinutes = 1 + ceil(size(inputFiles, "GiB") * 2) String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" } @@ -85,8 +85,8 @@ task VcfEval { String rtgMem = "8G" Int threads = 1 # Tool default is number of cores in the system 😱. - String memory = "9G" - Int timeMinutes = 1 + ceil(size([baseline, calls], "G") * 5) + String memory = "9GiB" + Int timeMinutes = 1 + ceil(size([baseline, calls], "GiB") * 5) String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0" } diff --git a/sambamba.wdl b/sambamba.wdl index 6696668a..be347f94 100644 --- a/sambamba.wdl +++ b/sambamba.wdl @@ -27,7 +27,7 @@ task Flagstat { String outputPath = "./flagstat.txt" Int threads = 2 - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 320 String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } @@ -84,7 +84,7 @@ task Markdup { # Added 8192 mb as a margin of safety. Real life use with this setting uses 2.7 GiB. Int memoryMb = 8192 + sortBufferSize + 2 * ioBufferSize # Time minute calculation does not work well for higher number of threads. 
- Int timeMinutes = 1 + ceil(size(inputBams, "G") * 25) / threads + Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 25) / threads String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } @@ -113,7 +113,7 @@ task Markdup { runtime { cpu: threads - memory: "~{memoryMb}M" + memory: "~{memoryMb}MiB" time_minutes: timeMinutes docker: dockerImage } @@ -149,7 +149,7 @@ task Sort { Int memoryPerThreadGb = 4 Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2" } @@ -177,7 +177,7 @@ task Sort { runtime { cpu: threads - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" docker: dockerImage time_minutes: timeMinutes } diff --git a/samtools.wdl b/samtools.wdl index 81b6c17d..e1b08173 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -26,8 +26,8 @@ task BgzipAndIndex { String outputDir String type = "vcf" - String memory = "2G" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String memory = "2GiB" + Int timeMinutes = 1 + ceil(size(inputFile, "GiB")) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -71,7 +71,7 @@ task Faidx { File inputFile String outputDir - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -119,7 +119,7 @@ task Fastq { Int? 
compressionLevel Int threads = 1 - String memory = "1G" + String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam) * 2) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -183,8 +183,8 @@ task FilterShortReadsBam { File bamFile String outputPathBam - String memory = "1G" - Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8) + String memory = "1GiB" + Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 8) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -229,7 +229,7 @@ task Flagstat { File inputBam String outputPath - String memory = "256M" # Only 40.5 MiB used for 150G bam file. + String memory = "256MiB" # Only 40.5 MiB used for 150G bam file. Int timeMinutes = 1 + ceil(size(inputBam, "G")) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -269,8 +269,8 @@ task Index { String? outputBamPath - String memory = "2G" - Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4) + String memory = "2GiB" + Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -321,7 +321,7 @@ task Markdup { File inputBam String outputBamPath - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -359,8 +359,8 @@ task Merge { Boolean force = true Int threads = 1 - String memory = "4G" - Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2) + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -415,7 +415,7 @@ task Sort { Int memoryPerThreadGb = 4 Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb - Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -444,7 +444,7 @@ task 
Sort { runtime { cpu: threads - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" time_minutes: timeMinutes docker: dockerImage } @@ -473,7 +473,7 @@ task Tabix { String outputFilePath = "indexed.vcf.gz" String type = "vcf" - Int timeMinutes = 1 + ceil(size(inputFile, "G") * 2) + Int timeMinutes = 1 + ceil(size(inputFile, "GiB") * 2) String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" } @@ -526,8 +526,8 @@ task View { Int? MAPQthreshold Int threads = 1 - String memory = "1G" - Int timeMinutes = 1 + ceil(size(inFile, "G") * 5) + String memory = "1GiB" + Int timeMinutes = 1 + ceil(size(inFile, "GiB") * 5) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } diff --git a/scripts b/scripts index 84690a30..98cc3e10 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 +Subproject commit 98cc3e10125c853a70f41ceccf8f9d5428d4c1a3 diff --git a/smoove.wdl b/smoove.wdl index d1011f6c..7a1ac38b 100644 --- a/smoove.wdl +++ b/smoove.wdl @@ -29,7 +29,7 @@ task Call { String sample String outputDir = "./smoove" - String memory = "15G" + String memory = "15GiB" Int timeMinutes = 1440 String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0" } diff --git a/snpeff.wdl b/snpeff.wdl index 4a3640c7..0f14e5b5 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -36,7 +36,7 @@ task SnpEff { Boolean noShiftHgvs = false Int? upDownStreamLen - String memory = "9G" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/snpeff:5.0--0" diff --git a/somaticseq.wdl b/somaticseq.wdl index 63f8362e..7656d086 100644 --- a/somaticseq.wdl +++ b/somaticseq.wdl @@ -47,7 +47,7 @@ task ParallelPaired { File? strelkaSNV File? strelkaIndel - String memory = "2G" + String memory = "2GiB" Int threads = 1 Int timeMinutes = 60 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -162,7 +162,7 @@ task ParallelPairedTrain { File? strelkaSNV File? 
strelkaIndel - String memory = "2G" + String memory = "2GiB" Int threads = 1 Int timeMinutes = 240 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -270,7 +270,7 @@ task ParallelSingle { File? scalpelVCF File? strelkaVCF - String memory = "2G" + String memory = "2GiB" Int threads = 1 Int timeMinutes = 60 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -362,7 +362,7 @@ task ParallelSingleTrain { File? scalpelVCF File? strelkaVCF - String memory = "2G" + String memory = "2GiB" Int threads = 1 Int timeMinutes = 240 String dockerImage = "lethalfang/somaticseq:3.1.0" @@ -441,7 +441,7 @@ task ModifyStrelka { File strelkaVCF String outputVCFName = basename(strelkaVCF, ".gz") - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 20 String dockerImage = "lethalfang/somaticseq:3.1.0" } diff --git a/spades.wdl b/spades.wdl index 3975dd32..d717ab28 100644 --- a/spades.wdl +++ b/spades.wdl @@ -100,6 +100,6 @@ task Spades { runtime { cpu: threads - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" } } diff --git a/star.wdl b/star.wdl index 6a123c86..88d3c838 100644 --- a/star.wdl +++ b/star.wdl @@ -29,8 +29,8 @@ task GenomeGenerate { Int? sjdbOverhang Int threads = 4 - String memory = "32G" - Int timeMinutes = ceil(size(referenceFasta, "G") * 240 / threads) + String memory = "32GiB" + Int timeMinutes = ceil(size(referenceFasta, "GiB") * 240 / threads) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } @@ -130,12 +130,12 @@ task Star { Int runThreadN = 4 String? memory # 1 minute initialization + time reading in index (1 minute per G) + time aligning data. - Int timeMinutes = 1 + ceil(size(indexFiles, "G")) + ceil(size(flatten([inputR1, inputR2]), "G") * 300 / runThreadN) + Int timeMinutes = 1 + ceil(size(indexFiles, "GiB")) + ceil(size(flatten([inputR1, inputR2]), "GiB") * 300 / runThreadN) String dockerImage = "quay.io/biocontainers/star:2.7.3a--0" } # Use a margin of 30% index size. Real memory usage is ~30 GiB for a 27 GiB index. 
- Int memoryGb = 1 + ceil(size(indexFiles, "G") * 1.3) + Int memoryGb = 1 + ceil(size(indexFiles, "GiB") * 1.3) # For some reason doing above calculation inside a string does not work. # So we solve it with an optional memory string and using select_first # in the runtime section. @@ -172,7 +172,7 @@ task Star { runtime { cpu: runThreadN - memory: select_first([memory, "~{memoryGb}G"]) + memory: select_first([memory, "~{memoryGb}GiB"]) time_minutes: timeMinutes docker: dockerImage } diff --git a/strelka.wdl b/strelka.wdl index be08e386..39afe172 100644 --- a/strelka.wdl +++ b/strelka.wdl @@ -63,7 +63,7 @@ task Germline { runtime { cpu: cores - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" time_minutes: timeMinutes docker: dockerImage } @@ -139,7 +139,7 @@ task Somatic { runtime { cpu: cores - memory: "~{memoryGb}G" + memory: "~{memoryGb}GiB" time_minutes: timeMinutes docker: dockerImage } diff --git a/stringtie.wdl b/stringtie.wdl index 9c2f3cfc..fbe7e442 100644 --- a/stringtie.wdl +++ b/stringtie.wdl @@ -34,7 +34,7 @@ task Stringtie { Float? minimumCoverage Int threads = 1 - String memory = "2G" + String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bam, "G") * 60 / threads) String dockerImage = "quay.io/biocontainers/stringtie:1.3.6--h92e31bf_0" } @@ -102,7 +102,7 @@ task Merge { Float? minimumIsoformFraction String? 
label - String memory = "10G" + String memory = "10GiB" Int timeMinutes = 1 + ceil(size(gtfFiles, "G") * 20) String dockerImage = "quay.io/biocontainers/stringtie:2.1.4--h7e0af3c_0" } diff --git a/survivor.wdl b/survivor.wdl index de232405..b233fb52 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -31,7 +31,7 @@ task Merge { Int minSize = 30 String outputPath = "./survivor/merged.vcf" - String memory = "24G" + String memory = "24GiB" Int timeMinutes = 60 String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" } diff --git a/talon.wdl b/talon.wdl index 61f5eb4a..2f93e36b 100644 --- a/talon.wdl +++ b/talon.wdl @@ -30,7 +30,7 @@ task CreateAbundanceFileFromDatabase { File? whitelistFile File? datasetsFile - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -86,7 +86,7 @@ task CreateGtfFromDatabase { File? whitelistFile File? datasetFile - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -144,7 +144,7 @@ task FilterTalonTranscripts { File? datasetsFile Int? minDatasets - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -200,7 +200,7 @@ task GetReadAnnotations { File? 
datasetFile - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -248,7 +248,7 @@ task GetSpliceJunctions { String runMode = "intron" String outputPrefix - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -302,7 +302,7 @@ task InitializeTalonDatabase { Int cutOff3p = 300 String outputPrefix - String memory = "10G" + String memory = "10GiB" Int timeMinutes = 60 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -360,7 +360,7 @@ task LabelReads { String outputPrefix Int threads = 4 - String memory = "25G" + String memory = "25GiB" Int timeMinutes = 2880 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -413,7 +413,7 @@ task ReformatGtf { input { File gtfFile - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -454,7 +454,7 @@ task SummarizeDatasets { File? datasetGroupsCsv - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 50 String dockerImage = "biocontainers/talon:v5.0_cv1" } @@ -506,7 +506,7 @@ task Talon { String outputPrefix Int threads = 4 - String memory = "25G" + String memory = "25GiB" Int timeMinutes = 2880 String dockerImage = "biocontainers/talon:v5.0_cv1" } diff --git a/transcriptclean.wdl b/transcriptclean.wdl index efdd95f4..8607a7a3 100644 --- a/transcriptclean.wdl +++ b/transcriptclean.wdl @@ -27,7 +27,7 @@ task GetSJsFromGtf { String outputPrefix Int minIntronSize = 21 - String memory = "8G" + String memory = "8GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } @@ -72,7 +72,7 @@ task GetTranscriptCleanStats { File inputSam String outputPrefix - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } @@ -128,7 +128,7 @@ task TranscriptClean { File? 
variantFile Int cores = 1 - String memory = "25G" + String memory = "25GiB" Int timeMinutes = 2880 String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1" } diff --git a/umi-tools.wdl b/umi-tools.wdl index b79817c2..d8d17c48 100644 --- a/umi-tools.wdl +++ b/umi-tools.wdl @@ -30,7 +30,7 @@ task Extract { String? read2Output = "umi_extracted_R2.fastq.gz" Boolean threePrime = false - String memory = "20G" + String memory = "20GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:3067b520386698317fd507c413baf7f901666fd4-0" } @@ -87,8 +87,8 @@ task Dedup { String? umiSeparator String? statsPrefix - String memory = "25G" - Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30) + String memory = "25GiB" + Int timeMinutes = 30 + ceil(size(inputBam, "GiB") * 30) String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:3067b520386698317fd507c413baf7f901666fd4-0" } diff --git a/umi.wdl b/umi.wdl index 0dc5c55e..e7f01fc2 100644 --- a/umi.wdl +++ b/umi.wdl @@ -30,8 +30,8 @@ task BamReadNameToUmiTag { String outputPath = "output.bam" String umiTag = "RX" - String memory = "2G" - Int timeMinutes = 1 + ceil(size([inputBam], "G") * 10) + String memory = "2GiB" + Int timeMinutes = 1 + ceil(size([inputBam], "GiB") * 10) String dockerImage = "quay.io/biocontainers/pysam:0.17.0--py39h051187c_0" } diff --git a/unicycler.wdl b/unicycler.wdl index 938d0c7e..d83db3ca 100644 --- a/unicycler.wdl +++ b/unicycler.wdl @@ -66,7 +66,7 @@ task Unicycler { String? 
lowScore Int threads = 1 - String memory = "4G" + String memory = "4GiB" } command { diff --git a/vardict.wdl b/vardict.wdl index 1c20e51c..187b4567 100644 --- a/vardict.wdl +++ b/vardict.wdl @@ -48,7 +48,7 @@ task VarDict { String javaXmx = "16G" Int threads = 1 - String memory = "18G" + String memory = "18GiB" Int timeMinutes = 300 String dockerImage = "quay.io/biocontainers/vardict-java:1.5.8--1" } diff --git a/vt.wdl b/vt.wdl index 85077dae..4da2d8cd 100644 --- a/vt.wdl +++ b/vt.wdl @@ -29,7 +29,7 @@ task Normalize { Boolean ignoreMaskedRef = false String outputPath = "./vt/normalized_decomposed.vcf" - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 30 String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" } diff --git a/whatshap.wdl b/whatshap.wdl index 7307ce7c..da86ad82 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -38,7 +38,7 @@ task Phase { String? threshold String? ped - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" @@ -109,7 +109,7 @@ task Stats { String? blockList String? chromosome - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" @@ -169,7 +169,7 @@ task Haplotag { String? regions String? sample - String memory = "4G" + String memory = "4GiB" Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" diff --git a/wisestork.wdl b/wisestork.wdl index 8fb4b76b..bef54e27 100644 --- a/wisestork.wdl +++ b/wisestork.wdl @@ -31,7 +31,7 @@ task Count { Int? binSize File? 
binFile - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -69,7 +69,7 @@ task GcCorrect { Int? iter Float? fracLowess - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } @@ -129,7 +129,7 @@ task Newref { } runtime { - memory: "~{memory}G" + memory: "~{memory}GiB" docker: dockerImage } } @@ -147,7 +147,7 @@ task Zscore { Int? binSize File? binFile - String memory = "2G" + String memory = "2GiB" String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0" } From 5523913a18f121dcc524cac346dd82cf1162e804 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 22 Aug 2022 14:37:42 +0200 Subject: [PATCH 0980/1208] Update changelog with memory change --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index be0e5a7c..5f4fed5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,11 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` + previously. The WDL spec clearly distuingishes between SI and binary + notations. Since Java always takes `K`, `M` and `G` to mean `KiB`, `MiB` and + `GiB` this means java tasks such as GATK, FastQC and Picard will always + receive enough memory now. + Purple's `somaticRainfallPlot` output is now optional and included in the `plots` output as well. + Bedtools coverage's timeMinutes now defaults to `320`. 
From 75bb0cbcf2d2ccc57e8c5857f140cffe2a310c67 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 25 Aug 2022 13:57:10 +0200 Subject: [PATCH 0981/1208] update survivor version --- CHANGELOG.md | 1 + survivor.wdl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f4fed5b..b0b7c3e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Updated SURVIVOR version to 1.0.7 + Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` previously. The WDL spec clearly distuingishes between SI and binary notations. Since Java always takes `K`, `M` and `G` to mean `KiB`, `MiB` and diff --git a/survivor.wdl b/survivor.wdl index b233fb52..ae246f60 100644 --- a/survivor.wdl +++ b/survivor.wdl @@ -33,7 +33,7 @@ task Merge { String memory = "24GiB" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0" + String dockerImage = "quay.io/biocontainers/survivor:1.0.7--hd03093a_2" } command { From bf7aba3c332a8dcabc87d22e1740049ed4bf7db4 Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 7 Oct 2022 17:59:35 +0200 Subject: [PATCH 0982/1208] add fastp --- fastp.wdl | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 fastp.wdl diff --git a/fastp.wdl b/fastp.wdl new file mode 100644 index 00000000..8cf99d99 --- /dev/null +++ b/fastp.wdl @@ -0,0 +1,101 @@ +verison 1.0 + +# MIT License +# +# Copyright (c) 2022 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to 
whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Fastp { + input { + File r1 + File r2 + String outputPathR1 + String outputPathR2 + String htmlPath + String jsonPath + + Int compressionLevel = 1 + Boolean correction = false + Int lengthRequired = 15 + Int? split + + Int threads = 4 + String memory = "5GiB" + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / cores) + String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" + } + + String outputDirR1 = sub(outputPathR1, basename(outputPathR1), "") + + command { + set -e + mkdir -p $(dirname ~{outputPathR1} ~{outputPathR2} ~{htmlPath} ~{jsonPath}) + # predict output paths + seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths + seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths + fastp \ + -i ~{r1} \ + ~{"-I " + r2} \ + -o ~{outputPathR1} \ + ~{"-O " + outputPathR2} \ + -h ~{htmlPath} \ + -j ~{jsonPath} \ + -z ~{compressionLevel} \ + ~{if correction then "--correction" else ""} \ + --length_required ~{lengthRequired} \ + --threads ~{threads} \ + ~{"--split " + split} \ + ~{if defined(split) then "-d 0" else ""} + } + + Array[String] r1Paths = read_lines("r1_paths") + Array[String] r2Paths = 
read_lines("r2_paths") + + output { + File htmlReport = htmlPath + File jsonReport = jsonPath + Array[File] clippedR1 = if defined(split) then r1Paths else [outputPathR1] + Array[File] clippedR2 = if defined(split) then r2Paths else [outputPathR2] + } + + runtime { + cpu: cores + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + r1: {description: "The R1 fastq file.", category: "required"} + r2: {description: "The R2 fastq file.", category: "required"} + outputPathR1: {description: "The output path for the R1 file.", category: "required"} + outputPathR2: {description: "The output path for the R2 file.", category: "required"} + htmlPath: {description: "The path to write the html report to.", category: "required"} + jsonPath: {description: "The path to write the json report to.", category: "required"} + compressionLevel: {description: "The compression level to use for the output.", category: "advanced"} + correction: {description: "Whether or not to apply overlap based correction.", category: "advanced"} + lengthRequired: {description: "The minimum read length.", category: "advanced"} + split: {description: "The number of chunks to split the files into.", category: "common"} + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file From 64427306fbbf58eb3ca9b3850a223d06894c9391 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 11 Oct 2022 12:13:08 +0200 Subject: [PATCH 0983/1208] fix some issues in fastp, add picard CollectInzertSizeMetrics --- fastp.wdl | 28 ++++++++++++++++------------ picard.wdl | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 12 deletions(-) diff --git a/fastp.wdl b/fastp.wdl index 8cf99d99..3063d012 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -1,4 +1,4 @@ -verison 1.0 +version 1.0 # MIT License # @@ -24,8 +24,8 @@ verison 1.0 task Fastp { input { - File r1 - File r2 + File read1 + File read2 String outputPathR1 String outputPathR2 String htmlPath @@ -35,24 +35,26 @@ task Fastp { Boolean correction = false Int lengthRequired = 15 Int? split + Boolean performAdapterTrimming = true Int threads = 4 String memory = "5GiB" - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / cores) + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" } String outputDirR1 = sub(outputPathR1, basename(outputPathR1), "") + String outputDirR2 = sub(outputPathR2, basename(outputPathR2), "") - command { + command <<< set -e mkdir -p $(dirname ~{outputPathR1} ~{outputPathR2} ~{htmlPath} ~{jsonPath}) # predict output paths seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths fastp \ - -i ~{r1} \ - ~{"-I " + r2} \ + -i ~{read1} \ + ~{"-I " + read2} \ -o ~{outputPathR1} \ ~{"-O " + outputPathR2} \ -h ~{htmlPath} \ @@ -62,8 +64,9 @@ task Fastp { --length_required ~{lengthRequired} \ --threads ~{threads} \ ~{"--split " + split} \ - ~{if 
defined(split) then "-d 0" else ""} - } + ~{if defined(split) then "-d 0" else ""} \ + ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} + >>> Array[String] r1Paths = read_lines("r1_paths") Array[String] r2Paths = read_lines("r2_paths") @@ -76,15 +79,15 @@ task Fastp { } runtime { - cpu: cores + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage } parameter_meta { - r1: {description: "The R1 fastq file.", category: "required"} - r2: {description: "The R2 fastq file.", category: "required"} + read1: {description: "The R1 fastq file.", category: "required"} + read2: {description: "The R2 fastq file.", category: "required"} outputPathR1: {description: "The output path for the R1 file.", category: "required"} outputPathR2: {description: "The output path for the R2 file.", category: "required"} htmlPath: {description: "The path to write the html report to.", category: "required"} @@ -93,6 +96,7 @@ task Fastp { correction: {description: "Whether or not to apply overlap based correction.", category: "advanced"} lengthRequired: {description: "The minimum read length.", category: "advanced"} split: {description: "The number of chunks to split the files into.", category: "common"} + performAdapterTrimming: {description: "Whether adapter trimming should be performed or not.", category: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} diff --git a/picard.wdl b/picard.wdl index f762ecdd..6628cf0e 100644 --- a/picard.wdl +++ b/picard.wdl @@ -136,6 +136,58 @@ task CollectHsMetrics { } } +task CollectInsertSizeMetrics { + input { + File inputBam + File inputBamIndex + + Float? 
minimumPercentage + String basename = "./insertSize_metrics" + + String memory = "5GiB" + String javaXmx = "4G" + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 6) + String dockerImage = "quay.io/biocontainers/picard:2.23.2--0" + } + + command { + set -e + mkdir -p "$(dirname ~{basename})" + picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + CollectInsertSizeMetrics \ + I=~{inputBam} \ + O=~{basename}.txt \ + H=~{basename}.pdf \ + ~{"M=" + minimumPercentage} + } + + output { + File metricsTxt = "~{basename}.txt" + File metricsPdf = "~{basename}.pdf" + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes + memory: memory + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM file for which metrics will be collected.", category: "required"} + inputBamIndex: {description: "The index of the input BAM file.", category: "required"} + minimumPercentage: {description: "Equivalent to picard CollectInsertSizeMetrics' `M` option.", category: "advanced"} + basename: {description: "The basename for the output files.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task CollectMultipleMetrics { input { File inputBam From 346c0044a15279e1e3c5cd7140e24d9321255be8 Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 14 Oct 2022 15:21:07 +0200 Subject: [PATCH 0984/1208] fix fastp task --- fastp.wdl | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fastp.wdl b/fastp.wdl index 3063d012..c7a4d19f 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -48,7 +48,11 @@ task Fastp { command <<< set -e - mkdir -p $(dirname ~{outputPathR1} ~{outputPathR2} ~{htmlPath} ~{jsonPath}) + mkdir -p $(dirname ~{outputPathR1}) + mkdir -p $(dirname ~{outputPathR2}) + mkdir -p $(dirname ~{htmlPath}) + mkdir -p $(dirname ~{jsonPath}) + # predict output paths seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths @@ -68,14 +72,11 @@ task Fastp { ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} >>> - Array[String] r1Paths = read_lines("r1_paths") - Array[String] r2Paths = read_lines("r2_paths") - output { File htmlReport = htmlPath File jsonReport = jsonPath - Array[File] clippedR1 = if defined(split) then r1Paths else [outputPathR1] - Array[File] clippedR2 = if defined(split) then r2Paths else [outputPathR2] + Array[File] clippedR1 = if defined(split) then read_lines("r1_paths") else [outputPathR1] + Array[File] clippedR2 = if defined(split) then read_lines("r2_paths") else [outputPathR2] } runtime { From 5b55e1b657b4d6d9ee189317d7cc5054493ef863 Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 14 Oct 2022 15:26:38 +0200 Subject: [PATCH 0985/1208] typo --- fastp.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index c7a4d19f..572de7dc 100644 --- a/fastp.wdl +++ 
b/fastp.wdl @@ -66,7 +66,7 @@ task Fastp { -z ~{compressionLevel} \ ~{if correction then "--correction" else ""} \ --length_required ~{lengthRequired} \ - --threads ~{threads} \ + --thread ~{threads} \ ~{"--split " + split} \ ~{if defined(split) then "-d 0" else ""} \ ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} From 9dda4c842ac98d083bd9c9fdeec1e97437040e65 Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 14 Oct 2022 16:02:40 +0200 Subject: [PATCH 0986/1208] increase memory for fastp --- fastp.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index 572de7dc..becbaf4b 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -38,7 +38,7 @@ task Fastp { Boolean performAdapterTrimming = true Int threads = 4 - String memory = "5GiB" + String memory = "10GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" } From f8aa7e37593df2282161bc37c49a1d0b5039185b Mon Sep 17 00:00:00 2001 From: davycats Date: Fri, 14 Oct 2022 18:06:18 +0200 Subject: [PATCH 0987/1208] increase memory for fastp --- fastp.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index becbaf4b..25f09e39 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -38,7 +38,7 @@ task Fastp { Boolean performAdapterTrimming = true Int threads = 4 - String memory = "10GiB" + String memory = "20GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" } From e9215442ac12ff2f9ea4833b69daf809d8957cc6 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Fri, 21 Oct 2022 15:14:04 +0200 Subject: [PATCH 0988/1208] fastp: use number of splits as number of threads if set --- fastp.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fastp.wdl b/fastp.wdl index 25f09e39..7f269d81 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -38,8 +38,8 @@ task Fastp 
{ Boolean performAdapterTrimming = true Int threads = 4 - String memory = "20GiB" - Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 7.0 / threads) + String memory = "50GiB" + Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 6.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" } @@ -66,7 +66,7 @@ task Fastp { -z ~{compressionLevel} \ ~{if correction then "--correction" else ""} \ --length_required ~{lengthRequired} \ - --thread ~{threads} \ + --thread ~{select_first([split, threads])} \ ~{"--split " + split} \ ~{if defined(split) then "-d 0" else ""} \ ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} @@ -80,7 +80,7 @@ task Fastp { } runtime { - cpu: threads + cpu: select_first([split, threads]) memory: memory time_minutes: timeMinutes docker: dockerImage @@ -96,9 +96,9 @@ task Fastp { compressionLevel: {description: "The compression level to use for the output.", category: "advanced"} correction: {description: "Whether or not to apply overlap based correction.", category: "advanced"} lengthRequired: {description: "The minimum read length.", category: "advanced"} - split: {description: "The number of chunks to split the files into.", category: "common"} + split: {description: "The number of chunks to split the files into. Number of threads will be set equal to the amount of splits.", category: "common"} performAdapterTrimming: {description: "Whether adapter trimming should be performed or not.", category: "advanced"} - threads: {description: "The number of threads to use.", category: "advanced"} + threads: {description: "The number of threads to use. Only used if the split input is not set.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From c7754754273f3ae4ce4bb34a9211cafec7880306 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 21 Oct 2022 16:48:34 +0200 Subject: [PATCH 0989/1208] Add a task to produce fasta indices --- biowdl.wdl | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/biowdl.wdl b/biowdl.wdl index f891618e..7392983a 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -73,3 +73,49 @@ task InputConverter { json: {description: "JSON file version of the input sample sheet."} } } + +task IndexFastaFile { + input { + File inputFile + String outputDir = "." + String javaXmx = "2G" + String memory = "3GiB" + } + String outputFile = outputDir + "/" + basename(inputFile) + # This executes both picard and samtools, so indexes are co-located in the same folder. + command <<< + set -e + mkdir -p ~{outputDir} + ln -s ~{inputFile} ~{outputFile} + picard -Xmx~{javaXmx} \ + -XX:ParallelGCThreads=1 \ + CreateSequenceDictionary \ + REFERENCE=~{inputFile} \ + OUTPUT="~{outputFile}.dict" + samtools faidx ~{outputFile} --fai-idx ~{outputFile}.fai + >>> + + output { + File outputFasta = outputFile + File outputFastaDict = outputFile + ".dict" + File outputFastaFai = outputFile + ".fai" + } + + runtime { + memory: memory + # Contains picard 2.27.4, samtools 1.15.1 + docker: "quay.io/biocontainers/mulled-v2-b0664646864bfdb46c5343b1b2b93fc05adb4b77:39a005770a3e30fb6aa3bf424b57ddf52bae7ece-0" + } + + parameter_meta { + # inputs + inputFile: {description: "The input fasta file.", category: "required"} + outputDir: {description: "Output directory path.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + # outputs + outputFasta: {description: "Fasta file that is co-located with the indexes"} + outputFastaFai: {description: "Fasta index file for the outputFasta file."} + outputFastaDict: {description: "Sequence dictionary for the outputFasta file."} + } +} \ No newline at end of file From 8d5a451e1d3938f62d14add4167fcf83dd9a0e70 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 24 Oct 2022 09:45:54 +0200 Subject: [PATCH 0990/1208] typo --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 26ab4e4a..5776dfed 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -604,7 +604,7 @@ task Linx { File transExonDataCsv File transSpliceDataCsv - String memory = "9iB" + String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 10 String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0" From f05d968d69d6c3a41b03a761a4a4838e5889df6c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 24 Oct 2022 14:45:15 +0200 Subject: [PATCH 0991/1208] Add a Bwa index task --- bwa.wdl | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/bwa.wdl b/bwa.wdl index d4f4495a..f79a219a 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -114,3 +114,29 @@ struct BwaIndex { File fastaFile Array[File] indexFiles } + +task Index { + input { + File fasta + } + File indexedFile = "reference.fasta" + + command { + set -e + cp ~{fasta} ~{indexedFile} + bwa index ~{indexedFile} + } + + output { + BwaIndex index = { + "fastaFile": indexedFile, + "indexFiles": [ + indexedFile + ".amb", + indexedFile + ".ann", + indexedFile + ".bwt", + indexedFile + ".pac", + indexedFile + ".sa" + ] + } + } +} \ No newline at end of file From 23b324ea33f63cb4901fd66528f4ecead4cab0d5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 24 Oct 2022 
15:02:55 +0200 Subject: [PATCH 0992/1208] Copy reference to prevent problems --- biowdl.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/biowdl.wdl b/biowdl.wdl index 7392983a..fe49a6cf 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -86,7 +86,7 @@ task IndexFastaFile { command <<< set -e mkdir -p ~{outputDir} - ln -s ~{inputFile} ~{outputFile} + cp ~{inputFile} ~{outputFile} picard -Xmx~{javaXmx} \ -XX:ParallelGCThreads=1 \ CreateSequenceDictionary \ From 4431b259d68024b057fe5cfd5dc4de2424450d4b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 24 Oct 2022 15:46:09 +0200 Subject: [PATCH 0993/1208] Make sure index task works --- bwa.wdl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/bwa.wdl b/bwa.wdl index f79a219a..a129ebb4 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -119,7 +119,7 @@ task Index { input { File fasta } - File indexedFile = "reference.fasta" + String indexedFile = "reference.fasta" command { set -e @@ -128,9 +128,9 @@ task Index { } output { - BwaIndex index = { - "fastaFile": indexedFile, - "indexFiles": [ + BwaIndex index = object { + fastaFile: indexedFile, + indexFiles: [ indexedFile + ".amb", indexedFile + ".ann", indexedFile + ".bwt", @@ -139,4 +139,10 @@ task Index { ] } } + + runtime { + docker: "quay.io/biocontainers/bwa:0.7.17--hed695b0_7" + cpu: 1 + memory: "~{size(fasta, 'G') + 1}GiB" + } } \ No newline at end of file From af929db9c2392cdc24a3ef2e7c644ca4d055cc3d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 25 Oct 2022 14:17:40 +0200 Subject: [PATCH 0994/1208] Use the basename of the input file for index names --- biowdl.wdl | 11 +++++------ bwa.wdl | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/biowdl.wdl b/biowdl.wdl index fe49a6cf..58e94df8 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -77,27 +77,27 @@ task InputConverter { task IndexFastaFile { input { File inputFile - String outputDir = "." 
String javaXmx = "2G" String memory = "3GiB" } - String outputFile = outputDir + "/" + basename(inputFile) + String outputFile = basename(inputFile) + # Capture .fa¸ .fna and .fasta + String outputDict = sub(outputFile, "\.fn?as?t?a?$", "") + ".dict" # This executes both picard and samtools, so indexes are co-located in the same folder. command <<< set -e - mkdir -p ~{outputDir} cp ~{inputFile} ~{outputFile} picard -Xmx~{javaXmx} \ -XX:ParallelGCThreads=1 \ CreateSequenceDictionary \ REFERENCE=~{inputFile} \ - OUTPUT="~{outputFile}.dict" + OUTPUT="~{outputDict}" samtools faidx ~{outputFile} --fai-idx ~{outputFile}.fai >>> output { File outputFasta = outputFile - File outputFastaDict = outputFile + ".dict" + File outputFastaDict = outputDict File outputFastaFai = outputFile + ".fai" } @@ -110,7 +110,6 @@ task IndexFastaFile { parameter_meta { # inputs inputFile: {description: "The input fasta file.", category: "required"} - outputDir: {description: "Output directory path.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} # outputs diff --git a/bwa.wdl b/bwa.wdl index a129ebb4..8f694b45 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -119,7 +119,7 @@ task Index { input { File fasta } - String indexedFile = "reference.fasta" + String indexedFile = basename(fasta) command { set -e From 2dc14b39d06dcc1c8161a9bf5840ebe5d88ccb25 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 25 Oct 2022 14:33:20 +0200 Subject: [PATCH 0995/1208] Make index use the basename of the file --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index e1b08173..bee38d11 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -470,7 +470,7 @@ task Sort { task Tabix { input { File inputFile - String outputFilePath = "indexed.vcf.gz" + String outputFilePath = basename(inputFile) String type = "vcf" Int timeMinutes = 1 + ceil(size(inputFile, "GiB") * 2) From 46bf6537c1787f47b7758d350b6605dae6da00cb Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 26 Oct 2022 14:38:17 +0200 Subject: [PATCH 0996/1208] Add indexing tasks to the changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f4fed5b..d94c2b56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add a combined Picard CreateSequenceDictionary and samtools faidx task. ++ Add a BWA index task. + Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` previously. The WDL spec clearly distuingishes between SI and binary notations. 
Since Java always takes `K`, `M` and `G` to mean `KiB`, `MiB` and From c6fe0300c5d2e5275739148c051f931e717cd6f1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Oct 2022 11:38:30 +0200 Subject: [PATCH 0997/1208] Use samtools dict instead of Picard CreateSequenceDictionary --- CHANGELOG.md | 2 +- biowdl.wdl | 45 --------------------------------------------- samtools.wdl | 43 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 46 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d94c2b56..b9df32a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- -+ Add a combined Picard CreateSequenceDictionary and samtools faidx task. ++ Add a combined samtools dict and samtools faidx task. + Add a BWA index task. + Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` previously. The WDL spec clearly distuingishes between SI and binary diff --git a/biowdl.wdl b/biowdl.wdl index 58e94df8..463dab75 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -72,49 +72,4 @@ task InputConverter { # outputs json: {description: "JSON file version of the input sample sheet."} } -} - -task IndexFastaFile { - input { - File inputFile - String javaXmx = "2G" - String memory = "3GiB" - } - String outputFile = basename(inputFile) - # Capture .fa¸ .fna and .fasta - String outputDict = sub(outputFile, "\.fn?as?t?a?$", "") + ".dict" - # This executes both picard and samtools, so indexes are co-located in the same folder. 
- command <<< - set -e - cp ~{inputFile} ~{outputFile} - picard -Xmx~{javaXmx} \ - -XX:ParallelGCThreads=1 \ - CreateSequenceDictionary \ - REFERENCE=~{inputFile} \ - OUTPUT="~{outputDict}" - samtools faidx ~{outputFile} --fai-idx ~{outputFile}.fai - >>> - - output { - File outputFasta = outputFile - File outputFastaDict = outputDict - File outputFastaFai = outputFile + ".fai" - } - - runtime { - memory: memory - # Contains picard 2.27.4, samtools 1.15.1 - docker: "quay.io/biocontainers/mulled-v2-b0664646864bfdb46c5343b1b2b93fc05adb4b77:39a005770a3e30fb6aa3bf424b57ddf52bae7ece-0" - } - - parameter_meta { - # inputs - inputFile: {description: "The input fasta file.", category: "required"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} - # outputs - outputFasta: {description: "Fasta file that is co-located with the indexes"} - outputFastaFai: {description: "Fasta index file for the outputFasta file."} - outputFastaDict: {description: "Sequence dictionary for the outputFasta file."} - } } \ No newline at end of file diff --git a/samtools.wdl b/samtools.wdl index bee38d11..d5e3ce0e 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -66,6 +66,49 @@ task BgzipAndIndex { } } +task DictAndFaidx { + input { + File inputFile + String javaXmx = "2G" + String memory = "3GiB" + String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + } + + String outputFile = basename(inputFile) + # Capture .fa¸ .fna and .fasta + String outputDict = sub(outputFile, "\.fn?as?t?a?$", "") + ".dict" + # This executes both dict and faidx, so indexes are co-located in the same folder. 
+ command <<< + set -e + cp ~{inputFile} ~{outputFile} + samtools dict -o ~{outputDict} ~{outputFile} + samtools faidx ~{outputFile} --fai-idx ~{outputFile}.fai + >>> + + output { + File outputFasta = outputFile + File outputFastaDict = outputDict + File outputFastaFai = outputFile + ".fai" + } + + runtime { + memory: memory + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "The input fasta file.", category: "required"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + # outputs + outputFasta: {description: "Fasta file that is co-located with the indexes"} + outputFastaFai: {description: "Fasta index file for the outputFasta file."} + outputFastaDict: {description: "Sequence dictionary for the outputFasta file."} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} + task Faidx { input { File inputFile From 61161df05a65d5a3f3427d381254988208266c98 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Oct 2022 11:45:26 +0200 Subject: [PATCH 0998/1208] Add time_minutes dockerimage and update parameter_meta --- biowdl.wdl | 2 +- bwa.wdl | 16 ++++++++++++++-- samtools.wdl | 4 +++- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/biowdl.wdl b/biowdl.wdl index 463dab75..f891618e 100644 --- a/biowdl.wdl +++ b/biowdl.wdl @@ -72,4 +72,4 @@ task InputConverter { # outputs json: {description: "JSON file version of the input sample sheet."} } -} \ No newline at end of file +} diff --git a/bwa.wdl b/bwa.wdl index 8f694b45..e1e61bbe 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -118,6 +118,8 @@ struct BwaIndex { task Index { input { File fasta + String dockerImage = "quay.io/biocontainers/bwa:0.7.17--hed695b0_7" + Int? timeMinutes = 5 + ceil(size(fasta, "G") * 5) } String indexedFile = basename(fasta) @@ -141,8 +143,18 @@ task Index { } runtime { - docker: "quay.io/biocontainers/bwa:0.7.17--hed695b0_7" + docker: dockerImage cpu: 1 memory: "~{size(fasta, 'G') + 1}GiB" + time_minutes: timeMinutes + } + parameter_meta { + # inputs + fasta: {description: "Reference fasta file.", category: "required"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + index: {description: "The produced BWA index."} } -} \ No newline at end of file +} diff --git a/samtools.wdl b/samtools.wdl index d5e3ce0e..76a07ef5 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -71,6 +71,7 @@ task DictAndFaidx { File inputFile String javaXmx = "2G" String memory = "3GiB" + Int timeMinutes = 5 + ceil(size(inputFile, "GiB") * 5) String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" } @@ -101,11 +102,12 @@ task DictAndFaidx { inputFile: {description: "The input fasta file.", category: "required"} javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs outputFasta: {description: "Fasta file that is co-located with the indexes"} outputFastaFai: {description: "Fasta index file for the outputFasta file."} outputFastaDict: {description: "Sequence dictionary for the outputFasta file."} - dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } From 3c53b47f4ba4e2c75fc104dabe972a50332552e6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Oct 2022 12:25:53 +0200 Subject: [PATCH 0999/1208] Add @DavyCats' suggestions --- bwa.wdl | 1 + samtools.wdl | 2 ++ 2 files changed, 3 insertions(+) diff --git a/bwa.wdl b/bwa.wdl index e1e61bbe..66b8e8cc 100644 --- a/bwa.wdl +++ b/bwa.wdl @@ -148,6 +148,7 @@ task Index { memory: "~{size(fasta, 'G') + 1}GiB" time_minutes: timeMinutes } + parameter_meta { # inputs fasta: {description: "Reference fasta file.", category: "required"} diff --git a/samtools.wdl b/samtools.wdl index 76a07ef5..df712e51 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -95,6 +95,8 @@ task DictAndFaidx { runtime { memory: memory docker: dockerImage + time_minutes: timeMinutes + cpu: 1 } parameter_meta { From 0632414b9ae0663431e8a25b35463c9aa83badbe Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 4 Nov 2022 11:03:34 +0100 Subject: [PATCH 1000/1208] typo --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b522c02c..daf79c8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,6 @@ version 5.1.0-dev + Updated SURVIVOR version to 1.0.7 + Add a combined samtools dict and samtools faidx task. + Add a BWA index task. - + Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` previously. The WDL spec clearly distuingishes between SI and binary notations. 
Since Java always takes `K`, `M` and `G` to mean `KiB`, `MiB` and From 4a42403fb4bf27ba21f63b99c7cb75f9d13adfeb Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 8 Nov 2022 16:33:30 +0100 Subject: [PATCH 1001/1208] Fallback to copying when hardlinking does not work --- samtools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index df712e51..587a53fb 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -332,7 +332,7 @@ task Index { if [ ! -f ~{outputPath} ] then mkdir -p "$(dirname ~{outputPath})" - ln ~{bamFile} ~{outputPath} + ln ~{bamFile} ~{outputPath} || cp ~{bamFile} ~{outputPath} fi samtools index ~{outputPath} ~{bamIndexPath} ' @@ -531,7 +531,7 @@ task Tabix { mkdir -p "$(dirname ~{outputFilePath})" if [ ! -f ~{outputFilePath} ] then - ln ~{inputFile} ~{outputFilePath} + ln ~{inputFile} ~{outputFilePath} || cp ~{inputFile} ~{outputFilePath} fi tabix ~{outputFilePath} -p ~{type} } From daf19317d6f5aafc4e156910393f8bf02c012199 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 9 Nov 2022 15:27:37 +0100 Subject: [PATCH 1002/1208] remove second breakends in gridss AnnotateSvTypes script --- gridss.wdl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index cfe53751..9a09bdde 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -119,9 +119,14 @@ task AnnotateSvTypes { gr <- breakpointRanges(vcf) svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype - # GRIDSS doesn't supply a GT, so we estimate GT based on AF (assuming CN of 2, might be inaccurate) - geno(vcf)$GT <- ifelse(geno(vcf)$AF > 0.75, "1/1", ifelse(geno(vcf)$AF < 0.25, "0/0", "0/1")) - writeVcf(vcf, out_path, index=~{index}) + # GRIDSS doesn't supply a GT, simply set it to 0/1 + geno(vcf)$GT <- "0/1" + # Select only one breakend per event (also removes single breakends): + # sourceId ends with o or h for paired breakends, the first in the pair + # end with o the second with h. 
Single breakend end with b, these will + # also be removed since we can't determine the SVTYPE. + gr2 <- gr[grepl(".*o$", gr$sourceId)] + writeVcf(vcf[gr2$sourceId], out_path, index=~{index}) EOF >>> From 9cf522d5cf766ef7943226e8d4807643ee93721d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 10 Nov 2022 16:00:25 +0100 Subject: [PATCH 1003/1208] fix typing issue in AnnotateSvTypes R code --- gridss.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gridss.wdl b/gridss.wdl index 9a09bdde..8e1474c1 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -120,7 +120,7 @@ task AnnotateSvTypes { svtype <- simpleEventType(gr) info(vcf[gr$sourceId])$SVTYPE <- svtype # GRIDSS doesn't supply a GT, simply set it to 0/1 - geno(vcf)$GT <- "0/1" + geno(vcf)$GT <- as.matrix(sapply(row.names(vcf), function(x) {"0/1"})) # Select only one breakend per event (also removes single breakends): # sourceId ends with o or h for paired breakends, the first in the pair # end with o the second with h. Single breakend end with b, these will From 2e1c9972b01922cd915b7041b230e6287dda778b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 14 Nov 2022 13:40:06 +0100 Subject: [PATCH 1004/1208] fix issue where fastp errors if split is set to 1 --- fastp.wdl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/fastp.wdl b/fastp.wdl index 7f269d81..db4a2d40 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -41,11 +41,15 @@ task Fastp { String memory = "50GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 6.0 / threads) String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3" + + Int? noneInt } String outputDirR1 = sub(outputPathR1, basename(outputPathR1), "") String outputDirR2 = sub(outputPathR2, basename(outputPathR2), "") + Int? 
effectiveSplit = if select_first([split, 1]) > 1 then split else noneInt + command <<< set -e mkdir -p $(dirname ~{outputPathR1}) @@ -54,8 +58,8 @@ task Fastp { mkdir -p $(dirname ~{jsonPath}) # predict output paths - seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths - seq 1 ~{if defined(split) then split else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths + seq 1 ~{if defined(effectiveSplit) then effectiveSplit else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths + seq 1 ~{if defined(effectiveSplit) then effectiveSplit else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths fastp \ -i ~{read1} \ ~{"-I " + read2} \ @@ -66,21 +70,21 @@ task Fastp { -z ~{compressionLevel} \ ~{if correction then "--correction" else ""} \ --length_required ~{lengthRequired} \ - --thread ~{select_first([split, threads])} \ - ~{"--split " + split} \ - ~{if defined(split) then "-d 0" else ""} \ + --thread ~{select_first([effectiveSplit, threads])} \ + ~{"--split " + effectiveSplit} \ + ~{if defined(effectiveSplit) then "-d 0" else ""} \ ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} >>> output { File htmlReport = htmlPath File jsonReport = jsonPath - Array[File] clippedR1 = if defined(split) then read_lines("r1_paths") else [outputPathR1] - Array[File] clippedR2 = if defined(split) then read_lines("r2_paths") else [outputPathR2] + Array[File] clippedR1 = if defined(effectiveSplit) then read_lines("r1_paths") else [outputPathR1] + Array[File] clippedR2 = if defined(effectiveSplit) then read_lines("r2_paths") else [outputPathR2] } runtime { - cpu: select_first([split, threads]) + cpu: select_first([effectiveSplit, threads]) memory: memory time_minutes: timeMinutes docker: dockerImage From 636b1f0ea31168d9001ea7b45efe6d3333d944a3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Nov 2022 14:12:45 
+0100 Subject: [PATCH 1005/1208] update changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index daf79c8f..2c4cff52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ The GRIDSS AnnotateSvTypes task now also removes the second breakend of + the breakpoints and single breakends. This will prepare the output better + to be passed into survivor. + Updated SURVIVOR version to 1.0.7 + Add a combined samtools dict and samtools faidx task. + Add a BWA index task. From b382cf745b6d7ed389bbca4efdfa70e37070d835 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 15 Nov 2022 14:23:00 +0100 Subject: [PATCH 1006/1208] adjusted runtime attributes for clever tasks --- CHANGELOG.md | 6 ++++++ clever.wdl | 8 ++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c4cff52..d2e95f60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,12 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Mateclever's runtime attribute defaults were changed to: + + memory: `"250GiB"` + + timeMinutes: `2880` ++ Clever's Prediction task's runtime attribute defaults were changed to: + + memory: `"80GiB"` + + timeMinutes: `2200` + The GRIDSS AnnotateSvTypes task now also removes the second breakend of the breakpoints and single breakends. This will prepare the output better to be passed into survivor. 
diff --git a/clever.wdl b/clever.wdl index 791a0ba1..3b819ed2 100644 --- a/clever.wdl +++ b/clever.wdl @@ -34,8 +34,8 @@ task Mateclever { Int maxOffset = 150 Int threads = 10 - String memory = "15GiB" - Int timeMinutes = 600 + String memory = "250GiB" + Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } @@ -94,8 +94,8 @@ task Prediction { String outputPath = "./clever" Int threads = 10 - String memory = "55GiB" - Int timeMinutes = 480 + String memory = "80GiB" + Int timeMinutes = 2200 String dockerImage = "quay.io/biocontainers/clever-toolkit:2.4--py36hcfe0e84_6" } From 522f2046d07479d1964de103f8d75a190a4a5292 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Nov 2022 11:48:50 +0100 Subject: [PATCH 1007/1208] increase time for Amber --- hmftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hmftools.wdl b/hmftools.wdl index 5776dfed..3b09beb9 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -37,7 +37,7 @@ task Amber { Int threads = 2 String memory = "70GiB" String javaXmx = "64G" - Int timeMinutes = 240 + Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } From 78e02137e639dc35e24c6c9ac08a1efedfda7ebd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 25 Nov 2022 17:12:07 +0100 Subject: [PATCH 1008/1208] increase memory for amber --- hmftools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hmftools.wdl b/hmftools.wdl index 3b09beb9..e051dc99 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -35,8 +35,8 @@ task Amber { File referenceFastaDict Int threads = 2 - String memory = "70GiB" - String javaXmx = "64G" + String memory = "85GiB" + String javaXmx = "80G" Int timeMinutes = 480 String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0" } From 1a80829e5bc6b9f607d3cb748f7af6c47e90f8bf Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 13 Jan 2023 16:37:32 +0100 Subject: [PATCH 1009/1208] Add targets file 
input to samtools view --- samtools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 587a53fb..8503777c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -571,6 +571,7 @@ task View { Int? excludeFilter Int? excludeSpecificFilter Int? MAPQthreshold + File? targetFile Int threads = 1 String memory = "1GiB" @@ -593,6 +594,7 @@ task View { ~{"-G " + excludeSpecificFilter} \ ~{"-q " + MAPQthreshold} \ ~{"--threads " + (threads - 1)} \ + ~{"--target-file " + targetFile} \ ~{inFile} samtools index ~{outputFileName} ~{outputIndexPath} } From 1ad000b1370898459d2ef3d6e2b3939699874c4f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 13 Jan 2023 17:07:32 +0100 Subject: [PATCH 1010/1208] update samtools containers --- samtools.wdl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 8503777c..303f9821 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -72,7 +72,7 @@ task DictAndFaidx { String javaXmx = "2G" String memory = "3GiB" Int timeMinutes = 5 + ceil(size(inputFile, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } String outputFile = basename(inputFile) @@ -119,7 +119,7 @@ task Faidx { String outputDir String memory = "2GiB" - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } command { @@ -168,7 +168,7 @@ task Fastq { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam) * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } command { @@ -232,7 +232,7 @@ task FilterShortReadsBam { String memory = "1GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" 
+ String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") @@ -278,7 +278,7 @@ task Flagstat { String memory = "256MiB" # Only 40.5 MiB used for 150G bam file. Int timeMinutes = 1 + ceil(size(inputBam, "G")) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } command { @@ -318,7 +318,7 @@ task Index { String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 4) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } # Select_first is needed, otherwise womtool validate fails. @@ -369,7 +369,7 @@ task Markdup { String outputBamPath Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } command { @@ -408,7 +408,7 @@ task Merge { Int threads = 1 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } String indexPath = sub(outputBamPath, "\.bam$",".bai") @@ -463,7 +463,7 @@ task Sort { Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } # Select first needed as outputPath is optional input (bug in cromwell). 
@@ -576,7 +576,7 @@ task View { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inFile, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.11--h6270b1f_0" + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } String outputIndexPath = basename(outputFileName) + ".bai" From d686e0870442c002b7902e9a8f33467dc404fa14 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 16 Jan 2023 14:15:45 +0100 Subject: [PATCH 1011/1208] Add parameter_meta for targetFile --- samtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index 303f9821..771a9969 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -621,6 +621,7 @@ task View { excludeFilter: {description: "Equivalent to samtools view's `-F` option.", category: "advanced"} excludeSpecificFilter: {description: "Equivalent to samtools view's `-G` option.", category: "advanced"} MAPQthreshold: {description: "Equivalent to samtools view's `-q` option.", category: "advanced"} + targetFile: {description: "A BED file with regions to include", caegory: "advanced"} threads: {description: "The number of threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From b52e3250eb5823b0ddbe4363eb3a77ab798d6fd0 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 16 Jan 2023 14:17:38 +0100 Subject: [PATCH 1012/1208] Update changelog with samtools change --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d2e95f60..c6b5e609 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Update samtools image to version 1.16. ++ Add targetsFile input for samtools View. 
+ Mateclever's runtime attribute defaults were changed to: + memory: `"250GiB"` + timeMinutes: `2880` From e1abb7dc92090bb836b6468be9ae33dc1696a44d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 16 Jan 2023 14:21:22 +0100 Subject: [PATCH 1013/1208] Use latest version of scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 98cc3e10..84690a30 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 98cc3e10125c853a70f41ceccf8f9d5428d4c1a3 +Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 From 9fce64caa41bf1cd0ec5e43337a31f3c8a8466cf Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 23 Jan 2023 12:07:38 +0100 Subject: [PATCH 1014/1208] add memory runtime attribute to tabix task --- samtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index 771a9969..fbb445e7 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -542,6 +542,7 @@ task Tabix { } runtime { + memory: "2GiB" time_minutes: timeMinutes docker: dockerImage } From 5f5d51a3515b78c0d290e23a022255207c95bb7c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 24 Jan 2023 16:37:48 +0100 Subject: [PATCH 1015/1208] add various tasks for somatic SV calling --- delly.wdl | 65 ++++++++++++++++++--- gridss.wdl | 161 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 214 insertions(+), 12 deletions(-) diff --git a/delly.wdl b/delly.wdl index 7333c5ff..fab32784 100644 --- a/delly.wdl +++ b/delly.wdl @@ -22,15 +22,17 @@ version 1.0 task CallSV { input { - File bamFile - File bamIndex + Array[File]+ bamFile + Array[File]+ bamIndex File referenceFasta File referenceFastaFai String outputPath = "./delly/delly.bcf" + File? 
genotypeBcf + String memory = "15GiB" Int timeMinutes = 300 - String dockerImage = "quay.io/biocontainers/delly:0.8.1--h4037b6b_1" + String dockerImage = "quay.io/biocontainers/delly:1.1.6--ha41ced6_0" } command { @@ -39,7 +41,8 @@ task CallSV { delly call \ -o ~{outputPath} \ -g ~{referenceFasta} \ - ~{bamFile} + ~{"-v " + genotypeBcf} \ + ~{sep=" " bamFile} } output { @@ -54,11 +57,12 @@ task CallSV { parameter_meta { # inputs - bamFile: {description: "The bam file to process.", category: "required"} - bamIndex: {description: "The index bam file.", category: "required"} + bamFile: {description: "The bam files to process.", category: "required"} + bamIndex: {description: "The indexes for the bam files.", category: "required"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } - outputPath: {description: "The location the output VCF file should be written.", category: "common"} + outputPath: {description: "The location the output BCF file should be written.", category: "common"} + genotypeBcf: {description: "A BCF with SVs to get genotyped in the samples."} memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -67,3 +71,50 @@ task CallSV { dellyBcf: {description: "File containing structural variants."} } } + + +task SomaticFilter { + input { + File dellyBcf + Array[String]+ normalSamples + Array[String]+ tumorSamples + String outputPath = "./delly/delly_filter.bcf" + + String memory = "15GiB" + Int timeMinutes = 300 + String dockerImage = "quay.io/biocontainers/delly:1.1.6--ha41ced6_0" + } + + command <<< + set -e + mkdir -p "$(dirname ~{outputPath})" + for SAMPLE in ~{sep=" " normalSamples}; do echo -e "${SAMPLE}\tcontrol" >> samples.tsv; done + for SAMPLE in ~{sep=" " tumorSamples}; do echo -e "${SAMPLE}\ttumor" >> samples.tsv; done + + delly filter \ + -f somatic \ + -o ~{outputPath} \ + -s samples.tsv \ + ~{dellyBcf} + >>> + + output { + File filterBcf = outputPath + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + dellyBcf: {description: "The BCF file produced by delly.", category: "required"} + normalSamples: {description: "The names for the normal samples as used in the delly BCF file.", category: "required"} + tumorSamples: {description: "The names for the tumor samples as used in the delly BCF file.", category: "required"} + outputPath: {description: "The location the output BCF file should be written.", category: "common"} + memory: {description: "The memory required to run the programs.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file diff --git a/gridss.wdl b/gridss.wdl index 8e1474c1..647f2d67 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -152,11 +152,108 @@ task AnnotateSvTypes { } } +task FilterPon { + input { + File ponBed + File ponBedpe + Int minimumScore = 3 + String outputDir = "." + + String memory = "1GiB" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" + Int timeMinutes = 20 + } + + command { + set -e + mkdir -p ~{outputDir} + + cat ~{ponBed} | awk '{if ($5 >= ~{minimumScore}) print $0}' > ~{outputDir}/gridss_pon_single_breakend.bed + cat ~{ponBedpe} | awk '{if ($8 >= ~{minimumScore}) print $0}' > ~{outputDir}/gridss_pon_breakpoint.bedpe + } + + output { + File bedpe = "~{outputDir}/gridss_pon_breakpoint.bedpe" + File bed = "~{outputDir}/gridss_pon_single_breakend.bed" + } + + runtime { + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + ponBed: {description: "The PON BED file.", category: "required"} + ponBedpe: {description: "The PON BEDPE file.", category: "required"} + minimumScore: {description: "The minimum number normal samples an SV must have been found in to be kept.", category: "advanced"} + outputDir: {description: "The directory the output will be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + +task GeneratePonBedpe { + input { + Array[File]+ vcfFiles + Array[File]+ vcfIndexes + File referenceFasta + String outputDir = "." + + Int threads = 8 + String javaXmx = "8G" + String memory = "9GiB" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" + Int timeMinutes = 120 + } + + command { + set -e + mkdir -p ~{outputDir} + java -Xmx~{javaXmx} \ + -cp /usr/local/share/gridss-2.12.2-0/gridss.jar \ + gridss.GeneratePonBedpe \ + INPUT=~{sep=" INPUT=" vcfFiles} \ + O=~{outputDir}/gridss_pon_breakpoint.bedpe \ + SBO=~{outputDir}/gridss_pon_single_breakend.bed \ + REFERENCE_SEQUENCE=~{referenceFasta} \ + THREADS=~{threads} + } + + output { + File bedpe = "~{outputDir}/gridss_pon_breakpoint.bedpe" + File bed = "~{outputDir}/gridss_pon_single_breakend.bed" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + vcfFiles: {description: "The vcf files with the normals as the first sample.", category: "required"} + referenceFasta: {description: "The fasta of the reference genome.", category: "required"} + outputDir: {description: "The directory the output will be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task GRIDSS { input { - File tumorBam - File tumorBai - String tumorLabel + Array[File]+ tumorBam + Array[File]+ tumorBai + Array[String]+ tumorLabel BwaIndex reference String outputPrefix = "gridss" @@ -184,10 +281,10 @@ task GRIDSS { ~{"-c " + gridssProperties} \ ~{"-t " + threads} \ ~{"--jvmheap " + jvmHeapSizeGb + "G"} \ - --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{tumorLabel} \ + --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{sep="," tumorLabel} \ ~{"--blacklist " + blacklistBed} \ ~{normalBam} \ - ~{tumorBam} + ~{sep=" " tumorBam} samtools index ~{outputPrefix}_assembly.bam ~{outputPrefix}_assembly.bai # For some reason the VCF index is sometimes missing @@ -283,6 +380,60 @@ task GridssAnnotateVcfRepeatmasker { } } +task SomaticFilter { + input { + File vcfFile + File vcfIndex + File ponBed + File ponBedpe + String outputPath = "./high_confidence_somatic.vcf.gz" + String fullOutputPath = "./high_and_low_confidence_somatic.vcf.gz" + + String memory = "16GiB" + String dockerImage = "quay.io/biowdl/gridss:2.12.2" + Int timeMinutes = 60 + } + + command { + set -e + mkdir -p $(dirname ~{outputPath}) + mkdir -p $(dirname ~{fullOutputPath}) + + gridss_somatic_filter \ + --pondir ~{dirname(ponBed)} \ + --input ~{vcfFile} \ + --output ~{outputPath} \ + --fulloutput ~{fullOutputPath} + } + + output { + File fullVcf = fullOutputPath + File fullVcfIndex = "~{fullOutputPath}.tbi" + File highConfidenceVcf = outputPath + File highConfidenceVcfIndex = "~{outputPath}.tbi" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes # !UnknownRuntimeKey + docker: dockerImage + } + + parameter_meta { + vcfFile: {description: "The GRIDSS VCF file.", category: "required"} + vcfIndex: {description: "The index for the GRIDSS VCF file.", category: "required"} + ponBed: {description: "The PON BED file.", 
category: "required"} + ponBedpe: {description: "The PON BEDPE file.", category: "required"} + outputPath: {description: "The path the high confidence output should be written to.", category: "common"} + fullOutputPath: {description: "The path the full output should be written to.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} + task Virusbreakend { input { File bam From 90bcc945807e9ef2c13fbd542d69f3b912995a0c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 25 Jan 2023 14:06:10 +0100 Subject: [PATCH 1016/1208] fix lint issues --- gridss.wdl | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 647f2d67..82ac7fbd 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -164,13 +164,13 @@ task FilterPon { Int timeMinutes = 20 } - command { + command <<< set -e mkdir -p ~{outputDir} cat ~{ponBed} | awk '{if ($5 >= ~{minimumScore}) print $0}' > ~{outputDir}/gridss_pon_single_breakend.bed cat ~{ponBedpe} | awk '{if ($8 >= ~{minimumScore}) print $0}' > ~{outputDir}/gridss_pon_breakpoint.bedpe - } + >>> output { File bedpe = "~{outputDir}/gridss_pon_breakpoint.bedpe" @@ -189,8 +189,6 @@ task FilterPon { minimumScore: {description: "The minimum number normal samples an SV must have been found in to be kept.", category: "advanced"} outputDir: {description: "The directory the output will be written to.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", - category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -394,13 +392,15 @@ task SomaticFilter { Int timeMinutes = 60 } + String ponDir = sub(ponBed, basename(ponBed), "") + command { set -e mkdir -p $(dirname ~{outputPath}) mkdir -p $(dirname ~{fullOutputPath}) gridss_somatic_filter \ - --pondir ~{dirname(ponBed)} \ + --pondir ~{ponDir} \ --input ~{vcfFile} \ --output ~{outputPath} \ --fulloutput ~{fullOutputPath} @@ -414,7 +414,6 @@ task SomaticFilter { } runtime { - cpu: threads memory: memory time_minutes: timeMinutes # !UnknownRuntimeKey docker: dockerImage From 34b3732319f7d74c72f93ff1bcb05ccc675585f8 Mon Sep 17 00:00:00 2001 From: imoustakas Date: Wed, 25 Jan 2023 16:47:10 +0100 Subject: [PATCH 1017/1208] Add a number of macs2 flags so we can adhere to Encode --- macs2.wdl | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index e6a011ad..53be0abd 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -30,8 +30,15 @@ task PeakCalling { String sampleName String format = "AUTO" Boolean nomodel = false + String gensz = "hs" + Int extsize + Int shiftsize = -1*round(extsize/2) + Float pval_thres = 0.01 + Boolean bdg = true + String keepdup = "auto" + String callsummits = true Int timeMinutes = 600 # Default to 10 hours - String memory = "8GiB" + String memory = "8G" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } @@ -43,7 +50,14 @@ task PeakCalling { --outdir ~{outDir} \ --name ~{sampleName} \ -f ~{format} \ - ~{true='--nomodel' false='' nomodel} + -g ~{gensz} \ + -p ~{pval_thres} \ + --shift ~{shiftsize} \ + --extsize ~{extsize} \ + ~{true='--nomodel' false='' nomodel} \ + ~{true='-B' 
false='' bdg} \ + --keep-dup ~{keepdup} \ + ~{true='--call-summits' false='' callsummits} } output { @@ -64,6 +78,13 @@ task PeakCalling { sampleName: {description: "Name of the sample to be analysed", category: "required"} outDir: {description: "All output files will be written in this directory.", category: "advanced"} nomodel: {description: "Whether or not to build the shifting model.", category: "advanced"} + gensz: {description: "macs2 argument for setting the mappable genome size or effective genome size which is defined as the genome size which can be sequenced."} + pval_thres: {description: "macs2 argument for setting the p-value cutoff. If -p is specified, MACS2 will use p-value instead of q-value."} + shiftsize: {description: "macs2 argument to set an arbitrary shift in bp. Can be negative to indicate direction"} + extsize: {description: "macs2 argument to extend reads in 5'->3' direction to fix-sized fragments."} + bdg: {description: "macs2 argument that ebanbles the storage of the fragment pileup, control lambda in bedGraph files."} + keepdup: {description: "macs2 argument that controls the behavior towards duplicate tags at the exact same location."} + callsummits: {description: "macs2 argument to reanalyze the shape of signal profile to deconvolve subpeaks within each peak called from the general procedure."} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From ae937f28ab0147b572916c97448f6c788fa58e19 Mon Sep 17 00:00:00 2001 From: imoustakas Date: Wed, 25 Jan 2023 16:55:23 +0100 Subject: [PATCH 1018/1208] Fix data type error --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 53be0abd..854db814 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -36,7 +36,7 @@ task PeakCalling { Float pval_thres = 0.01 Boolean bdg = true String keepdup = "auto" - String callsummits = true + Boolean callsummits = true Int timeMinutes = 600 # Default to 10 hours String memory = "8G" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" From 2dca5f3611fd3aef0ee501cbe05467b590c93280 Mon Sep 17 00:00:00 2001 From: imoustakas Date: Wed, 25 Jan 2023 17:29:04 +0100 Subject: [PATCH 1019/1208] Address comments from Ruben --- macs2.wdl | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index 854db814..7b11c99f 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -29,16 +29,16 @@ task PeakCalling { String outDir = "macs2" String sampleName String format = "AUTO" - Boolean nomodel = false - String gensz = "hs" - Int extsize - Int shiftsize = -1*round(extsize/2) - Float pval_thres = 0.01 - Boolean bdg = true - String keepdup = "auto" - Boolean callsummits = true + Boolean? nomodel + String? gensz + Int? extsize + Int? shiftsize = -1*round(extsize/2) + Float? pval_thres + Boolean? bdg + String? keepdup + Boolean? 
callsummits Int timeMinutes = 600 # Default to 10 hours - String memory = "8G" + String memory = "8GiB" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" } @@ -49,14 +49,14 @@ task PeakCalling { ~{true="--control" false="" length(controlBams) > 0} ~{sep = ' ' controlBams} \ --outdir ~{outDir} \ --name ~{sampleName} \ - -f ~{format} \ - -g ~{gensz} \ - -p ~{pval_thres} \ - --shift ~{shiftsize} \ - --extsize ~{extsize} \ + ~{"-f" + format} \ + ~{"-g" + gensz} \ + ~{"-p" + pval_thres} \ + ~{"--shift" + shiftsize} \ + ~{"--extsize" + extsize} \ ~{true='--nomodel' false='' nomodel} \ ~{true='-B' false='' bdg} \ - --keep-dup ~{keepdup} \ + ~{"--keep-dup" + keepdup} \ ~{true='--call-summits' false='' callsummits} } From e89b1d7d13fef289ba17ee0f6acc8e8b5415a217 Mon Sep 17 00:00:00 2001 From: imoustakas Date: Thu, 26 Jan 2023 10:31:15 +0100 Subject: [PATCH 1020/1208] Delete calculation for shiftsize --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 7b11c99f..8d89f3af 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -32,7 +32,7 @@ task PeakCalling { Boolean? nomodel String? gensz Int? extsize - Int? shiftsize = -1*round(extsize/2) + Int? shiftsize Float? pval_thres Boolean? bdg String? keepdup From e996878ae65113bc66add0caaf7b5d9efc75ad73 Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 10:59:13 +0100 Subject: [PATCH 1021/1208] Update macs2.wdl Co-authored-by: Davy Cats --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 8d89f3af..70fea707 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -29,7 +29,7 @@ task PeakCalling { String outDir = "macs2" String sampleName String format = "AUTO" - Boolean? nomodel + Boolean nomodel = false String? gensz Int? extsize Int? 
shiftsize From 055246a9082ec004ab335c7525685c888fd6e27f Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 10:59:24 +0100 Subject: [PATCH 1022/1208] Update macs2.wdl Co-authored-by: Davy Cats --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 70fea707..2c3bf57c 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -34,7 +34,7 @@ task PeakCalling { Int? extsize Int? shiftsize Float? pval_thres - Boolean? bdg + Boolean bdg = false String? keepdup Boolean? callsummits Int timeMinutes = 600 # Default to 10 hours From 72bbcce9084408ee7ba68a04dd8f121a8a793390 Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 10:59:35 +0100 Subject: [PATCH 1023/1208] Update macs2.wdl Co-authored-by: Davy Cats --- macs2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macs2.wdl b/macs2.wdl index 2c3bf57c..c4c08ed5 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -36,7 +36,7 @@ task PeakCalling { Float? pval_thres Boolean bdg = false String? keepdup - Boolean? 
callsummits + Boolean callsummits = false Int timeMinutes = 600 # Default to 10 hours String memory = "8GiB" String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0" From 4b9754f548b8558e7de2652e257edd807d0d4ffa Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 10:59:43 +0100 Subject: [PATCH 1024/1208] Update macs2.wdl Co-authored-by: Davy Cats --- macs2.wdl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index c4c08ed5..9d5344ae 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -78,13 +78,13 @@ task PeakCalling { sampleName: {description: "Name of the sample to be analysed", category: "required"} outDir: {description: "All output files will be written in this directory.", category: "advanced"} nomodel: {description: "Whether or not to build the shifting model.", category: "advanced"} - gensz: {description: "macs2 argument for setting the mappable genome size or effective genome size which is defined as the genome size which can be sequenced."} - pval_thres: {description: "macs2 argument for setting the p-value cutoff. If -p is specified, MACS2 will use p-value instead of q-value."} - shiftsize: {description: "macs2 argument to set an arbitrary shift in bp. 
Can be negative to indicate direction"} - extsize: {description: "macs2 argument to extend reads in 5'->3' direction to fix-sized fragments."} - bdg: {description: "macs2 argument that ebanbles the storage of the fragment pileup, control lambda in bedGraph files."} - keepdup: {description: "macs2 argument that controls the behavior towards duplicate tags at the exact same location."} - callsummits: {description: "macs2 argument to reanalyze the shape of signal profile to deconvolve subpeaks within each peak called from the general procedure."} + gensz: {description: "macs2 argument for setting the mappable genome size or effective genome size which is defined as the genome size which can be sequenced.", category: "advanced"} + pval_thres: {description: "macs2 argument for setting the p-value cutoff. If -p is specified, MACS2 will use p-value instead of q-value.", category: "advanced"} + shiftsize: {description: "macs2 argument to set an arbitrary shift in bp. Can be negative to indicate direction.", category: "advanced"} + extsize: {description: "macs2 argument to extend reads in 5'->3' direction to fix-sized fragments.", category: "advanced"} + bdg: {description: "macs2 argument that enables the storage of the fragment pileup, control lambda in bedGraph files.", category: "advanced"} + keepdup: {description: "macs2 argument that controls the behavior towards duplicate tags at the exact same location.", category: "advanced"} + callsummits: {description: "macs2 argument to reanalyze the shape of signal profile to deconvolve subpeaks within each peak called from the general procedure.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From ece0782a37451b82677eedd1ed771d823b56e891 Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Thu, 26 Jan 2023 11:26:19 +0100 Subject: [PATCH 1025/1208] Update CHANGELOG.md --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6b5e609..4962c687 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,15 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. Inputs added: + + nomodel + + gensz + + extsize + + shiftsize + + pval_thres + + bdg + + keepdup + + callsummits + Update samtools image to version 1.16. + Add targetsFile input for samtools View. + Mateclever's runtime attribute defaults were changed to: From 2b4fb7ea3fc9270af1caaea897f35d2b319c35fd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 26 Jan 2023 14:32:37 +0100 Subject: [PATCH 1026/1208] add missing paramter_meta --- gridss.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/gridss.wdl b/gridss.wdl index 82ac7fbd..8b27df77 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -238,6 +238,7 @@ task GeneratePonBedpe { vcfFiles: {description: "The vcf files with the normals as the first sample.", category: "required"} referenceFasta: {description: "The fasta of the reference genome.", category: "required"} outputDir: {description: "The directory the output will be written to.", category: "common"} + threads: {description: "The number of the threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} From 6d0329539033821b68ef31234ae7d6f920505aed Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Fri, 27 Jan 2023 09:41:54 +0100 Subject: [PATCH 1027/1208] Add space between flag and the value following --- macs2.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/macs2.wdl b/macs2.wdl index 9d5344ae..5ccc5a5f 100644 --- a/macs2.wdl +++ b/macs2.wdl @@ -49,14 +49,14 @@ task PeakCalling { ~{true="--control" false="" length(controlBams) > 0} ~{sep = ' ' controlBams} \ --outdir ~{outDir} \ --name ~{sampleName} \ - ~{"-f" + format} \ - ~{"-g" + gensz} \ - ~{"-p" + pval_thres} \ - ~{"--shift" + shiftsize} \ - ~{"--extsize" + extsize} \ + ~{"-f " + format} \ + ~{"-g " + gensz} \ + ~{"-p " + pval_thres} \ + ~{"--shift " + shiftsize} \ + ~{"--extsize " + extsize} \ ~{true='--nomodel' false='' nomodel} \ ~{true='-B' false='' bdg} \ - ~{"--keep-dup" + keepdup} \ + ~{"--keep-dup " + keepdup} \ ~{true='--call-summits' false='' callsummits} } From b79e59b1f3279bfcb26446ee5c95f1c6bfb4b16e Mon Sep 17 00:00:00 2001 From: Giannis Moustakas Date: Fri, 27 Jan 2023 09:44:00 +0100 Subject: [PATCH 1028/1208] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4962c687..bd66a6ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Bug fix: Add space between flag and the value provided for macs2 + Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. 
Inputs added: + nomodel + gensz From ee0b137664a20f94997e9daad8b25cc2729dc88a Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 27 Jan 2023 13:25:12 +0100 Subject: [PATCH 1029/1208] increase time for manta, add index to delly outputs --- delly.wdl | 2 ++ manta.wdl | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/delly.wdl b/delly.wdl index fab32784..43af3ca0 100644 --- a/delly.wdl +++ b/delly.wdl @@ -47,6 +47,7 @@ task CallSV { output { File dellyBcf = outputPath + File dellyBcfIndex = outputPath + ".csi" } runtime { @@ -100,6 +101,7 @@ task SomaticFilter { output { File filterBcf = outputPath + File filterBcfIndex = outputPath + ".csi" } runtime { diff --git a/manta.wdl b/manta.wdl index 6804f304..fde8c208 100644 --- a/manta.wdl +++ b/manta.wdl @@ -34,7 +34,7 @@ task Germline { Int cores = 1 Int memoryGb = 4 - Int timeMinutes = 60 + Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/manta:1.4.0--py27_1" } @@ -102,7 +102,7 @@ task Somatic { Int cores = 1 Int memoryGb = 4 - Int timeMinutes = 60 + Int timeMinutes = 2880 String dockerImage = "quay.io/biocontainers/manta:1.4.0--py27_1" } From 1bf7725df8ff78628b3444d8ab6b6daa044836fd Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 27 Jan 2023 13:28:02 +0100 Subject: [PATCH 1030/1208] add bcf index input for delly somatic filter --- delly.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/delly.wdl b/delly.wdl index 43af3ca0..ab72f060 100644 --- a/delly.wdl +++ b/delly.wdl @@ -77,6 +77,7 @@ task CallSV { task SomaticFilter { input { File dellyBcf + File dellyBcfIndex Array[String]+ normalSamples Array[String]+ tumorSamples String outputPath = "./delly/delly_filter.bcf" From 9af2205811e0708be46be8e88bc1c7e1387fdfda Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 27 Jan 2023 13:33:30 +0100 Subject: [PATCH 1031/1208] add index to delly call inputs as well --- delly.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/delly.wdl b/delly.wdl index ab72f060..12e68187 100644 --- 
a/delly.wdl +++ b/delly.wdl @@ -29,6 +29,7 @@ task CallSV { String outputPath = "./delly/delly.bcf" File? genotypeBcf + File? genotypeBcfIndex String memory = "15GiB" Int timeMinutes = 300 From 71193e8da89c9275c7f6d878e349f1bdc19543ba Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 27 Jan 2023 13:35:07 +0100 Subject: [PATCH 1032/1208] update parameter_meta --- delly.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/delly.wdl b/delly.wdl index 12e68187..2dc847b9 100644 --- a/delly.wdl +++ b/delly.wdl @@ -64,7 +64,8 @@ task CallSV { referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"} referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" } outputPath: {description: "The location the output BCF file should be written.", category: "common"} - genotypeBcf: {description: "A BCF with SVs to get genotyped in the samples."} + genotypeBcf: {description: "A BCF with SVs to get genotyped in the samples.", category: "advanced"} + genotypeBcfIndex: {description: "The index for the genotype BCF file.", category: "advanced"} memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -114,6 +115,7 @@ task SomaticFilter { parameter_meta { dellyBcf: {description: "The BCF file produced by delly.", category: "required"} + dellyBcfIndex: {description: "The index for the delly BCF file.", category: "required"} normalSamples: {description: "The names for the normal samples as used in the delly BCF file.", category: "required"} tumorSamples: {description: "The names for the tumor samples as used in the delly BCF file.", category: "required"} outputPath: {description: "The location the output BCF file should be written.", category: "common"} From dd9ea3db69c56bef6c1d5ed63c08e10e691c6d5b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 31 Jan 2023 11:13:40 +0100 Subject: [PATCH 1033/1208] give delly more time, specify normal ordinal in gridss GeneratePonBedpe command --- delly.wdl | 2 +- gridss.wdl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/delly.wdl b/delly.wdl index 2dc847b9..b952da7e 100644 --- a/delly.wdl +++ b/delly.wdl @@ -32,7 +32,7 @@ task CallSV { File? 
genotypeBcfIndex String memory = "15GiB" - Int timeMinutes = 300 + Int timeMinutes = 600 String dockerImage = "quay.io/biocontainers/delly:1.1.6--ha41ced6_0" } diff --git a/gridss.wdl b/gridss.wdl index 8b27df77..5c203a16 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -216,6 +216,7 @@ task GeneratePonBedpe { -cp /usr/local/share/gridss-2.12.2-0/gridss.jar \ gridss.GeneratePonBedpe \ INPUT=~{sep=" INPUT=" vcfFiles} \ + NO=0 \ O=~{outputDir}/gridss_pon_breakpoint.bedpe \ SBO=~{outputDir}/gridss_pon_single_breakend.bed \ REFERENCE_SEQUENCE=~{referenceFasta} \ From 48340415ab9c852ceefaf35e2b4e2ae8b47d3f66 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 31 Jan 2023 14:34:06 +0100 Subject: [PATCH 1034/1208] add missing fasta index input to gridss GeneratePonBedpe --- gridss.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gridss.wdl b/gridss.wdl index 5c203a16..03fdc6ab 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -200,6 +200,7 @@ task GeneratePonBedpe { Array[File]+ vcfFiles Array[File]+ vcfIndexes File referenceFasta + File referenceFastaFai String outputDir = "." 
Int threads = 8 @@ -238,6 +239,7 @@ task GeneratePonBedpe { parameter_meta { vcfFiles: {description: "The vcf files with the normals as the first sample.", category: "required"} referenceFasta: {description: "The fasta of the reference genome.", category: "required"} + referenceFastaFai: {description: "The index for the reference genome fasta.", category: "required"} outputDir: {description: "The directory the output will be written to.", category: "common"} threads: {description: "The number of the threads to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} From 42796e37927b50b2dc25249a5ff92348ebf54ce0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 1 Feb 2023 16:03:47 +0100 Subject: [PATCH 1035/1208] fix output paths gridss somatic filter --- gridss.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index 03fdc6ab..b67f4c91 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -388,8 +388,8 @@ task SomaticFilter { File vcfIndex File ponBed File ponBedpe - String outputPath = "./high_confidence_somatic.vcf.gz" - String fullOutputPath = "./high_and_low_confidence_somatic.vcf.gz" + String outputPath = "./high_confidence_somatic.vcf.bgz" + String fullOutputPath = "./high_and_low_confidence_somatic.vcf.bgz" String memory = "16GiB" String dockerImage = "quay.io/biowdl/gridss:2.12.2" From b17076a642b17212499b6478e948661b0e9433c3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 1 Feb 2023 16:23:37 +0100 Subject: [PATCH 1036/1208] fix gridss somatic filter output paths? 
--- gridss.wdl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gridss.wdl b/gridss.wdl index b67f4c91..5aca3825 100644 --- a/gridss.wdl +++ b/gridss.wdl @@ -388,8 +388,8 @@ task SomaticFilter { File vcfIndex File ponBed File ponBedpe - String outputPath = "./high_confidence_somatic.vcf.bgz" - String fullOutputPath = "./high_and_low_confidence_somatic.vcf.bgz" + String outputPath = "./high_confidence_somatic.vcf" + String fullOutputPath = "./high_and_low_confidence_somatic.vcf" String memory = "16GiB" String dockerImage = "quay.io/biowdl/gridss:2.12.2" @@ -411,10 +411,10 @@ task SomaticFilter { } output { - File fullVcf = fullOutputPath - File fullVcfIndex = "~{fullOutputPath}.tbi" - File highConfidenceVcf = outputPath - File highConfidenceVcfIndex = "~{outputPath}.tbi" + File fullVcf = "~{fullOutputPath}.bgz" + File fullVcfIndex = "~{fullOutputPath}.bgz.tbi" + File highConfidenceVcf = "~{outputPath}.bgz" + File highConfidenceVcfIndex = "~{outputPath}.bgz.tbi" } runtime { From d320b3c79bfc321fff1178ff571af520b7969043 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 6 Feb 2023 14:11:59 +0100 Subject: [PATCH 1037/1208] add samples option to bcftools view --- bcftools.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 726d2e37..7df8911d 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -349,6 +349,7 @@ task View { String? exclude String? 
include + Array[String] samples = [] String memory = "256MiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) @@ -364,6 +365,7 @@ task View { ~{"--exclude " + exclude} \ ~{"--include " + include} \ ~{true="--exclude-uncalled" false="" excludeUncalled} \ + ~{if length(samples) > 0 then "-s" else ""} ~{sep="," samples} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ ~{inputFile} @@ -389,6 +391,7 @@ task View { include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} excludeUncalled: {description: "Exclude sites without a called genotype (see man page for details).", category: "advanced"} + samples: {description: "A list of sample names to include.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From 3961ab4e858d31163987bb267cbad30ea085b205 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Feb 2023 14:51:45 +0100 Subject: [PATCH 1038/1208] Allow a custom separator char --- umi.wdl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/umi.wdl b/umi.wdl index e7f01fc2..e4270ed6 100644 --- a/umi.wdl +++ b/umi.wdl @@ -29,6 +29,7 @@ task BamReadNameToUmiTag { File inputBam String outputPath = "output.bam" String umiTag = "RX" + String separatorChar = "_" String memory = "2GiB" Int timeMinutes = 1 + ceil(size([inputBam], "GiB") * 10) @@ -45,26 +46,26 @@ task BamReadNameToUmiTag { from typing import Tuple - def split_umi_from_name(name) -> Tuple[str, str]: + def split_umi_from_name(name, separator_char = "_") -> Tuple[str, str]: id_and_rest = name.split(maxsplit=1) id = id_and_rest[0] # If there was no whitespace id_and_rest will have length 1 other_parts = id_and_rest[1] if len(id_and_rest) == 2 else "" - underscore_index = id.rfind("_") + underscore_index = id.rfind(separator_char) umi = id[underscore_index + 1:] new_id = id[:underscore_index] if other_parts: return " ".join([new_id, other_parts]), umi return new_id, umi - def annotate_umis(in_file, out_file, bam_tag="RX"): + def annotate_umis(in_file, out_file, bam_tag="RX", separator_char = "_"): in_bam = pysam.AlignmentFile(in_file, "rb") os.makedirs(os.path.dirname(out_file), exist_ok=True) out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam) # Encode bam_tag as bytes. Otherwise pysam converts it to bytes anyway. encoded_bam_tag = bam_tag.encode('ascii') for segment in in_bam: # type: pysam.AlignedSegment - new_name, umi = split_umi_from_name(segment.query_name) + new_name, umi = split_umi_from_name(segment.query_name, separator_char) segment.query_name = new_name # Encode umi as ascii. Otherwise pysam encodes it to bytes anyway. 
# Value type has to be a string though, otherwise pysam crashes. @@ -72,7 +73,7 @@ task BamReadNameToUmiTag { out_bam.write(segment) if __name__ == "__main__": - annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}") + annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}", "~{separatorChar}") pysam.index("~{outputPath}", "~{bamIndexPath}", b=True) CODE >>> @@ -93,6 +94,7 @@ task BamReadNameToUmiTag { inputBam: {description: "The input SAM file.", category: "required"} outputPath: {description: "Output directory path + output file.", category: "common"} umiTag: {description: "The tag used for UMIs in the output BAM file.", category: "common"} + separatorChar: {description: "Character used to separate the UMIs from the read name", category: "common"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 84a8781c4c94be08ba0f404902378d05db18fef9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Feb 2023 14:56:03 +0100 Subject: [PATCH 1039/1208] Update changelog with separatorChar --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bd66a6ba..4bab712a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add a separatorChar input to the tagUmi task. + Bug fix: Add space between flag and the value provided for macs2 + Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. 
Inputs added: + nomodel From b3c9204b77851836042190486f8031dbe79a9e2a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Feb 2023 14:57:25 +0100 Subject: [PATCH 1040/1208] Add missing interpunction --- umi.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/umi.wdl b/umi.wdl index e4270ed6..0628783a 100644 --- a/umi.wdl +++ b/umi.wdl @@ -94,7 +94,7 @@ task BamReadNameToUmiTag { inputBam: {description: "The input SAM file.", category: "required"} outputPath: {description: "Output directory path + output file.", category: "common"} umiTag: {description: "The tag used for UMIs in the output BAM file.", category: "common"} - separatorChar: {description: "Character used to separate the UMIs from the read name", category: "common"} + separatorChar: {description: "Character used to separate the UMIs from the read name.", category: "common"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From c3f246f24d05bda4ebfa781cff41dfe61bbf85b3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Feb 2023 13:05:31 +0100 Subject: [PATCH 1041/1208] update changelog --- CHANGELOG.md | 2 ++ scripts | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bab712a..3021817d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add a task for fastp. ++ Add a task for picard CollectInsertSizeMetrics. + Add a separatorChar input to the tagUmi task. + Bug fix: Add space between flag and the value provided for macs2 + Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. 
Inputs added: diff --git a/scripts b/scripts index 84690a30..98cc3e10 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 +Subproject commit 98cc3e10125c853a70f41ceccf8f9d5428d4c1a3 From ad97efa05229f147435ee0800b0a742a2c360435 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Feb 2023 13:06:00 +0100 Subject: [PATCH 1042/1208] update scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 98cc3e10..4142daab 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 98cc3e10125c853a70f41ceccf8f9d5428d4c1a3 +Subproject commit 4142daab81a7d9f28686b6a3299536757d381c81 From 669428627e26aaaafdba3ab680a37236eaa736da Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Feb 2023 13:20:13 +0100 Subject: [PATCH 1043/1208] update changelog --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bab712a..6e1daf97 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,14 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Increased the timeMinutes runtime attribute for manta (somatic and germline) to `2880`. ++ Add a task for GRIDSS somatic filtering. ++ Add a task to generate a panel of normals BED and BEDPE file for GRIDSS. ++ Add a task to filter a GRIDSS PON. ++ Add a task for delly somatic filtering. ++ Delly CallSV's `bamFile` and `bamIndex` inputs are not arrays of files, allowing + for multiple samples to be included. ++ Add `samples` input to bcftools view to select samples included in the output vcf. + Add a separatorChar input to the tagUmi task. + Bug fix: Add space between flag and the value provided for macs2 + Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. 
Inputs added: From 7b9e07652461788748ed4907dd8264cbbb27ce80 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 16 Feb 2023 13:20:49 +0100 Subject: [PATCH 1044/1208] update scripts --- scripts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts b/scripts index 84690a30..4142daab 160000 --- a/scripts +++ b/scripts @@ -1 +1 @@ -Subproject commit 84690a30eb0dde2bfdfaff9abf097b6f4c49dfd8 +Subproject commit 4142daab81a7d9f28686b6a3299536757d381c81 From eba9ad4c057cf7468bd7982930af484765d1a257 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 23 Feb 2023 15:55:01 +0100 Subject: [PATCH 1045/1208] add some options to disable filters in fastp --- fastp.wdl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index db4a2d40..68c0e5cd 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -36,6 +36,8 @@ task Fastp { Int lengthRequired = 15 Int? split Boolean performAdapterTrimming = true + Boolean performQualityFiltering = true + Boolean performLengthFiltering = true Int threads = 4 String memory = "50GiB" @@ -73,7 +75,9 @@ task Fastp { --thread ~{select_first([effectiveSplit, threads])} \ ~{"--split " + effectiveSplit} \ ~{if defined(effectiveSplit) then "-d 0" else ""} \ - ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} + ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} \ + ~{if performQualityFiltering then "" else "--disable_quality_filtering"} \ + ~{if performLengthFiltering then "" else "--disable_length_filtering"} >>> output { @@ -102,6 +106,8 @@ task Fastp { lengthRequired: {description: "The minimum read length.", category: "advanced"} split: {description: "The number of chunks to split the files into. 
Number of threads will be set equal to the amount of splits.", category: "common"} performAdapterTrimming: {description: "Whether adapter trimming should be performed or not.", category: "advanced"} + performQualityFiltering: {description: "Whether reads should be filtered based on quality scores.", category: "advanced"} + performLengthFiltering: {description: "Whether reads shoulde be filtered based on lengths.", catgegory: "advanced"} threads: {description: "The number of threads to use. Only used if the split input is not set.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From ab54bb588cd66f009df79bbf00b2238f0436fad6 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 3 Mar 2023 14:11:06 +0100 Subject: [PATCH 1046/1208] add option to enable/disable ploy-g trimming to fastp task --- fastp.wdl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fastp.wdl b/fastp.wdl index 68c0e5cd..9849738b 100644 --- a/fastp.wdl +++ b/fastp.wdl @@ -38,6 +38,7 @@ task Fastp { Boolean performAdapterTrimming = true Boolean performQualityFiltering = true Boolean performLengthFiltering = true + Boolean? performPolyGTrimming Int threads = 4 String memory = "50GiB" @@ -50,6 +51,11 @@ task Fastp { String outputDirR1 = sub(outputPathR1, basename(outputPathR1), "") String outputDirR2 = sub(outputPathR2, basename(outputPathR2), "") + String polyGTrimmingFlag = if defined(performPolyGTrimming) + then + if select_first([performPolyGTrimming]) then "--trim_poly_g" else "--disable_trim_poly_g" + else "" + Int? 
effectiveSplit = if select_first([split, 1]) > 1 then split else noneInt command <<< @@ -77,7 +83,8 @@ task Fastp { ~{if defined(effectiveSplit) then "-d 0" else ""} \ ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} \ ~{if performQualityFiltering then "" else "--disable_quality_filtering"} \ - ~{if performLengthFiltering then "" else "--disable_length_filtering"} + ~{if performLengthFiltering then "" else "--disable_length_filtering"} \ + ~{polyGTrimmingFlag} >>> output { @@ -108,6 +115,7 @@ task Fastp { performAdapterTrimming: {description: "Whether adapter trimming should be performed or not.", category: "advanced"} performQualityFiltering: {description: "Whether reads should be filtered based on quality scores.", category: "advanced"} performLengthFiltering: {description: "Whether reads shoulde be filtered based on lengths.", catgegory: "advanced"} + performPolyGTrimming: {description: "Whether or not poly-G-tail trimming should be performed. If undefined fastp's default behaviour will be used, ie. enabled for NextSeq/NovaSeq data as detected from read headers.", category: "advanced"} threads: {description: "The number of threads to use. Only used if the split input is not set.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 5d35105b452167ab9e09a9b0d9c041d2af84f253 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 3 Mar 2023 16:30:34 +0100 Subject: [PATCH 1047/1208] add purple options needed for shallow mode --- hmftools.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hmftools.wdl b/hmftools.wdl index e051dc99..78156f67 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1070,6 +1070,8 @@ task Purple { File driverGenePanel File somaticHotspots File germlineHotspots + Float? highlyDiploidPercentage + Float? 
somaticMinPuritySpread #The following should be in the same directory. File geneDataCsv File proteinFeaturesCsv @@ -1103,6 +1105,8 @@ task Purple { -run_drivers \ -somatic_hotspots ~{somaticHotspots} \ -driver_gene_panel ~{driverGenePanel} \ + ~{"-highly_diploid_percentage " + highlyDiploidPercentage} \ + ~{"-somatic_min_purity_spread " + somaticMinPuritySpread} \ -threads ~{threads} } From 36a4575e20c54b062995b96c24f68733affce707 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Fri, 10 Mar 2023 11:14:24 +0100 Subject: [PATCH 1048/1208] update parameter_meta and changelog --- CHANGELOG.md | 2 ++ hmftools.wdl | 2 ++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ce03ffc..753daf30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ that users understand how the changes affect the new version. --> version 5.1.0-dev --------------------------- ++ Add the `highlyDiploidPercentage` and `somaticMinPuritySpread` inputs to the + hmtools PURPLE task. + Add a task for fastp. + Add a task for picard CollectInsertSizeMetrics. + Increased the timeMinutes runtime attribute for manta (somatic and germline) to `2880`. 
diff --git a/hmftools.wdl b/hmftools.wdl index 78156f67..c27630a1 100644 --- a/hmftools.wdl +++ b/hmftools.wdl @@ -1186,6 +1186,8 @@ task Purple { driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"} somaticHotspots: {description: "A vcf file with hotspot somatic variant sites.", category: "required"} germlineHotspots: {description: "A vcf file with hotspot germline variant sites.", category: "required"} + highlyDiploidPercentage: {description: "Equivalent to PURPLE's `-highly_diploid_percentage` option.", category: "advanced"} + somaticMinPuritySpread: {description: "Equivalent to PURPLE's `-somatic_min_purity_spread` option.", category: "advanced"} geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"} transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"} From 1a57c2ed292504f138d8bb15ae145b7145ba6c1c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Apr 2023 15:08:41 +0200 Subject: [PATCH 1049/1208] Set stable version --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 753daf30..7e62171b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> -version 5.1.0-dev +version 5.1.0 --------------------------- + Add the `highlyDiploidPercentage` and `somaticMinPuritySpread` inputs to the hmtools PURPLE task. From 9394a3e29a0227e3dc1dc30700ad1d7e65b7e448 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Apr 2023 15:10:35 +0200 Subject: [PATCH 1050/1208] setting next version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 09b254e9..91ff5727 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -6.0.0 +5.2.0 From 64aa91e7db5e96625122b4484fb7d857a9ef2c13 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Apr 2023 16:32:09 +0200 Subject: [PATCH 1051/1208] Update cutadapt and FastQC --- CHANGELOG.md | 6 ++++++ cutadapt.wdl | 2 +- fastqc.wdl | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e62171b..a13b2f6c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ Newest changes should be on top. This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> + +version 5.2.0-dev +--------------------------- ++ Update cutadapt version to 4.4 ++ Update FastQC version to 0.12.1 + version 5.1.0 --------------------------- + Add the `highlyDiploidPercentage` and `somaticMinPuritySpread` inputs to the diff --git a/cutadapt.wdl b/cutadapt.wdl index 9a67692c..191e6f0a 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -85,7 +85,7 @@ task Cutadapt { Int cores = 4 String memory = "5GiB" Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores) - String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1" + String dockerImage = "quay.io/biocontainers/cutadapt:4.4--py310h1425a21_0" } String realRead2output = select_first([read2output, "cut_r2.fq.gz"]) diff --git a/fastqc.wdl b/fastqc.wdl index d821e531..59592d4e 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -47,7 +47,7 @@ task Fastqc { Int threads = 1 String memory = "2GiB" Int timeMinutes = 1 + ceil(size(seqFile, "G")) * 4 - String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0" + String dockerImage = "quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0" Array[File]? noneArray File? 
noneFile From 5cf560b5a9e69ba683c431193c330fdb7a41c028 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Apr 2023 16:55:44 +0200 Subject: [PATCH 1052/1208] Update classpath --- fastqc.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastqc.wdl b/fastqc.wdl index 59592d4e..da31882c 100644 --- a/fastqc.wdl +++ b/fastqc.wdl @@ -67,7 +67,7 @@ task Fastqc { command <<< set -e mkdir -p "~{outdirPath}" - FASTQC_DIR="/usr/local/opt/fastqc-0.11.9" + FASTQC_DIR="/usr/local/opt/fastqc-0.12.1" export CLASSPATH="$FASTQC_DIR:$FASTQC_DIR/sam-1.103.jar:$FASTQC_DIR/jbzip2-0.9.jar:$FASTQC_DIR/cisd-jhdf5.jar" java -Djava.awt.headless=true -XX:ParallelGCThreads=1 \ -Xms200M -Xmx~{javaXmx} \ From 0ed76c14ffe5ab4779ed42f924fbcab1acdda266 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 1 May 2023 15:46:55 +0200 Subject: [PATCH 1053/1208] Stable version in changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a13b2f6c..1551d13d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. 
--> -version 5.2.0-dev +version 5.2.0 --------------------------- + Update cutadapt version to 4.4 + Update FastQC version to 0.12.1 From 73f769bb966f67b9bf3fd72b9f5c4d6f923ccafa Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 1 May 2023 15:52:47 +0200 Subject: [PATCH 1054/1208] setting next version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 91ff5727..03f488b0 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -5.2.0 +5.3.0 From 0062b727197ae2601b234d7a69ae0f64bd7b59d1 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Thu, 8 Jun 2023 13:01:16 +0200 Subject: [PATCH 1055/1208] Add revcomp flag to cutadapt --- CHANGELOG.md | 5 +++++ cutadapt.wdl | 3 +++ 2 files changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1551d13d..5eb2ef17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> +version 5.3.0-dev +--------------------------- ++ Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. + + version 5.2.0 --------------------------- + Update cutadapt version to 4.4 diff --git a/cutadapt.wdl b/cutadapt.wdl index 191e6f0a..a164e360 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -81,6 +81,7 @@ task Cutadapt { Boolean? bwa Boolean? zeroCap Boolean? 
noZeroCap + Boolean revcomp = false Int cores = 4 String memory = "5GiB" @@ -149,6 +150,7 @@ task Cutadapt { ~{true="--bwa" false="" bwa} \ ~{true="--zero-cap" false="" zeroCap} \ ~{true="--no-zero-cap" false="" noZeroCap} \ + ~{if revcomp then "--revcomp" else ""} ~{read1} \ ~{read2} \ ~{"> " + reportPath} @@ -231,6 +233,7 @@ task Cutadapt { bwa: {description: "Equivalent to cutadapt's --bwa flag.", category: "advanced"} zeroCap: {description: "Equivalent to cutadapt's --zero-cap flag.", category: "advanced"} noZeroCap: {description: "Equivalent to cutadapt's --no-zero-cap flag.", category: "advanced"} + revcomp: {description: "Equivalent to cutadapt's --revcomp flag.", category: "advanced"} cores: {description: "The number of cores to use.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 502d73003072327d9756b4b2ce0c2f768ff1192a Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Thu, 8 Jun 2023 13:02:14 +0200 Subject: [PATCH 1056/1208] add missing backslash --- cutadapt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cutadapt.wdl b/cutadapt.wdl index a164e360..c695c08e 100644 --- a/cutadapt.wdl +++ b/cutadapt.wdl @@ -150,7 +150,7 @@ task Cutadapt { ~{true="--bwa" false="" bwa} \ ~{true="--zero-cap" false="" zeroCap} \ ~{true="--no-zero-cap" false="" noZeroCap} \ - ~{if revcomp then "--revcomp" else ""} + ~{if revcomp then "--revcomp" else ""} \ ~{read1} \ ~{read2} \ ~{"> " + reportPath} From cebb1b535be90193ed27c57f3ea2c659f20bfe39 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Mon, 12 Jun 2023 15:20:13 +0200 Subject: [PATCH 1057/1208] add a task for fastqFilter --- fastqFilter.wdl | 66 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 fastqFilter.wdl diff --git a/fastqFilter.wdl b/fastqFilter.wdl new file mode 100644 index 
00000000..d436b1ab --- /dev/null +++ b/fastqFilter.wdl @@ -0,0 +1,66 @@ +version 1.0 + +# MIT License +# +# Copyright (c) 2023 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task FastqFilter { + input { + Array[File]+ fastq + Array[String]+ outputPaths + Int? minLength + Int? 
maxLength + + String memory = "4GiB" + Int timeMinutes = 1 + ceil(size(seqFile, "G")) + String dockerImage = "quay.io/biocontainers/fastq-filter:0.3.0--py39hf95cd2a_1" + } + + command { + set -e + mkdir -p $(dirname ~{sep=" " outputPaths}) + fastq-filter \ + -o ~{sep=" -o " outputPaths} \ + ~{"-l " + minLength} \ + ~{"-L " + maxLength} \ + ~{sep=" " fastq} + } + + output { + Array[File] filtered = outputPaths + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + fastq: {description: "A list of fastq files to filter.", category: "required"} + outputPaths: {description: "A list containing the output paths for each input fastq file.", category: "required"} + minLength: {description: "Equivalent to fastq-filter's `--min-length` option.", category: "common"} + maxLength: {description: "Equivalent to fastq-filter's `--max-length` option.", category: "common"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} \ No newline at end of file From 93e491d37de5780bea73010323dcef939814cdbc Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Mon, 12 Jun 2023 15:21:47 +0200 Subject: [PATCH 1058/1208] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5eb2ef17..34bf0600 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.3.0-dev --------------------------- ++ Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. 
From 3fc46b91cc63c31b1477692638492fdda9bbc084 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Mon, 12 Jun 2023 15:32:34 +0200 Subject: [PATCH 1059/1208] fix copy-paste error --- fastqFilter.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastqFilter.wdl b/fastqFilter.wdl index d436b1ab..2b2fcc45 100644 --- a/fastqFilter.wdl +++ b/fastqFilter.wdl @@ -30,7 +30,7 @@ task FastqFilter { Int? maxLength String memory = "4GiB" - Int timeMinutes = 1 + ceil(size(seqFile, "G")) + Int timeMinutes = 1 + ceil(size(fastq, "G")) String dockerImage = "quay.io/biocontainers/fastq-filter:0.3.0--py39hf95cd2a_1" } From 2bc4c06dd89444b6ccb42244a566873ba7fad5a2 Mon Sep 17 00:00:00 2001 From: Davy Cats Date: Tue, 13 Jun 2023 09:37:54 +0200 Subject: [PATCH 1060/1208] use 1GiB for fastqFilter --- fastqFilter.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastqFilter.wdl b/fastqFilter.wdl index 2b2fcc45..3701b8aa 100644 --- a/fastqFilter.wdl +++ b/fastqFilter.wdl @@ -29,7 +29,7 @@ task FastqFilter { Int? minLength Int? maxLength - String memory = "4GiB" + String memory = "1GiB" Int timeMinutes = 1 + ceil(size(fastq, "G")) String dockerImage = "quay.io/biocontainers/fastq-filter:0.3.0--py39hf95cd2a_1" } From 3fb2c1de2e19f68f7a3ab3e205864bff21bb3ba1 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg <15814544+Redmar-van-den-Berg@users.noreply.github.com> Date: Thu, 7 Sep 2023 08:48:09 +0200 Subject: [PATCH 1061/1208] Use softlink instead of hardlinks If the database files are on a different filesystem then the analysis folder, hardlinks are not allowed, leading to crashes. 
--- centrifuge.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/centrifuge.wdl b/centrifuge.wdl index 757af239..41a907ae 100644 --- a/centrifuge.wdl +++ b/centrifuge.wdl @@ -122,7 +122,7 @@ task Classify { indexBasename="$(basename ~{sub(indexFiles[0], "\.[0-9]\.cf", "")})" for file in ~{sep=" " indexFiles} do - ln ${file} $PWD/"$(basename ${file})" + ln -s ${file} $PWD/"$(basename ${file})" done centrifuge \ ~{inputFormatOptions[inputFormat]} \ @@ -199,7 +199,7 @@ task Inspect { indexBasename="$(basename ~{sub(indexFiles[0], "\.[0-9]\.cf", "")})" for file in ~{sep=" " indexFiles} do - ln ${file} $PWD/"$(basename ${file})" + ln -s ${file} $PWD/"$(basename ${file})" done centrifuge-inspect \ ~{outputOptions[printOption]} \ @@ -256,7 +256,7 @@ task KReport { indexBasename="$(basename ~{sub(indexFiles[0], "\.[0-9]\.cf", "")})" for file in ~{sep=" " indexFiles} do - ln ${file} $PWD/"$(basename ${file})" + ln -s ${file} $PWD/"$(basename ${file})" done centrifuge-kreport \ -x $PWD/${indexBasename} \ From 44cdc1862bf20b1cf77f0fedfb0ba25b3e5efa43 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Thu, 7 Sep 2023 08:52:12 +0200 Subject: [PATCH 1062/1208] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 34bf0600..6acbbc85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 5.3.0-dev --------------------------- ++ Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. 
From 7404b0e6f7470c4d04d80f7037f1068ad091d9ba Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 26 Aug 2024 17:07:03 +0200 Subject: [PATCH 1063/1208] Add a selectGenotype switch --- gatk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index 0b93efe6..a2aff322 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1514,6 +1514,7 @@ task SelectVariants { Array[File] intervals = [] String? selectTypeToInclude + String? selectGenotype String javaXmx = "4G" String memory = "5GiB" @@ -1529,6 +1530,7 @@ task SelectVariants { -R ~{referenceFasta} \ -V ~{inputVcf} \ ~{"--select-type-to-include " + selectTypeToInclude} \ + ~{"-select-genotype " + selectGenotype} \ ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} \ -O ~{outputPath} } From d86d9cb89a8f8b74ad2b714a23e1686fd4f26e3d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 27 Aug 2024 10:19:18 +0200 Subject: [PATCH 1064/1208] Quote select genotype value --- gatk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gatk.wdl b/gatk.wdl index a2aff322..f272a2f9 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1530,7 +1530,7 @@ task SelectVariants { -R ~{referenceFasta} \ -V ~{inputVcf} \ ~{"--select-type-to-include " + selectTypeToInclude} \ - ~{"-select-genotype " + selectGenotype} \ + ~{"-select-genotype \"" + selectGenotype}~{true="\"" false="" defined(selectGenotype)} \ ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} \ -O ~{outputPath} } From 558c9b7d7370b0f46346c16beaa4d4cb3f48b09e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 30 Aug 2024 15:23:55 +0200 Subject: [PATCH 1065/1208] Add exclude filtered expression --- gatk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index f272a2f9..230674a5 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1513,6 +1513,7 @@ task SelectVariants { String outputPath = "output.vcf.gz" Array[File] intervals = [] + Boolean excludeFiltered = false String? selectTypeToInclude String? 
selectGenotype @@ -1531,6 +1532,7 @@ task SelectVariants { -V ~{inputVcf} \ ~{"--select-type-to-include " + selectTypeToInclude} \ ~{"-select-genotype \"" + selectGenotype}~{true="\"" false="" defined(selectGenotype)} \ + ~{true="--exclude-filtered" false="" excludeFiltered} \ ~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} \ -O ~{outputPath} } From 75f36133cb52ce6f02701ff11612f6884a8d1726 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 18 Oct 2024 14:52:33 +0200 Subject: [PATCH 1066/1208] Use reference files in rtg-tools tasks to make tasks cacheable --- rtg.wdl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/rtg.wdl b/rtg.wdl index 3e9dab9b..62e1e77f 100644 --- a/rtg.wdl +++ b/rtg.wdl @@ -24,8 +24,7 @@ task Format { input { Array[File]+ inputFiles String format = "fasta" - String outputPath = "seq_data.sdf" - + String outputPath = "reference_data" String rtgMem = "8G" String memory = "9GiB" Int timeMinutes = 1 + ceil(size(inputFiles, "GiB") * 2) @@ -41,7 +40,7 @@ task Format { } output { - File sdf = outputPath + Array[File] referenceFiles = glob("~{outputPath}/*") } runtime { @@ -61,7 +60,7 @@ task Format { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - sdf: {description: "RTGSequence Data File (SDF) format version of the input file(s)."} + referenceFiles: {description: "An array with all the generated reference files"} } } @@ -74,7 +73,7 @@ task VcfEval { Boolean squashPloidy = false String outputMode = "split" String outputDir = "output/" - File template + Array[File] referenceFiles Boolean allRecords = false Boolean decompose = false Boolean refOverlap = false @@ -99,7 +98,7 @@ task VcfEval { ~{"--evaluation-regions " + evaluationRegions} \ ~{"--bed-regions " + bedRegions} \ --output ~{outputDir} \ - --template ~{template} \ + --template $(dirname ~{referenceFiles[0]}) \ ~{true="--all-records" false="" allRecords} \ ~{true="--decompose" false="" decompose} \ ~{true="--ref-overlap" false="" refOverlap} \ @@ -152,7 +151,7 @@ task VcfEval { squashPloidy: {description: "treat heterozygous genotypes as homozygous ALT in both baseline and calls, to allow matches that ignore zygosity differences.", category: "common"} outputMode: {description: "output reporting mode. 
Allowed values are [split, annotate, combine, ga4gh, roc-only] (Default is split).", category: "advanced"} outputDir: {description: "Directory for output.", category: "advanced"} - template: {description: "SDF of the reference genome the variants are called against.", category: "required"} + referenceFiles: {description: "An array of reference Files generated by the Format task.", category: "required"} allRecords: {description: "use all records regardless of FILTER status (Default is to only process records where FILTER is \".\" or \"PASS\").", category: "common"} decompose: {description: "decompose complex variants into smaller constituents to allow partial credit.", category: "common"} refOverlap: {description: "allow alleles to overlap where bases of either allele are same-as-ref (Default is to only allow VCF anchor base overlap).", category: "common"} From 53d5083e5ca9de973eba1916dc273e0ff3dd9e04 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 11:04:31 +0100 Subject: [PATCH 1067/1208] Update minimap2 task to output sorted BAM --- minimap2.wdl | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/minimap2.wdl b/minimap2.wdl index 96cc7734..47464585 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -81,15 +81,19 @@ task Indexing { task Mapping { input { String presetOption - Int kmerSize = 15 - Boolean skipSelfAndDualMappings = false - Boolean outputSam = false String outputPrefix - Boolean addMDTagToSam = false - Boolean secondaryAlignment = false File referenceFile File queryFile + + Int compressionLevel = 1 + Int additionalSortThreads = 1 + Int sortMemoryGb = 1 + Boolean skipSelfAndDualMappings = false + Boolean addMDTagToSam = false + Boolean secondaryAlignment = true + + Int? kmerSize Int? maxIntronLength Int? maxFragmentLength Int? retainMaxSecondaryAlignments @@ -97,8 +101,8 @@ task Mapping { Int? mismatchPenalty String? 
howToFindGTAG - Int cores = 4 - String memory = "30GiB" + Int cores = 8 + String memory = "24GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } @@ -108,13 +112,11 @@ task Mapping { mkdir -p "$(dirname ~{outputPrefix})" minimap2 \ -x ~{presetOption} \ - -k ~{kmerSize} \ ~{true="-X" false="" skipSelfAndDualMappings} \ - ~{true="-a" false="" outputSam} \ - -o ~{outputPrefix} \ ~{true="--MD" false="" addMDTagToSam} \ --secondary=~{true="yes" false="no" secondaryAlignment} \ -t ~{cores} \ + ~{"-k " + kmerSize} \ ~{"-G " + maxIntronLength} \ ~{"-F " + maxFragmentLength} \ ~{"-N " + retainMaxSecondaryAlignments} \ @@ -122,11 +124,18 @@ task Mapping { ~{"-B " + mismatchPenalty} \ ~{"-u " + howToFindGTAG} \ ~{referenceFile} \ - ~{queryFile} + ~{queryFile} \ + | samtools sort \ + -@ ~{additionalSortThreads} \ + -l ~{compressionLevel} \ + -m ~{sortMemoryGb}G \ + -o ~{outputPrefix}.bam + samtools index -o ~{outputPrefix}.bam } output { - File alignmentFile = outputPrefix + File bam = ~{outputPrefix}.bam + File bamIndex = ~{outputPrefix}.bam.bai } runtime { From 77506d8d208b524cfb2427314d4568aac75e4b87 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 11:05:39 +0100 Subject: [PATCH 1068/1208] Add a flag for namesorting --- minimap2.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/minimap2.wdl b/minimap2.wdl index 47464585..64313ef4 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -88,6 +88,7 @@ task Mapping { Int compressionLevel = 1 Int additionalSortThreads = 1 Int sortMemoryGb = 1 + Boolean nameSorted = false Boolean skipSelfAndDualMappings = false Boolean addMDTagToSam = false @@ -126,6 +127,7 @@ task Mapping { ~{referenceFile} \ ~{queryFile} \ | samtools sort \ + ~{true="-N" false="" nameSorted} \ -@ ~{additionalSortThreads} \ -l ~{compressionLevel} \ -m ~{sortMemoryGb}G \ From e78cfa0c198a65d60f6b1adb3e33878c02e5c90f Mon Sep 17 00:00:00 2001 From: Ruben 
Vorderman Date: Fri, 8 Nov 2024 11:18:46 +0100 Subject: [PATCH 1069/1208] Add clair3 task --- clair3.wdl | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 clair3.wdl diff --git a/clair3.wdl b/clair3.wdl new file mode 100644 index 00000000..eb18d208 --- /dev/null +++ b/clair3.wdl @@ -0,0 +1,61 @@ +version 1.0 + +# Copyright (c) 2024 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Clair3 { + input { + File bam + File bamIndex + File referenceFasta + File referenceFastaFai + String outputPrefix + File? model + String? 
builtinModel + String platform + Int threads = 8 + Boolean includeAllCtgs = false + String memory = "20GiB" + Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / cores) + String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" + } + + # A default set for testing + String modelArg = "~{true=model false=builtinModel, defined(model)}" + + command <<< + run_clair3.sh \ + --model=~{modelArg} \ + --ref_fn=~{reference_fasta} \ + --bam_fn=~{bam} \ + --output=out \ + --threads=~{threads} \ + --platform=~{platform} \ + ~{true="--include_all_ctgs" false =""} + mv out/merge_output.vcf.gz ~{prefix}.vcf.gz + mv out/merge_output.vcf.gz.tbi ~{prefix}.vcf.gz.tbi + >>> + output { + File vcf = "~{outputPrefix}.vcf.gz" + File vcfIndex = "~{outputPrefix}.vcf.gz.tbi" + } + + +} \ No newline at end of file From 0d84d673368819a78296f97f0f5b6c3225439ded Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 12:20:35 +0100 Subject: [PATCH 1070/1208] Add sequali and update multiqc to a version that supports it --- multiqc.wdl | 2 +- sequali.wdl | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 sequali.wdl diff --git a/multiqc.wdl b/multiqc.wdl index 21fc8a7d..f04a1021 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -58,7 +58,7 @@ task MultiQC { String? 
memory Int timeMinutes = 10 + ceil(size(reports, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/multiqc:1.9--py_1" + String dockerImage = "quay.io/biocontainers/multiqc:1.25.1--pyhdfd78af_0 " } Int memoryGb = 2 + ceil(size(reports, "GiB")) diff --git a/sequali.wdl b/sequali.wdl new file mode 100644 index 00000000..98700fb7 --- /dev/null +++ b/sequali.wdl @@ -0,0 +1,46 @@ +version 1.0 + +# Copyright (c) 2024 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Sequali { + input { + File reads + File? mate_reads + Int threads = 2 + String outDir = "." 
+ dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" + } + + command <<< + set -e + mkdir -p $(dirname outputDir) + sequali \ + --outdir ~{outDir} \ + --threads ~{threads} \ + ~{reads} \ + ~{mate_reads} + >>> + + output { + File html = basename(reads) + ".html" + File json = basename(reads) + ".json" + } +} \ No newline at end of file From 272842244d79797615aa430bb6836a8cb78ba8fd Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 12:45:59 +0100 Subject: [PATCH 1071/1208] Fix womtool validation errors --- clair3.wdl | 16 ++++++++-------- minimap2.wdl | 8 ++++---- sequali.wdl | 9 ++++++++- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index eb18d208..6c0c1d38 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -33,25 +33,25 @@ task Clair3 { Int threads = 8 Boolean includeAllCtgs = false String memory = "20GiB" - Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / cores) - String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" + Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) + String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" } - # A default set for testing - String modelArg = "~{true=model false=builtinModel, defined(model)}" + String modelArg = "~{true=model false=builtinModel defined(model)}" command <<< run_clair3.sh \ --model=~{modelArg} \ - --ref_fn=~{reference_fasta} \ + --ref_fn=~{referenceFasta} \ --bam_fn=~{bam} \ --output=out \ --threads=~{threads} \ --platform=~{platform} \ - ~{true="--include_all_ctgs" false =""} - mv out/merge_output.vcf.gz ~{prefix}.vcf.gz - mv out/merge_output.vcf.gz.tbi ~{prefix}.vcf.gz.tbi + ~{true="--include_all_ctgs" false ="" includeAllCtgs} + mv out/merge_output.vcf.gz ~{outputPrefix}.vcf.gz + mv out/merge_output.vcf.gz.tbi ~{outputPrefix}.vcf.gz.tbi >>> + output { File vcf = "~{outputPrefix}.vcf.gz" File vcfIndex = "~{outputPrefix}.vcf.gz.tbi" diff --git a/minimap2.wdl b/minimap2.wdl index 64313ef4..fff5b4ec 
100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -136,8 +136,8 @@ task Mapping { } output { - File bam = ~{outputPrefix}.bam - File bamIndex = ~{outputPrefix}.bam.bai + File bam = "~{outputPrefix}.bam " + File bamIndex = "~{outputPrefix}.bam.bai" } runtime { @@ -152,7 +152,6 @@ task Mapping { presetOption: {description: "This option applies multiple options at the same time.", category: "common"} kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"} skipSelfAndDualMappings: {description: "Skip self and dual mappings (for the all-vs-all mode).", category: "advanced"} - outputSam: {description: "Output in the sam format.", category: "common"} outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} addMDTagToSam: {description: "Adds a MD tag to the sam output file.", category: "common"} secondaryAlignment: {description: "Whether to output secondary alignments.", category: "advanced"} @@ -170,6 +169,7 @@ task Mapping { dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - alignmentFile: {description: "Mapping and alignment between collections of dna sequences file."} + bam: {description: "Mapping and alignment between collections of dna sequences file in BAM format."} + bamIndex: {description: "Accompanying index file for the BAM file."} } } diff --git a/sequali.wdl b/sequali.wdl index 98700fb7..c2eff2c9 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -26,7 +26,7 @@ task Sequali { File? mate_reads Int threads = 2 String outDir = "." 
- dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" + String dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" } command <<< @@ -43,4 +43,11 @@ task Sequali { File html = basename(reads) + ".html" File json = basename(reads) + ".json" } + + runtime { + cpu: threads + memory: "2GiB" + docker: dockerImage + time_minutes: 59 + } } \ No newline at end of file From 01ff19c51bf4b8ff28cf16b067bbb128d2d435b4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 13:12:56 +0100 Subject: [PATCH 1072/1208] Fix runtime issues --- clair3.wdl | 12 ++++++++++-- minimap2.wdl | 12 +++++++----- multiqc.wdl | 2 +- sequali.wdl | 4 ++-- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index 6c0c1d38..2d111a5d 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -34,12 +34,14 @@ task Clair3 { Boolean includeAllCtgs = false String memory = "20GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) - String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" + String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" } - String modelArg = "~{true=model false=builtinModel defined(model)}" + String modelArg = "~{if defined(model) then model else builtinModel}" command <<< + set -e + mkdir -p $(dirname ~{outputPrefix}) run_clair3.sh \ --model=~{modelArg} \ --ref_fn=~{referenceFasta} \ @@ -57,5 +59,11 @@ task Clair3 { File vcfIndex = "~{outputPrefix}.vcf.gz.tbi" } + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } } \ No newline at end of file diff --git a/minimap2.wdl b/minimap2.wdl index fff5b4ec..5709c998 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -105,13 +105,15 @@ task Mapping { Int cores = 8 String memory = "24GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) - String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0" + # Minimap 2.28 samtools 1.20 + String dockerImage = 
"quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0" } command { set -e mkdir -p "$(dirname ~{outputPrefix})" minimap2 \ + -a \ -x ~{presetOption} \ ~{true="-X" false="" skipSelfAndDualMappings} \ ~{true="--MD" false="" addMDTagToSam} \ @@ -125,19 +127,19 @@ task Mapping { ~{"-B " + mismatchPenalty} \ ~{"-u " + howToFindGTAG} \ ~{referenceFile} \ - ~{queryFile} \ + ~{queryFile} \ | samtools sort \ ~{true="-N" false="" nameSorted} \ -@ ~{additionalSortThreads} \ -l ~{compressionLevel} \ -m ~{sortMemoryGb}G \ -o ~{outputPrefix}.bam - samtools index -o ~{outputPrefix}.bam + samtools index ~{outputPrefix}.bam } output { - File bam = "~{outputPrefix}.bam " - File bamIndex = "~{outputPrefix}.bam.bai" + File bam = "~{outputPrefix}.bam" + File bamIndex = "~{outputPrefix}.bam.bai" } runtime { diff --git a/multiqc.wdl b/multiqc.wdl index f04a1021..a2e32cdb 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -58,7 +58,7 @@ task MultiQC { String? 
memory Int timeMinutes = 10 + ceil(size(reports, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/multiqc:1.25.1--pyhdfd78af_0 " + String dockerImage = "quay.io/biocontainers/multiqc:1.25.1--pyhdfd78af_0" } Int memoryGb = 2 + ceil(size(reports, "GiB")) diff --git a/sequali.wdl b/sequali.wdl index c2eff2c9..ed6e5d40 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -40,8 +40,8 @@ task Sequali { >>> output { - File html = basename(reads) + ".html" - File json = basename(reads) + ".json" + File html = outDir + "/" + basename(reads) + ".html" + File json = outDir + "/" + basename(reads) + ".json" } runtime { From a488618740428dcc7e940a6b27750ff62b87428e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 13:28:56 +0100 Subject: [PATCH 1073/1208] Include all contigs by default for clair3 --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index 2d111a5d..d824ec13 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -31,7 +31,7 @@ task Clair3 { String? builtinModel String platform Int threads = 8 - Boolean includeAllCtgs = false + Boolean includeAllCtgs = true # Not the clair3 default, but generally what you want. String memory = "20GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" From 1bc3416c90953ba05d3e00370c74355ad0fa7c9b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 8 Nov 2024 15:17:14 +0100 Subject: [PATCH 1074/1208] Work from a model tar file --- clair3.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index d824ec13..7b2d98fe 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -27,7 +27,7 @@ task Clair3 { File referenceFasta File referenceFastaFai String outputPrefix - File? model + File? modelTar String? 
builtinModel String platform Int threads = 8 @@ -37,10 +37,11 @@ task Clair3 { String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" } - String modelArg = "~{if defined(model) then model else builtinModel}" + String modelArg = "~{if defined(modelTar) then basename(select_first([modelTar]), '.tar.gz') else builtinModel}" command <<< set -e + ~{if defined(modelTar) then "tar -xvf " + modelTar else "" } mkdir -p $(dirname ~{outputPrefix}) run_clair3.sh \ --model=~{modelArg} \ From 8fa481125d3038034a2ae28fedf88809b10e0c98 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 11 Nov 2024 14:30:25 +0100 Subject: [PATCH 1075/1208] Set includeAlCtgs to false --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index 7b2d98fe..bc25394b 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -31,7 +31,7 @@ task Clair3 { String? builtinModel String platform Int threads = 8 - Boolean includeAllCtgs = true # Not the clair3 default, but generally what you want. 
+ Boolean includeAllCtgs = false String memory = "20GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" From 98d9e2c92b0655eb022bd9793b3449ba3eb52b9f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 12 Nov 2024 08:38:00 +0100 Subject: [PATCH 1076/1208] Increase memory --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index bc25394b..4184f49e 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -32,7 +32,7 @@ task Clair3 { String platform Int threads = 8 Boolean includeAllCtgs = false - String memory = "20GiB" + String memory = "~{threads + 16}GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" } From f13a7e2dbe793b2742080b91d90e42b29f6c0e6c Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 13 Nov 2024 16:47:03 +0100 Subject: [PATCH 1077/1208] Update parameter_meta --- gatk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gatk.wdl b/gatk.wdl index 230674a5..655a0b66 100644 --- a/gatk.wdl +++ b/gatk.wdl @@ -1558,6 +1558,8 @@ task SelectVariants { outputPath: {description: "The location the output VCF file should be written.", category: "advanced"} intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"} selectTypeToInclude: {description: "Select only a certain type of variants from the input file.", category: "common"} + excludeFiltered: {description: "Remove all variants that do not have a PASS filter", category: "advanced"} + selectGenotype: {description: "The genotype to be selected", category: "advanced"} javaXmx: {description: "The maximum memory available to the program. 
Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 3c8d2e73d12d9cd3101752dff2976f86d61b4c23 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 13 Nov 2024 16:48:14 +0100 Subject: [PATCH 1078/1208] Update changelog --- CHANGELOG.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6acbbc85..6db06e23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,13 +8,14 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 5.3.0-dev +version 6.0.0-dev --------------------------- ++ rtg Format and VcfEval tasks now handle reference as an array of files to enable caching. ++ Added --select-genotype and --exclude-filtered flags to GATK SelectVariants + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. - version 5.2.0 --------------------------- + Update cutadapt version to 4.4 From a6eec0e6af6554ba1c85a24e3a63b0bcd01cfe76 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 13 Dec 2024 15:51:29 +0100 Subject: [PATCH 1079/1208] Add a readgroup flag to minimap2 --- minimap2.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/minimap2.wdl b/minimap2.wdl index 5709c998..e785ffd7 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -101,6 +101,7 @@ task Mapping { Int? matchingScore Int? mismatchPenalty String? howToFindGTAG + String? 
readgroup Int cores = 8 String memory = "24GiB" @@ -126,6 +127,7 @@ task Mapping { ~{"-A " + matchingScore} \ ~{"-B " + mismatchPenalty} \ ~{"-u " + howToFindGTAG} \ + ~{"-R '" + readgroup}~{false="" true="'" defined(readgroup)} \ ~{referenceFile} \ ~{queryFile} \ | samtools sort \ From b717f3fa8d82d3bb040d3df134533839f5adec9d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 17 Dec 2024 17:33:59 +0100 Subject: [PATCH 1080/1208] Add -o pipefail --- minimap2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/minimap2.wdl b/minimap2.wdl index e785ffd7..95b84bc4 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -111,7 +111,7 @@ task Mapping { } command { - set -e + set -e -o pipefail mkdir -p "$(dirname ~{outputPrefix})" minimap2 \ -a \ From 42ca869223960072ca0f9fc1e87aae7f469a4d34 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 8 Jan 2025 17:35:29 +0100 Subject: [PATCH 1081/1208] Allow copying of comments from fastq --- minimap2.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/minimap2.wdl b/minimap2.wdl index 95b84bc4..daf47a9a 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -93,6 +93,7 @@ task Mapping { Boolean skipSelfAndDualMappings = false Boolean addMDTagToSam = false Boolean secondaryAlignment = true + Boolean copyCommentsFromFastq = true Int? kmerSize Int? 
maxIntronLength @@ -119,6 +120,7 @@ task Mapping { ~{true="-X" false="" skipSelfAndDualMappings} \ ~{true="--MD" false="" addMDTagToSam} \ --secondary=~{true="yes" false="no" secondaryAlignment} \ + ~{true="-y" false="" copyCommentsFromFastq} \ -t ~{cores} \ ~{"-k " + kmerSize} \ ~{"-G " + maxIntronLength} \ From 7240b178ef378d39b5cb0983cf3a681b0bf52488 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Jan 2025 15:53:23 +0100 Subject: [PATCH 1082/1208] Allow minimap2 to process uBAM --- minimap2.wdl | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/minimap2.wdl b/minimap2.wdl index daf47a9a..18127cb1 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -89,11 +89,19 @@ task Mapping { Int additionalSortThreads = 1 Int sortMemoryGb = 1 Boolean nameSorted = false + # MM, ML, MN -> Methylation flags + # Also keep the following flags for Sequali to be able to run on the mapped bam file and get ONT information. + # ch -> channel + # st -> start time + # du -> duration + # dx -> Whether read was duplex + # pi -> Parent ID for split read + + String tagsToKeep = "MM,ML,MN,ch,st,du,dx,pi" Boolean skipSelfAndDualMappings = false Boolean addMDTagToSam = false Boolean secondaryAlignment = true - Boolean copyCommentsFromFastq = true Int? kmerSize Int? maxIntronLength @@ -111,16 +119,21 @@ task Mapping { String dockerImage = "quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0" } - command { + # Always run data through samtools fastq. This supports both FASTQ and uBAM + # files. It does remove any existing FASTQ comments, but this should not be + # problematic for most files. 
+ + command <<< set -e -o pipefail mkdir -p "$(dirname ~{outputPrefix})" + samtools fastq -T "~{tagsToKeep}" ~{queryFile} | \ minimap2 \ -a \ -x ~{presetOption} \ ~{true="-X" false="" skipSelfAndDualMappings} \ ~{true="--MD" false="" addMDTagToSam} \ --secondary=~{true="yes" false="no" secondaryAlignment} \ - ~{true="-y" false="" copyCommentsFromFastq} \ + -y \ -t ~{cores} \ ~{"-k " + kmerSize} \ ~{"-G " + maxIntronLength} \ @@ -131,7 +144,7 @@ task Mapping { ~{"-u " + howToFindGTAG} \ ~{"-R '" + readgroup}~{false="" true="'" defined(readgroup)} \ ~{referenceFile} \ - ~{queryFile} \ + - \ | samtools sort \ ~{true="-N" false="" nameSorted} \ -@ ~{additionalSortThreads} \ @@ -139,7 +152,7 @@ task Mapping { -m ~{sortMemoryGb}G \ -o ~{outputPrefix}.bam samtools index ~{outputPrefix}.bam - } + >>> output { File bam = "~{outputPrefix}.bam" @@ -168,6 +181,7 @@ task Mapping { retainMaxSecondaryAlignments: {description: "Retain at most N secondary alignments.", category: "advanced"} matchingScore: {description: "Matching score.", category: "advanced"} mismatchPenalty: {description: "Mismatch penalty.", category: "advanced"} + tagsToKeep: {description: "Tags to keep from the input unaligned BAM file.", category: "Advanced"} howToFindGTAG: {description: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} From c7c1b5bb932de4ea6d1ca3069007d4e1ad5c168d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Jan 2025 16:04:08 +0100 Subject: [PATCH 1083/1208] Allow sample name to set --- clair3.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clair3.wdl b/clair3.wdl index 4184f49e..db2c2fb5 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -27,6 +27,7 @@ task Clair3 { File referenceFasta File referenceFastaFai String outputPrefix + String? sampleName File? 
modelTar String? builtinModel String platform @@ -50,6 +51,7 @@ task Clair3 { --output=out \ --threads=~{threads} \ --platform=~{platform} \ + ~{"--sample_name=" + sampleName} \ ~{true="--include_all_ctgs" false ="" includeAllCtgs} mv out/merge_output.vcf.gz ~{outputPrefix}.vcf.gz mv out/merge_output.vcf.gz.tbi ~{outputPrefix}.vcf.gz.tbi From e3ceb602b5baf955f850f30301a68bc1a1a1c970 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 10 Jan 2025 16:47:20 +0100 Subject: [PATCH 1084/1208] Proper numshards to deepvariant and update it to latest version --- deepvariant.wdl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index 25d05bd9..2d212000 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -27,19 +27,19 @@ task RunDeepVariant { File inputBam File inputBamIndex String modelType - String outputVcf + String outputVcf = "sample.vcf.gz" String? postprocessVariantsExtraArgs File? customizedModel - Int? numShards + Int numShards = 4 String? outputGVcf String? outputGVcfIndex File? regions String? sampleName Boolean? 
VCFStatsReport = true - String memory = "3GiB" + String memory = "48GiB" Int timeMinutes = 5000 - String dockerImage = "google/deepvariant:1.0.0" + String dockerImage = "google/deepvariant:1.6.1" } command { @@ -62,6 +62,7 @@ task RunDeepVariant { memory: memory time_minutes: timeMinutes docker: dockerImage + cpu: numShards } output { From a5dca2e7596f50436beb6c69b597722dc4aaa764 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 11:28:46 +0100 Subject: [PATCH 1085/1208] Add modkit pileup --- modkit.wdl | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 modkit.wdl diff --git a/modkit.wdl b/modkit.wdl new file mode 100644 index 00000000..4ac6bfa6 --- /dev/null +++ b/modkit.wdl @@ -0,0 +1,64 @@ +version 1.0 + +# Copyright (c) 2025 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task Pileup { + input { + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" + File bam + File bamIndex + String outputBed = "output.bed" + File referenceFasta + File referenceFastaFai + + Int? intervalSize + File? includeBed + + Boolean cpg = false + Boolean combineMods = false + String logFilePath = "modkit.log" + + Int threads = 4 + + } + + command <<< + set -e + mkdir -p $(dirname ~{outputBed}) + mkdir -p $(dirname ~{logFilePath}) + modkit pileup \ + --threads ~{threads} \ + ~{"--interval-size " + intervalSize} \ + ~{"--include-bed " + includeBed} + --ref ~{referenceFasta} \ + ~{true="--cpg" false="" cpg} \ + ~{true="--combine-mods" false="" combineMods} \ + --log-filepath ~{logFilePath} \ + ~{bam} \ + ~{outputBed} + >>> + + runtime { + docker: dockerImage + cpu: threads + + } +} \ No newline at end of file From 085fc5dd691444c9bcdb6c0483413ce5c1cf8d5f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 14:15:28 +0100 Subject: [PATCH 1086/1208] Update modkit --- modkit.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/modkit.wdl b/modkit.wdl index 4ac6bfa6..9f311121 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -56,6 +56,11 @@ task Pileup { ~{outputBed} >>> + output { + File out = outputBed + File logFile = logFilePath + } + runtime { docker: dockerImage cpu: threads From 3540b4a12a2b7d56249f2d20941a6526af9c8f6e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 14:19:14 +0100 Subject: [PATCH 1087/1208] Add memory to modkit --- modkit.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modkit.wdl b/modkit.wdl index 9f311121..96f92c41 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -37,6 +37,7 @@ task Pileup { String logFilePath = "modkit.log" Int threads = 4 + String memory = "16GiB" } @@ -64,6 +65,7 @@ task Pileup { runtime { docker: dockerImage cpu: threads + memory: memory } } \ No newline at end of file From bc179875e1cf04fcd4efc63338b73d1230e3ef96 Mon Sep 17 00:00:00 2001 From: Ruben 
Vorderman Date: Wed, 29 Jan 2025 14:23:21 +0100 Subject: [PATCH 1088/1208] Add missing backslash --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 96f92c41..4f8bceb4 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -48,7 +48,7 @@ task Pileup { modkit pileup \ --threads ~{threads} \ ~{"--interval-size " + intervalSize} \ - ~{"--include-bed " + includeBed} + ~{"--include-bed " + includeBed} \ --ref ~{referenceFasta} \ ~{true="--cpg" false="" cpg} \ ~{true="--combine-mods" false="" combineMods} \ From c69c5cb2031913669dba5bf2cfe1acc4b00fed95 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 15:44:00 +0100 Subject: [PATCH 1089/1208] Set rather high defaults for time and memory for modkit --- modkit.wdl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 4f8bceb4..d827d896 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -36,8 +36,9 @@ task Pileup { Boolean combineMods = false String logFilePath = "modkit.log" - Int threads = 4 - String memory = "16GiB" + Int threads = 8 + String memory = "48GiB" + Int timeMinutes = 4320 # 3 Days } @@ -66,6 +67,6 @@ task Pileup { docker: dockerImage cpu: threads memory: memory - + time_minutes: timeMinutes } } \ No newline at end of file From beec409c6e2ce345d6976f159d7da73b79110fe4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 29 Jan 2025 16:35:59 +0100 Subject: [PATCH 1090/1208] Upgrade sequali memory --- sequali.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sequali.wdl b/sequali.wdl index ed6e5d40..664fc082 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -46,7 +46,7 @@ task Sequali { runtime { cpu: threads - memory: "2GiB" + memory: "4GiB" docker: dockerImage time_minutes: 59 } From a87956ed26298c48b29f23782dc268f8d8bf29ff Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 3 Feb 2025 15:10:51 +0100 Subject: [PATCH 1091/1208] Add modkit flags --- modkit.wdl | 6 ++++++ 1 file 
changed, 6 insertions(+) diff --git a/modkit.wdl b/modkit.wdl index d827d896..35d3c7fc 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -34,6 +34,9 @@ task Pileup { Boolean cpg = false Boolean combineMods = false + Boolean combineStrands = false + Boolean bedgraph = false + String? ignore String logFilePath = "modkit.log" Int threads = 8 @@ -50,9 +53,12 @@ task Pileup { --threads ~{threads} \ ~{"--interval-size " + intervalSize} \ ~{"--include-bed " + includeBed} \ + ~{"--ignore " + ignore} \ --ref ~{referenceFasta} \ ~{true="--cpg" false="" cpg} \ ~{true="--combine-mods" false="" combineMods} \ + ~{true="--combine-strands" false="" combineStrands} \ + ~{true="--bedgraph" false="" bedgraph} \ --log-filepath ~{logFilePath} \ ~{bam} \ ~{outputBed} From 730a8a7672b491ccac1dbfdab497a9420ac40f71 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 3 Feb 2025 16:12:37 +0100 Subject: [PATCH 1092/1208] Capture multiple output files --- modkit.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 35d3c7fc..1cac1bd1 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -65,7 +65,8 @@ task Pileup { >>> output { - File out = outputBed + File? 
out = outputBed # Normal mode + Array[File] outFiles = glob(outputBed + "/*") # Bedgraph mode File logFile = logFilePath } From ed50e2dfb30a8f354f4e0dd2a4f7ae5aeec952fe Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 4 Feb 2025 17:01:46 +0100 Subject: [PATCH 1093/1208] Update documentation for new tasks --- clair3.wdl | 22 ++++++++++++++++++++++ modkit.wdl | 30 +++++++++++++++++++++++++++++- sequali.wdl | 25 ++++++++++++++++++++++--- 3 files changed, 73 insertions(+), 4 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index db2c2fb5..709d59b5 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -69,4 +69,26 @@ task Clair3 { docker: dockerImage } + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + outputPrefix: {description: "The output prefix where the data should be placed.", category: "common"} + modelTar: {description: "The TAR file with the model", category: "common"} + builtinModel: {description: "The builtin model name (in case a tar file is not used)", category: "common"} + sampleName: {description: "The name of the sample in the VCF", category: "common"} + platform: {description: "platform setting for clair3.", category: "required"} + includeAllCtgs: {description: "whether or not to call all contigs in the reference", category: "advanced"} + threads: {description: "The number of threads to use for variant calling.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + vcf: {description: "Output VCF file."} + vcfIndex: {description: "Output VCF index."} + + } } \ No newline at end of file diff --git a/modkit.wdl b/modkit.wdl index 1cac1bd1..382bfc09 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -22,7 +22,6 @@ version 1.0 task Pileup { input { - String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" File bam File bamIndex String outputBed = "output.bed" @@ -42,6 +41,7 @@ task Pileup { Int threads = 8 String memory = "48GiB" Int timeMinutes = 4320 # 3 Days + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } @@ -76,4 +76,32 @@ task Pileup { memory: memory time_minutes: timeMinutes } + + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + referenceFasta: {description: "The reference fasta file.", category: "required"} + referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} + outputBed: {description: "The output name where the data should be placed.", category: "common"} + + intervalSize: {description: "Sets the interval size", category: "advanced"} + includeBed: {description: "Bed file with regions to include", category: "advanced"} + cpg: {description: "Whether to call only at cpg sites", category: "advanced"} + combineMods: {description: "Whether to combine modifications in the output", category: "advanced"} + combineStrands: {description: "Whether to combine strands in the output", category: "advanced"} + bedgraph: {description: "Whether to create a folder instead with a bedgraph file", category: "advanced"} + ignore: {description: "Modification type to ignore. 
For example 'h'.", category: "advanced"} + logFilePath: {description: "Path where the log file should be written.", category: "advanced"} + + threads: {description: "The number of threads to use for variant calling.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + out: {description: "The output bed files. Not available when bedgraph = true."} + outFiles: {description: "Output files when bedgraph = true."} + logFile: {description: "The generated log file."} + } } \ No newline at end of file diff --git a/sequali.wdl b/sequali.wdl index 664fc082..cbd3d869 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -24,9 +24,12 @@ task Sequali { input { File reads File? mate_reads - Int threads = 2 String outDir = "." + + Int threads = 2 + String memory = "4GiB" String dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" + Int timeMinutes = 59 } command <<< @@ -46,8 +49,24 @@ task Sequali { runtime { cpu: threads - memory: "4GiB" + memory: memory docker: dockerImage - time_minutes: 59 + time_minutes: timeMinutes + } + parameter_meta { + # inputs + reads: {description: "A FASTQ or BAM file.", category: "required"} + mate_reads: {description: "FASTQ mate file"} + threads: {description: "The number of cores to use.", category: "advanced"} + + outDir: {description: "The path to write the output to.", catgory: "required"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + html: {description: "HTML report file."} + json: {description: "JSON report file for use with MultiQC."} } } \ No newline at end of file From 113d4c58930aa2fcde99eed5b018bb8061e612cb Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 09:13:34 +0100 Subject: [PATCH 1094/1208] Update changelog --- CHANGELOG.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6acbbc85..97a1d016 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,14 @@ This document is user facing. Please word the changes in such a way that users understand how the changes affect the new version. --> -version 5.3.0-dev +version 6.0.0-dev --------------------------- ++ Add Sequali task. ++ Add Clair3 task. ++ Add Modkit task. ++ Modify minimap2 task to accept ubam input, including transfer of methylation + tags. Also sort the BAM output file by coordinate. ++ Update DeepVariant container and update resource requirements. + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. 
From a01b54a0b79a135b3ddf319f71e51d1ef06f0f56 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:00:01 +0100 Subject: [PATCH 1095/1208] Indent clair3 command --- clair3.wdl | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/clair3.wdl b/clair3.wdl index 709d59b5..4d9092f2 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -41,20 +41,20 @@ task Clair3 { String modelArg = "~{if defined(modelTar) then basename(select_first([modelTar]), '.tar.gz') else builtinModel}" command <<< - set -e - ~{if defined(modelTar) then "tar -xvf " + modelTar else "" } - mkdir -p $(dirname ~{outputPrefix}) - run_clair3.sh \ - --model=~{modelArg} \ - --ref_fn=~{referenceFasta} \ - --bam_fn=~{bam} \ - --output=out \ - --threads=~{threads} \ - --platform=~{platform} \ - ~{"--sample_name=" + sampleName} \ - ~{true="--include_all_ctgs" false ="" includeAllCtgs} - mv out/merge_output.vcf.gz ~{outputPrefix}.vcf.gz - mv out/merge_output.vcf.gz.tbi ~{outputPrefix}.vcf.gz.tbi + set -e + ~{if defined(modelTar) then "tar -xvf " + modelTar else "" } + mkdir -p $(dirname ~{outputPrefix}) + run_clair3.sh \ + --model=~{modelArg} \ + --ref_fn=~{referenceFasta} \ + --bam_fn=~{bam} \ + --output=out \ + --threads=~{threads} \ + --platform=~{platform} \ + ~{"--sample_name=" + sampleName} \ + ~{true="--include_all_ctgs" false ="" includeAllCtgs} + mv out/merge_output.vcf.gz ~{outputPrefix}.vcf.gz + mv out/merge_output.vcf.gz.tbi ~{outputPrefix}.vcf.gz.tbi >>> output { From b409ca9ed22505252a4ddf8f451eb9b55be530f1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:12:07 +0100 Subject: [PATCH 1096/1208] More realistic resource requirements for modkit --- modkit.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 382bfc09..92905f06 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -39,8 +39,8 @@ task Pileup { String logFilePath = "modkit.log" Int threads = 8 - String memory = "48GiB" - 
Int timeMinutes = 4320 # 3 Days + String memory = "4GiB" + Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } From bdab5a4c0d0e8474bea79435cc128e50fe5109d2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:12:27 +0100 Subject: [PATCH 1097/1208] More specific bed file naming --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 92905f06..23269bf3 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -24,7 +24,7 @@ task Pileup { input { File bam File bamIndex - String outputBed = "output.bed" + String outputBed = "output.methyl.bed" File referenceFasta File referenceFastaFai From c79ebd4affcc6524e671da9d6d63f98c9d3674c8 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:13:54 +0100 Subject: [PATCH 1098/1208] Correct file extension for modkit --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 23269bf3..930b6de9 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -24,7 +24,7 @@ task Pileup { input { File bam File bamIndex - String outputBed = "output.methyl.bed" + String outputBed = "output.bedMethyl" File referenceFasta File referenceFastaFai From 1580aae26fbec6b819d0a905959dbad7acf6fd63 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Feb 2025 17:14:19 +0100 Subject: [PATCH 1099/1208] Correct whitespacing Co-authored-by: Davy Cats --- sequali.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/sequali.wdl b/sequali.wdl index cbd3d869..b43cf281 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -53,6 +53,7 @@ task Sequali { docker: dockerImage time_minutes: timeMinutes } + parameter_meta { # inputs reads: {description: "A FASTQ or BAM file.", category: "required"} From 63dceb22e11e16a45f8ac04f1c466100e8a263f6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Feb 2025 16:24:21 +0100 Subject: [PATCH 1100/1208] Start on a VEP task --- 
vep.wdl | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 vep.wdl diff --git a/vep.wdl b/vep.wdl new file mode 100644 index 00000000..83eeac4e --- /dev/null +++ b/vep.wdl @@ -0,0 +1,74 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Vep { + input { + File inputFile + String outputPath = "vep.annotated.vcf.gz" + File cacheTar + File? pluginsTar + String? 
species + Array[String] plugins = [] + Boolean refseq = false + Boolean merged = false + + Boolean everything = false + Boolean symbol = false + + } + + command <<< + set -e + mkdir vep_cache + tar -x --directory vep_cache -f ~{cacheTar} + ~{"tar -x --directory vep_cache -f " + pluginsTar} + + # Output all stats files by default for MultiQC integration + vep \ + --input_file ~{inputFile} \ + ~{"--species " + species} \ + --stats_html --stats_text \ + --dir vep_cache \ # Output all stats files by default for MultiQC integration + + --offline \ + ~{true="--plugin" false="" length(plugins) > 0} {sep=" --plugin " plugins} \ + --vcf \ + --compress-output bgzip \ + ~{true="--refseq" false="" refseq} \ + ~{true="--merged" false="" merged} \ + \ + ~{true="--everything" false="" everything} \ + ~{true="--symbol" false="" symbol} \ + + + # Cleanup the tar extract to save filesystem space + rm -rf vep_cache + + + >>> + + output { + File outputFile = outputPath + File statsHtml = outputPath + "_summary.html" + } + +} \ No newline at end of file From 405395d512611775ed38021d79b3f4f570d0f23e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 14:31:23 +0100 Subject: [PATCH 1101/1208] Add runtime requirements --- vep.wdl | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/vep.wdl b/vep.wdl index 83eeac4e..496a6b8f 100644 --- a/vep.wdl +++ b/vep.wdl @@ -34,7 +34,10 @@ task Vep { Boolean everything = false Boolean symbol = false - } + String memory = "8GiB" + Int timeMinutes = 5 + ceil(size(inputFile, "MiB") * 3) + String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0" + } command <<< set -e @@ -71,4 +74,15 @@ task Vep { File statsHtml = outputPath + "_summary.html" } -} \ No newline at end of file + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: 
{description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} From b6107be5cdfaf396e53f25f2d93b6220d1f14eb7 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:06:54 +0100 Subject: [PATCH 1102/1208] Take into account cache tar size for runtime --- vep.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index 496a6b8f..4cec3fa3 100644 --- a/vep.wdl +++ b/vep.wdl @@ -35,7 +35,8 @@ task Vep { Boolean symbol = false String memory = "8GiB" - Int timeMinutes = 5 + ceil(size(inputFile, "MiB") * 3) + # Account time for unpacking the cache. + Int timeMinutes = 1 + ceil(size(cacheTar, GiB)) + ceil(size(inputFile, "MiB") * 3) String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0" } From 5401a6050c9c288f20569b1ffb943f1a05b19d19 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:20:41 +0100 Subject: [PATCH 1103/1208] Cleanup command --- vep.wdl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/vep.wdl b/vep.wdl index 4cec3fa3..f9e7a4a0 100644 --- a/vep.wdl +++ b/vep.wdl @@ -36,7 +36,7 @@ task Vep { String memory = "8GiB" # Account time for unpacking the cache. 
- Int timeMinutes = 1 + ceil(size(cacheTar, GiB)) + ceil(size(inputFile, "MiB") * 3) + Int timeMinutes = 1 + ceil(size(cacheTar, "GiB")) + ceil(size(inputFile, "MiB") * 3) String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0" } @@ -51,8 +51,7 @@ task Vep { --input_file ~{inputFile} \ ~{"--species " + species} \ --stats_html --stats_text \ - --dir vep_cache \ # Output all stats files by default for MultiQC integration - + --dir vep_cache \ --offline \ ~{true="--plugin" false="" length(plugins) > 0} {sep=" --plugin " plugins} \ --vcf \ From 701b819d7bebab81385dbd3c159f31ab37e5961b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:41:20 +0100 Subject: [PATCH 1104/1208] Add missing ~ --- vep.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index f9e7a4a0..636a8ce0 100644 --- a/vep.wdl +++ b/vep.wdl @@ -53,7 +53,7 @@ task Vep { --stats_html --stats_text \ --dir vep_cache \ --offline \ - ~{true="--plugin" false="" length(plugins) > 0} {sep=" --plugin " plugins} \ + ~{true="--plugin" false="" length(plugins) > 0} ~{sep=" --plugin " plugins} \ --vcf \ --compress-output bgzip \ ~{true="--refseq" false="" refseq} \ From e4654bc7be895cdf5fc80c02fdbfb84b8941d2aa Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:51:53 +0100 Subject: [PATCH 1105/1208] properly format commandline option --- vep.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index 636a8ce0..626257a3 100644 --- a/vep.wdl +++ b/vep.wdl @@ -55,7 +55,7 @@ task Vep { --offline \ ~{true="--plugin" false="" length(plugins) > 0} ~{sep=" --plugin " plugins} \ --vcf \ - --compress-output bgzip \ + --compress_output bgzip \ ~{true="--refseq" false="" refseq} \ ~{true="--merged" false="" merged} \ \ From bda5ff43ad460a51adcfa9daeb3432ec2156c80d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 16:21:23 +0100 Subject: [PATCH 1106/1208] Fix trailing 
whitespace --- vep.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vep.wdl b/vep.wdl index 626257a3..f2ca4a6e 100644 --- a/vep.wdl +++ b/vep.wdl @@ -49,7 +49,7 @@ task Vep { # Output all stats files by default for MultiQC integration vep \ --input_file ~{inputFile} \ - ~{"--species " + species} \ + ~{"--species " + species} \ --stats_html --stats_text \ --dir vep_cache \ --offline \ @@ -58,7 +58,6 @@ task Vep { --compress_output bgzip \ ~{true="--refseq" false="" refseq} \ ~{true="--merged" false="" merged} \ - \ ~{true="--everything" false="" everything} \ ~{true="--symbol" false="" symbol} \ From 967934c2fd0a4a4f29e4ad87475cd9c68a22298a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 16:39:43 +0100 Subject: [PATCH 1107/1208] Add missing output file param --- vep.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vep.wdl b/vep.wdl index f2ca4a6e..064cf41a 100644 --- a/vep.wdl +++ b/vep.wdl @@ -49,6 +49,7 @@ task Vep { # Output all stats files by default for MultiQC integration vep \ --input_file ~{inputFile} \ + --output_file ~{outputPath} \ ~{"--species " + species} \ --stats_html --stats_text \ --dir vep_cache \ @@ -71,6 +72,7 @@ task Vep { output { File outputFile = outputPath File statsHtml = outputPath + "_summary.html" + File statsTxt = outputPath + "_summary.txt" } runtime { From 115f3cfc0da031309a42a5a02d0825a06e1d3e85 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 17:03:00 +0100 Subject: [PATCH 1108/1208] Make sure output directory is made --- vep.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/vep.wdl b/vep.wdl index 064cf41a..7fb6a660 100644 --- a/vep.wdl +++ b/vep.wdl @@ -43,6 +43,7 @@ task Vep { command <<< set -e mkdir vep_cache + mkdir -p "$(dirname ~{outputPath})" tar -x --directory vep_cache -f ~{cacheTar} ~{"tar -x --directory vep_cache -f " + pluginsTar} From f29492641550c6d2247a40d216d53c5030d7983d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 
14:22:49 +0100 Subject: [PATCH 1109/1208] Complete VEP task --- vep.wdl | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/vep.wdl b/vep.wdl index 7fb6a660..8a5a443b 100644 --- a/vep.wdl +++ b/vep.wdl @@ -41,12 +41,14 @@ task Vep { } command <<< - set -e + set -eu mkdir vep_cache mkdir -p "$(dirname ~{outputPath})" tar -x --directory vep_cache -f ~{cacheTar} ~{"tar -x --directory vep_cache -f " + pluginsTar} + # Make sure vep can error, so the removal always succeeds. + set +e # Output all stats files by default for MultiQC integration vep \ --input_file ~{inputFile} \ @@ -61,13 +63,14 @@ task Vep { ~{true="--refseq" false="" refseq} \ ~{true="--merged" false="" merged} \ ~{true="--everything" false="" everything} \ - ~{true="--symbol" false="" symbol} \ - + ~{true="--symbol" false="" symbol} + VEP_EXIT_CODE=$? + set -e # Cleanup the tar extract to save filesystem space rm -rf vep_cache - + exit $VEP_EXIT_CODE >>> output { @@ -83,8 +86,23 @@ task Vep { } parameter_meta { + # input + inputFile: {description: "The VCF to annotate.", category: "required"} + outputPath: {description: "Where to put the output file", category: "advanced"} + cacheTar: {description: "A TAR archive containing the cache. 
The TAR archives from the VEP website work.", category: "required"} + pluginsTar: {description: "A TAR file with custom plugins.", category: "advanced"} + refseq: {description: "Use the refseq cache", category: "common"} + merged: {description: "Use the merged cache", category: "common"} + everything: {description: "Use all annotation sources bundeld with vep.", category: "common"} + symbol: {description: "Add the gene symbol to the output where available", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + outputFile: {description: "The annotated VEP VCF file."} + statsHtml: {description: "The VEP summary stats HTML file."} + statsTxt: {description: "The VEP summary stats TXT file."} } } From eca4681a0baf841dc2fffc2ca3f22930822740a5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 14:45:48 +0100 Subject: [PATCH 1110/1208] Add VEP to the changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1276efaa..378731bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Add VEP task. + Add Sequali task. + Add Clair3 task. + Add Modkit task. 
From 203d178e3ea80abef927e7f1ac67d00fec93ff75 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 15:15:50 +0100 Subject: [PATCH 1111/1208] Add missing parameter_meta for VEP --- vep.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vep.wdl b/vep.wdl index 8a5a443b..349242fb 100644 --- a/vep.wdl +++ b/vep.wdl @@ -91,6 +91,8 @@ task Vep { outputPath: {description: "Where to put the output file", category: "advanced"} cacheTar: {description: "A TAR archive containing the cache. The TAR archives from the VEP website work.", category: "required"} pluginsTar: {description: "A TAR file with custom plugins.", category: "advanced"} + species: {description: "Which species cache to use", category: "common"} + plugins: {description: "Which plugins to use", category: "common"} refseq: {description: "Use the refseq cache", category: "common"} merged: {description: "Use the merged cache", category: "common"} everything: {description: "Use all annotation sources bundeld with vep.", category: "common"} From 117e5317fbb50c5989b1afd668d469569b78127e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 15:20:15 +0100 Subject: [PATCH 1112/1208] Add missing Minimap2 parameter_meta --- minimap2.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/minimap2.wdl b/minimap2.wdl index 18127cb1..da301bd3 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -183,6 +183,11 @@ task Mapping { mismatchPenalty: {description: "Mismatch penalty.", category: "advanced"} tagsToKeep: {description: "Tags to keep from the input unaligned BAM file.", category: "Advanced"} howToFindGTAG: {description: "How to find GT-AG. 
f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} + compressionLevel: {description: "compressionLevel for the output file", category: "advanced"} + additionalSortThreads: {description: "Extra sorting threads used for samtools sort", category: "advanced"} + sortMemoryGb: {description: "Amount of memory set for sorting", category: "advanced"} + nameSorted: {description: "Output a name sorted file instead", category: "common"} + cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From 319501e7ebbc0fa76baaac1d48d56294eda4b86c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 11:21:25 +0100 Subject: [PATCH 1113/1208] Add a samtools split task --- samtools.wdl | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index fbb445e7..a82bbda1 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -514,6 +514,61 @@ task Sort { } } +task Split { + input { + File inputBam + Directory outputPath + String? unaccountedPath + String? filenameFormat = "%!.%." + String? outputFormat = "bam" + Boolean writeIndex = false + + Int threads = 1 + String memory = "1GiB" + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + } + + command { + set -e + mkdir -p "~{outputPath}" + samtools split \ + --output-fmt ~{outputFormat} \ + -f "~{outputPath}/rg/~{filenameFormat}" \ + ~{"-u " + unaccountedPath} \ + ~{true="--write-index" false="" writeIndex} \ + ~{inputBam} + } + + output { + Array[File] split = glob(outputPath + "/rg/*." + outputFormat) + File? 
unaccounted = unaccountedPath + } + + runtime { + cpu: threads + memory: memory + docker: dockerImage + time_minutes: timeMinutes + } + + parameter_meta { + # inputs + inputBam: {description: "The bam file to split.", category: "required"} + outputPath: {description: "Directory to store output bams", category: "required"} + + # Optional parameters + unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "optional"} + filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. filename extension for output format", category: "format"} + outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "format"} + writeIndex: {description: "Automatically index outputs", category: "indexing"} + + # outputs + split: {description: "BAM file split by read groups"} + unaccounted: {description: "Reads with no RG tag or an unrecognised RG tag."} + } +} + task Tabix { input { File inputFile From 60dcef74f6229d81d19436a361f3e4e6aa41ddd0 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 11:22:35 +0100 Subject: [PATCH 1114/1208] Register in changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 378731bd..2993ddc0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ version 6.0.0-dev + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. ++ New samtools task: split. 
version 5.2.0 --------------------------- From 4030091ee212be3cc040c69a61834684b8c8be0e Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 11:27:49 +0100 Subject: [PATCH 1115/1208] Directory not yet available --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index a82bbda1..51230097 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -517,7 +517,7 @@ task Sort { task Split { input { File inputBam - Directory outputPath + String outputPath String? unaccountedPath String? filenameFormat = "%!.%." String? outputFormat = "bam" From 8a0de277c0b69a7607757a0c8c102a379e8e444c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 11:28:19 +0100 Subject: [PATCH 1116/1208] Must be defined --- samtools.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 51230097..a2be09a4 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -519,8 +519,8 @@ task Split { File inputBam String outputPath String? unaccountedPath - String? filenameFormat = "%!.%." - String? outputFormat = "bam" + String filenameFormat = "%!.%." 
+ String outputFormat = "bam" Boolean writeIndex = false Int threads = 1 From b70891c3aea7314777aaf5122de3beadf10965e3 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 12:27:17 +0100 Subject: [PATCH 1117/1208] noticed in wdl-aid that only these are permitted --- samtools.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index a2be09a4..2fe9a9f7 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -558,10 +558,10 @@ task Split { outputPath: {description: "Directory to store output bams", category: "required"} # Optional parameters - unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "optional"} - filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. filename extension for output format", category: "format"} - outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "format"} - writeIndex: {description: "Automatically index outputs", category: "indexing"} + unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "common"} + filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. 
filename extension for output format", category: "common"} + outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "advanced"} + writeIndex: {description: "Automatically index outputs", category: "advanced"} # outputs split: {description: "BAM file split by read groups"} From 1ec88558c5b21cb1362518b2c4af95a865abcc68 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 13:01:26 +0100 Subject: [PATCH 1118/1208] Add compression level parameter, defaulting to 1 --- samtools.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 2fe9a9f7..c46ea88b 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -523,6 +523,8 @@ task Split { String outputFormat = "bam" Boolean writeIndex = false + Int compressionLevel = 1 + Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) @@ -534,6 +536,7 @@ task Split { mkdir -p "~{outputPath}" samtools split \ --output-fmt ~{outputFormat} \ + --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ ~{true="--write-index" false="" writeIndex} \ @@ -562,6 +565,7 @@ task Split { filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. 
filename extension for output format", category: "common"} outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "advanced"} writeIndex: {description: "Automatically index outputs", category: "advanced"} + compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"} # outputs split: {description: "BAM file split by read groups"} From 153db04100bf78f07b898d523a6da84544d8a02b Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 13:01:37 +0100 Subject: [PATCH 1119/1208] default to indexing --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index c46ea88b..554d0903 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -521,7 +521,7 @@ task Split { String? unaccountedPath String filenameFormat = "%!.%." String outputFormat = "bam" - Boolean writeIndex = false + Boolean writeIndex = true Int compressionLevel = 1 From 1522785ae1cec9254e5bf57f942260eab2babfd4 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 13:08:33 +0100 Subject: [PATCH 1120/1208] Remove control of output format --- samtools.wdl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 554d0903..7eba529c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -520,7 +520,6 @@ task Split { String outputPath String? unaccountedPath String filenameFormat = "%!.%." - String outputFormat = "bam" Boolean writeIndex = true Int compressionLevel = 1 @@ -535,7 +534,7 @@ task Split { set -e mkdir -p "~{outputPath}" samtools split \ - --output-fmt ~{outputFormat} \ + --output-fmt bam \ --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ @@ -544,7 +543,7 @@ task Split { } output { - Array[File] split = glob(outputPath + "/rg/*." + outputFormat) + Array[File] splitBam = glob(outputPath + "/rg/*.bam") File? 
unaccounted = unaccountedPath } @@ -563,7 +562,6 @@ task Split { # Optional parameters unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "common"} filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. filename extension for output format", category: "common"} - outputFormat: {description: "Format of output files (SAM, BAM, CRAM)", category: "advanced"} writeIndex: {description: "Automatically index outputs", category: "advanced"} compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"} From 2bba90e99bbc61dc08905a569d8bbb3df285878a Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 13:08:42 +0100 Subject: [PATCH 1121/1208] include indexes --- samtools.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 7eba529c..bfed7560 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -544,6 +544,7 @@ task Split { output { Array[File] splitBam = glob(outputPath + "/rg/*.bam") + Array[File] splitBamIndex = glob(outputPath + "/rg/*.bai") File? 
unaccounted = unaccountedPath } @@ -566,7 +567,8 @@ task Split { compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"} # outputs - split: {description: "BAM file split by read groups"} + splitBam: {description: "BAM file split by read groups"} + splitBamIndex: {description: "BAM indexes"} unaccounted: {description: "Reads with no RG tag or an unrecognised RG tag."} } } From bd4a8567cdedabf6aa1e779fa1af731b09e64b49 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 15:19:02 +0100 Subject: [PATCH 1122/1208] write index is non-optional --- samtools.wdl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index bfed7560..1660aac3 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -520,7 +520,6 @@ task Split { String outputPath String? unaccountedPath String filenameFormat = "%!.%." - Boolean writeIndex = true Int compressionLevel = 1 @@ -538,7 +537,7 @@ task Split { --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ - ~{true="--write-index" false="" writeIndex} \ + --write-index \ ~{inputBam} } @@ -563,7 +562,6 @@ task Split { # Optional parameters unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "common"} filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. 
filename extension for output format", category: "common"} - writeIndex: {description: "Automatically index outputs", category: "advanced"} compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"} # outputs From be0aabe03a8615dad5190b5e4c4c9869bb472c2e Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 15:49:15 +0100 Subject: [PATCH 1123/1208] make subdirectory as well --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 1660aac3..c452664c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -531,7 +531,7 @@ task Split { command { set -e - mkdir -p "~{outputPath}" + mkdir -p "~{outputPath}/rg/" samtools split \ --output-fmt bam \ --output-fmt-option level=~{compressionLevel} \ From 10e83c1c116d55d148534c7f9fc56056773aadb7 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 16:03:06 +0100 Subject: [PATCH 1124/1208] emits csi extension instead --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index c452664c..191a99a2 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -543,7 +543,7 @@ task Split { output { Array[File] splitBam = glob(outputPath + "/rg/*.bam") - Array[File] splitBamIndex = glob(outputPath + "/rg/*.bai") + Array[File] splitBamIndex = glob(outputPath + "/rg/*.bam.csi") File? 
unaccounted = unaccountedPath } From 6ebf7cd161f15add1c8ed9af8f000ab0952d232c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 7 Mar 2025 16:14:42 +0100 Subject: [PATCH 1125/1208] missing threads --- samtools.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/samtools.wdl b/samtools.wdl index 191a99a2..19ad8dab 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -537,6 +537,7 @@ task Split { --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ + --threads ~{threads} \ --write-index \ ~{inputBam} } From 6f9350106827f108f7be38b0d0440a0243174664 Mon Sep 17 00:00:00 2001 From: Helena Date: Mon, 10 Mar 2025 14:00:24 +0100 Subject: [PATCH 1126/1208] Update samtools.wdl --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index fbb445e7..66dc647f 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -167,7 +167,7 @@ task Fastq { Int threads = 1 String memory = "1GiB" - Int timeMinutes = 1 + ceil(size(inputBam) * 2) + Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } From 0ff8d9891a82ff8daf784b782d5007b4ed5cdd16 Mon Sep 17 00:00:00 2001 From: Helena Date: Wed, 19 Mar 2025 18:33:55 +0100 Subject: [PATCH 1127/1208] Add link to mentioned VEP website to save time in future --- vep.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index 349242fb..e99c9fdb 100644 --- a/vep.wdl +++ b/vep.wdl @@ -89,7 +89,7 @@ task Vep { # input inputFile: {description: "The VCF to annotate.", category: "required"} outputPath: {description: "Where to put the output file", category: "advanced"} - cacheTar: {description: "A TAR archive containing the cache. The TAR archives from the VEP website work.", category: "required"} + cacheTar: {description: "A TAR archive containing the cache. 
The TAR archives from the VEP website work (http://www.ensembl.org/info/docs/tools/vep/script/vep_cache.html)", category: "required"} pluginsTar: {description: "A TAR file with custom plugins.", category: "advanced"} species: {description: "Which species cache to use", category: "common"} plugins: {description: "Which plugins to use", category: "common"} From 3ea61f0d2fe6f16eba1afde9255c15bc368975dd Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 25 Mar 2025 14:56:01 +0100 Subject: [PATCH 1128/1208] Add a samtools quickcheck task which returns the input bam. This is designed to enable us to more quickly catch problematic BAMs, and fail earlier in the pipeline than after we've wasted some significant compute time. --- CHANGELOG.md | 1 + samtools.wdl | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 378731bd..a41b47cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ version 6.0.0-dev + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. ++ Added `samtools.Quickcheck` to allow failing on truncated files early. 
version 5.2.0 --------------------------- diff --git a/samtools.wdl b/samtools.wdl index 66dc647f..ea615bae 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -452,6 +452,46 @@ task Merge { } } +task Quickcheck { + input { + File inputBam + + Int threads = 1 + Int memoryGb = 1 + Int timeMinutes = 1 + String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + } + + command { + set -e + samtools quickcheck ~{inputBam} + } + + output { + File outputBam = inputBam + } + + runtime { + cpu: threads + memory: "~{memoryGb}GiB" + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputBam: {description: "The input BAM/SAM/CRAM file.", category: "required"} + + threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} + memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputBam: {description: "The exact same input file, but use this so it is recognised as a dependent task."} + } +} + task Sort { input { File inputBam From 38c5c9ad46e56e6c6e04853bc278e07c24221a28 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 25 Mar 2025 14:20:20 +0100 Subject: [PATCH 1129/1208] Collate fastq file before splitting It was reported to me that the _R1/_R2 from `samtools fastq` were not collated properly, that a single read was appearing in two wildly different places in R1/R2 which is completely silly. 
I have tried to reproduce this but thus far have been unable to: $ samtools view -b FILE.bam chrM > tmp.bam $ du -h tmp.bam 560K tmp.bam $ samtools fastq -1 paired1.fq -2 paired2.fq -0 /dev/null -s /dev/null -n tmp.bam [M::bam2fq_mainloop] discarded 480 singletons [M::bam2fq_mainloop] processed 608 reads $ diff <(grep ^@D paired1.fq) <(grep ^@D paired2.fq) $ Note the complete lack of difference between ordering. But if we look at the output of files which have come out of this tool, there are clear differences: $ zless R1.fastq.gz | grep '^@' | head -n 3 @D_____________________:1108:3364:16050 @D_____________________:2113:10647:9989 @D_____________________:2208:9374:82968 $ zless R2.fastq.gz | grep '^@' | head -n 3 @D_____________________:1108:3364:16050 @D_____________________:1214:3361:56060 @D_____________________:1309:8329:98995 these were produced by the command $ set -e $ mkdir -p "$(dirname split/R1.fastq.gz)" $ samtools fastq \ -1 split/R1.fastq.gz \ -2 split/R2.fastq.gz \ -n \ --threads 1 \ /mnt/miniwdl/out.bam This is indeed documented behaviour however: > If the input contains read-pairs which are to be interleaved or > written to separate files in the same order, then the input should be > first collated by name. Use samtools collate or samtools sort -n to > ensure this. > > https://www.htslib.org/doc/samtools-fasta.html#DESCRIPTION So it makes some sense to collate, or at some point ensure that the BAMs are sorted. I think there is a discussion to be had over whether automatic collation is sensible or a waste of runtime, but on the other hand, this is maybe a small footgun and eliminating it would make sense to reduce the potential failure modes (given our focus on reducing risk and all.)
--- CHANGELOG.md | 1 + samtools.wdl | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 378731bd..abf77c00 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ version 6.0.0-dev + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. ++ Fixed bug whereby `samtools.Fastq` could produce out of sync R1/R2 when used with an unsorted bam input. `samtools collate` is now used by default to group reads by readname in order to avoid this issue. version 5.2.0 --------------------------- diff --git a/samtools.wdl b/samtools.wdl index 66dc647f..02a5ed52 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -174,6 +174,7 @@ task Fastq { command { set -e mkdir -p "$(dirname ~{outputRead1})" + samtools collate -u -O ~{inputBam} | \ samtools fastq \ ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ ~{"-2 " + outputRead2} \ @@ -184,8 +185,7 @@ task Fastq { ~{true="-N" false="-n" appendReadNumber} \ ~{true="-O" false="" outputQuality} \ ~{"-c " + compressionLevel} \ - ~{"--threads " + threads} \ - ~{inputBam} + ~{"--threads " + threads} } output { From 47efde79998bd64c25ef546e6387ff37254fa192 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 26 Mar 2025 12:23:19 +0100 Subject: [PATCH 1130/1208] Hardcode runtime per feedback --- samtools.wdl | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index ea615bae..8bb2df87 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -456,9 +456,6 @@ task Quickcheck { input { File inputBam - Int threads = 1 - Int memoryGb = 1 - Int timeMinutes = 1 String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" } @@ -472,9 +469,7 @@ task Quickcheck { } runtime { - cpu: threads - memory: "~{memoryGb}GiB" - time_minutes: timeMinutes + time_minutes: 5 docker: dockerImage } @@ -482,9 +477,6 @@ task 
Quickcheck { # inputs inputBam: {description: "The input BAM/SAM/CRAM file.", category: "required"} - threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} - memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs From 9fd1c2cfb9431a31d48dab6eaadf9f14faf96326 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 26 Mar 2025 14:13:59 +0100 Subject: [PATCH 1131/1208] do not use default cpu/mem --- samtools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 8bb2df87..a009500c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -469,7 +469,9 @@ task Quickcheck { } runtime { + cpu: 1 time_minutes: 5 + memory: "1GiB" docker: dockerImage } From d0cc47c6421d990b2f2ed18b6ef5476cd5a19dd4 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Thu, 27 Mar 2025 17:46:10 +0100 Subject: [PATCH 1132/1208] Add wa/wb/s flags to bedtools intersect Fix bug whereby missing outdir would cause a failure. --- CHANGELOG.md | 1 + bedtools.wdl | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 378731bd..4bd6ae1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ version 6.0.0-dev + Use softlinks to localise the database for centrifuge. + Added the FastqFilter task. + Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. ++ Update `bedtools.Intersect` to support `-wa`, `-wb`, and `-s` flags. 
version 5.2.0 --------------------------- diff --git a/bedtools.wdl b/bedtools.wdl index fe18ede6..a5d8aab3 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -267,6 +267,10 @@ task Intersect { File? faidx # Giving a faidx file will set the sorted option. + Boolean writeA = false + Boolean writeB = false + Boolean stranded = false + String memory = "~{512 + ceil(size([regionsA, regionsB], "MiB"))}MiB" Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "GiB")) String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" @@ -276,10 +280,14 @@ task Intersect { command { set -e + mkdir -p "$(dirname ~{outputBed})" ~{"cut -f1,2 " + faidx} ~{true="> sorted.genome" false ="" sorted} bedtools intersect \ -a ~{regionsA} \ -b ~{regionsB} \ + ~{true="-wa" false="" writeA} \ + ~{true="-wb" false="" writeB} \ + ~{true="-s" false="" stranded} \ ~{true="-sorted" false="" sorted} \ ~{true="-g sorted.genome" false="" sorted} \ > ~{outputBed} @@ -301,6 +309,11 @@ task Intersect { regionsB: {description: "Region file b to intersect.", category: "required"} outputBed: {description: "The path to write the output to.", category: "advanced"} faidx: {description: "The fasta index (.fai) file that is used to create the genome file required for sorted output. Implies sorted option.", category: "common"} + + writeA: {description: "Write the original entry in A for each overlap.", category: "advanced"} + writeB: {description: "Write the original entry in B for each overlap. Useful for knowing what A overlaps.", category: "advanced"} + stranded: {description: "Force “strandedness”. That is, only report hits in B that overlap A on the same strand. 
By default, overlaps are reported without respect to strand.", category: "advanced"} + memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} From fff0fe8fe9cf1f022369dcfb05e5f4980f0f8115 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 08:52:51 +0200 Subject: [PATCH 1133/1208] Update pbmm2 image --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index ea7c05df..91b0b1fe 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -31,7 +31,7 @@ task Mapping { Int cores = 4 String memory = "30GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 2000 / cores) - String dockerImage = "quay.io/biocontainers/pbmm2:1.3.0--h56fc30b_1" + String dockerImage = "quay.io/biocontainers/pbmm2:1.17.0--h9ee0642_0" } command { From 084486c19bcde6398d41381c0628f5c359c7c53b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 09:05:59 +0200 Subject: [PATCH 1134/1208] Add pbmm2 outputPrefix parameter --- CHANGELOG.md | 2 ++ pbmm2.wdl | 12 ++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dde73d44..dd536e5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Allow pbmm2 to work with a set output prefix for the BAM file. ++ Update pbmm2 docker container to version 1.17 + Add VEP task. + Add Sequali task. + Add Clair3 task. 
diff --git a/pbmm2.wdl b/pbmm2.wdl index 91b0b1fe..915fbb02 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -25,6 +25,7 @@ task Mapping { String presetOption Boolean sort=true String sample + String outputPrefix = sample + ".align" File referenceMMI File queryFile @@ -35,6 +36,8 @@ task Mapping { } command { + set -e + mkdir -p ~{outputPrefix} pbmm2 align \ --preset ~{presetOption} \ ~{true="--sort" false="" sort} \ @@ -42,12 +45,12 @@ task Mapping { ~{referenceMMI} \ ~{queryFile} \ --sample ~{sample} \ - ~{sample}.align.bam + ~{outputPrefix}.bam } output { - File outputAlignmentFile = sample + ".align.bam" - File outputIndexFile = sample + ".align.bam.bai" + File outputAlignmentFile = outputPrefix + ".bam" + File outputIndexFile = outputPrefix + ".bam.bai" } runtime { @@ -62,6 +65,7 @@ task Mapping { presetOption: {description: "This option applies multiple options at the same time.", category: "required"} sort: {description: "Sort the output bam file.", category: "advanced"} sample: {description: "Name of the sample.", category: "required"} + outputPrefix: {description: "The prefix of the output filename before the .bam extension." category: "advanced"} referenceMMI: {description: "MMI file for the reference.", category: "required"} queryFile: {description: "BAM file with reads to align against the reference.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} @@ -69,7 +73,7 @@ task Mapping { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - # outputs + # output outputAlignmentFile: {description: "Mapped bam file."} outputIndexFile: {description: "Bam index file."} } From 912754990f49d74b69a170bf68901e6ecd1f9557 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 11:52:02 +0200 Subject: [PATCH 1135/1208] Use a better output prefix Co-authored-by: Davy Cats --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index 915fbb02..f8abbd64 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -25,7 +25,7 @@ task Mapping { String presetOption Boolean sort=true String sample - String outputPrefix = sample + ".align" + String outputPrefix = "./~{sample}.align" File referenceMMI File queryFile From 408757f683bf02d0bcf214cd72a4aee732d520d9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 11:52:16 +0200 Subject: [PATCH 1136/1208] Add missing dirname call Co-authored-by: Davy Cats --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index f8abbd64..b00e249e 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -37,7 +37,7 @@ task Mapping { command { set -e - mkdir -p ~{outputPrefix} + mkdir -p $(dirname ~{outputPrefix}) pbmm2 align \ --preset ~{presetOption} \ ~{true="--sort" false="" sort} \ From 8e008554a71cb5de37c69f80321b0d4d39dcf750 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 11:58:13 +0200 Subject: [PATCH 1137/1208] Add missing comma --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index b00e249e..73e74c0c 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -65,7 +65,7 @@ task Mapping { presetOption: {description: "This option applies multiple options at the same time.", category: "required"} sort: {description: "Sort the output bam file.", category: "advanced"} sample: {description: "Name of the sample.", category: 
"required"} - outputPrefix: {description: "The prefix of the output filename before the .bam extension." category: "advanced"} + outputPrefix: {description: "The prefix of the output filename before the .bam extension.", category: "advanced"} referenceMMI: {description: "MMI file for the reference.", category: "required"} queryFile: {description: "BAM file with reads to align against the reference.", category: "required"} cores: {description: "The number of cores to be used.", category: "advanced"} From 7d6da07cd4dbe09e42cf343e9077d0118e4d1264 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Thu, 27 Mar 2025 17:48:03 +0100 Subject: [PATCH 1138/1208] Deprecated bedgraph option, produce it by default --- CHANGELOG.md | 1 + modkit.wdl | 15 ++++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd536e5e..337a68db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ version 6.0.0-dev + Fixed bug whereby `samtools.Fastq` could produce out of sync R1/R2 when used with an unsorted bam input. `samtools collate` is now used by default to group reads by readname in order to avoid this issue. + New samtools task: split. + Update `bedtools.Intersect` to support `-wa`, `-wb`, and `-s` flags. ++ Deprecate `modkit.Pileup`'s bedGraph option, it is now output by default. version 5.2.0 --------------------------- diff --git a/modkit.wdl b/modkit.wdl index 930b6de9..7376a567 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -25,6 +25,7 @@ task Pileup { File bam File bamIndex String outputBed = "output.bedMethyl" + String outputBedGraph = "m_CG0_combined.bedgraph" File referenceFasta File referenceFastaFai @@ -34,7 +35,6 @@ task Pileup { Boolean cpg = false Boolean combineMods = false Boolean combineStrands = false - Boolean bedgraph = false String? 
ignore String logFilePath = "modkit.log" @@ -42,7 +42,6 @@ task Pileup { String memory = "4GiB" Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" - } command <<< @@ -58,15 +57,17 @@ task Pileup { ~{true="--cpg" false="" cpg} \ ~{true="--combine-mods" false="" combineMods} \ ~{true="--combine-strands" false="" combineStrands} \ - ~{true="--bedgraph" false="" bedgraph} \ --log-filepath ~{logFilePath} \ ~{bam} \ - ~{outputBed} + - | tee ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > ~{outputBedGraph} >>> + # You can use modkit pileup ${bam_path} - | tee out.bedmethyl | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > out.bg to get both outputs at once without running anything twice. + # https://github.com/nanoporetech/modkit/issues/210#issuecomment-2181706374 + output { - File? out = outputBed # Normal mode - Array[File] outFiles = glob(outputBed + "/*") # Bedgraph mode + File out = outputBed # Normal mode + File outFiles = outputBedGraph # Bedgraph mode File logFile = logFilePath } @@ -104,4 +105,4 @@ task Pileup { outFiles: {description: "Output files when bedgraph = true."} logFile: {description: "The generated log file."} } -} \ No newline at end of file +} From 9d2a4735bf221410b7a1b6b3ad1cd5e5edad3423 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Mar 2025 14:33:52 +0100 Subject: [PATCH 1139/1208] Update parameter_meta for modkit --- modkit.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 7376a567..5ba1f501 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -84,14 +84,14 @@ task Pileup { bamIndex: {description: "The index for the input alignment file", category: "required"} referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} - outputBed: {description: "The output name where the data 
should be placed.", category: "common"} + outputBed: {description: "The output name where the bedMethyl file should be placed.", category: "common"} + outputBedgraph: {description: "The output name where the bedgraph file should be placed", category: "common"} intervalSize: {description: "Sets the interval size", category: "advanced"} includeBed: {description: "Bed file with regions to include", category: "advanced"} cpg: {description: "Whether to call only at cpg sites", category: "advanced"} combineMods: {description: "Whether to combine modifications in the output", category: "advanced"} combineStrands: {description: "Whether to combine strands in the output", category: "advanced"} - bedgraph: {description: "Whether to create a folder instead with a bedgraph file", category: "advanced"} ignore: {description: "Modification type to ignore. For example 'h'.", category: "advanced"} logFilePath: {description: "Path where the log file should be written.", category: "advanced"} From feaacf40fb1fb2edf4588d63b5baee4f8eac18a2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 28 Mar 2025 14:37:39 +0100 Subject: [PATCH 1140/1208] Fix typo --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 5ba1f501..9311e4da 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -85,7 +85,7 @@ task Pileup { referenceFasta: {description: "The reference fasta file.", category: "required"} referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} outputBed: {description: "The output name where the bedMethyl file should be placed.", category: "common"} - outputBedgraph: {description: "The output name where the bedgraph file should be placed", category: "common"} + outputBedGraph: {description: "The output name where the bedgraph file should be placed", category: "common"} intervalSize: {description: "Sets the interval size", category: "advanced"} includeBed: {description: "Bed file with regions to 
include", category: "advanced"} From 9e057d6ce259e5fc96ffb04208c37bda8b43ec3e Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Mon, 31 Mar 2025 14:04:15 +0200 Subject: [PATCH 1141/1208] split into separate files --- modkit.wdl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 9311e4da..78df28f4 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -25,7 +25,7 @@ task Pileup { File bam File bamIndex String outputBed = "output.bedMethyl" - String outputBedGraph = "m_CG0_combined.bedgraph" + String outputBedGraph = "combined.bedgraph" File referenceFasta File referenceFastaFai @@ -59,7 +59,9 @@ task Pileup { ~{true="--combine-strands" false="" combineStrands} \ --log-filepath ~{logFilePath} \ ~{bam} \ - - | tee ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > ~{outputBedGraph} + - | tee ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10 >> "~{outputBedGraph}_"$4"_"$6".bedGraph"}' + # Separately generate the combined file as well, so users can have a choice. + cat ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > ~{outputBedGraph} >>> # You can use modkit pileup ${bam_path} - | tee out.bedmethyl | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > out.bg to get both outputs at once without running anything twice. 
@@ -67,7 +69,8 @@ task Pileup { output { File out = outputBed # Normal mode - File outFiles = outputBedGraph # Bedgraph mode + File outGraph = outputBedGraph # Normal mode + Array[File] outFiles = glob(outputBedGraph + "*.bedGraph") # Bedgraph mode File logFile = logFilePath } From e439d58c8e9584c8957a4ecb265ce5f7de9f96ce Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 31 Mar 2025 14:06:31 +0200 Subject: [PATCH 1142/1208] Add Mosdepth task --- CHANGELOG.md | 1 + mosdepth.wdl | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 mosdepth.wdl diff --git a/CHANGELOG.md b/CHANGELOG.md index dd536e5e..986dfd13 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Add Mosdepth task. + Allow pbmm2 to work with a set output prefix for the BAM file. + Update pbmm2 docker container to version 1.17 + Add VEP task. diff --git a/mosdepth.wdl b/mosdepth.wdl new file mode 100644 index 00000000..0f800769 --- /dev/null +++ b/mosdepth.wdl @@ -0,0 +1,106 @@ +version 1.0 + +# Copyright (c) 2025 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Mosdepth { + input { + File bam + File bamIndex + String prefix = "./out" + + String? chrom + # --by flag takes a BED file or an integer. So there need to be two inputs in WDL's typed system. + File? byBed + Int? byWindow + File? fasta + Int? flag + Int? includeFlag + + Boolean noPerBase = false + Boolean d4 = false + Boolean fastMode = false + + Int threads = 1 + String memory = "1GiB" + Int timeMinutes = 10 + ceil(size(bam, "G")) * 4 + String dockerImage = "quay.io/biocontainers/mosdepth:0.3.10--h4e814b3_1" + } + + command <<< + set -e + mkdir -p $(dirname ~{prefix}) + mosdepth \ + --threads ~{threads} \ + ~{"--chrom " + chrom} \ + ~{"--by " + byBed} \ + ~{"--by " + byWindow} \ + ~{"--fasta " + fasta} \ + ~{true="--no-per-base" false="" noPerBase} \ + ~{true="--d4" false="" d4} \ + ~{"--flag " + flag} \ + ~{"--include-flag " + includeFlag} \ + ~{true="--fast-mode" false="" fastMode} \ + ~{prefix} ~{bam} + >>> + + output { + File globalDist = "~{prefix}.mosdepth.global.dist.txt" + File summary = "~{prefix}.mosdepth.summary.txt" + File? perBaseBed = "~{prefix}.per-base.bed.gz" + File? 
regionsBed = "~{prefix}.regions.bed.gz" + } + + runtime { + cpu: threads + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + bam: {description: "Input BAM or CRAM file.", category: "required"} + bamIndex: {description: "Index for the input BAM or CRAM file.", category: "required"} + prefix: {description: "Output prefix.", category: "common"} + + chrom: {description: "Chromosome to restrict depth calculation.", category: "advanced"} + byBed: {description: "Bed file with windows to include for the --by flag. Should not be used together with byWindow.", category: "common"} + byWindow: {description: "Integer window size for the --by flag. Should not be used together with byBed.", category: "advanced"} + fasta: {description: "FASTA file, only necessary when CRAM input is used.", category: "advanced"} + flag: {description: "Exclude reads with any of the bits in FLAG set.", category: "advanced"} + includeFlag: {description: "Only include reads with any of the bits in FLAG set.", category: "advanced"} + + noPerBase: {description: "Don't output per-base depth. Skipping this output will speed execution.", category: "common"} + d4: {description: "output per-base depth in d4 format.", category: "advanced"} + fastMode: {description: "Don't look at internal cigar operations or correct mate overlaps (recommended for most use-cases).", category: "common"} + + threads: {description: "How many threads to use.", category: "common"} + memory: {description: "How much memory to allocate.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + globalDist: {description: "Global distribution table file."} + summary: {description: "Summary table file."} + perBaseBed: {description: "Per base coverage BED file."} + regionsBed: {description: "Per region BED file, if byBed or byWindow is used."} + } +} \ No newline at end of file From 7bcac8ea2636cbeeae247d783c0dc5558bb0955a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 09:22:31 +0200 Subject: [PATCH 1143/1208] Update all samtools images --- samtools.wdl | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index d724a692..2388813e 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -72,7 +72,7 @@ task DictAndFaidx { String javaXmx = "2G" String memory = "3GiB" Int timeMinutes = 5 + ceil(size(inputFile, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } String outputFile = basename(inputFile) @@ -119,7 +119,7 @@ task Faidx { String outputDir String memory = "2GiB" - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -168,7 +168,7 @@ task Fastq { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -232,7 +232,7 @@ task FilterShortReadsBam { String memory = "1GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai") @@ -278,7 
+278,7 @@ task Flagstat { String memory = "256MiB" # Only 40.5 MiB used for 150G bam file. Int timeMinutes = 1 + ceil(size(inputBam, "G")) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -318,7 +318,7 @@ task Index { String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 4) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } # Select_first is needed, otherwise womtool validate fails. @@ -369,7 +369,7 @@ task Markdup { String outputBamPath Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -408,7 +408,7 @@ task Merge { Int threads = 1 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } String indexPath = sub(outputBamPath, "\.bam$",".bai") @@ -456,7 +456,7 @@ task Quickcheck { input { File inputBam - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -497,7 +497,7 @@ task Sort { Int threads = 1 Int memoryGb = 1 + threads * memoryPerThreadGb Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } # Select first needed as outputPath is optional input (bug in cromwell). 
@@ -560,7 +560,7 @@ task Split { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } command { @@ -669,7 +669,7 @@ task View { Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inFile, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/samtools:1.16.1--h6899075_1" + String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } String outputIndexPath = basename(outputFileName) + ".bai" From 435a719147253df23cad2674736d8d699b186e77 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 09:56:35 +0200 Subject: [PATCH 1144/1208] Task updates to samtools.wdl --- CHANGELOG.md | 6 +++++ samtools.wdl | 72 +++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 58 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 986dfd13..8b95b904 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,12 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Update docker images in samtools.wdl ++ Add threads and compression levels to applicable tasks. Default to + compression level 1. ++ samtools BgzipAndIndex and Tabix "type" parameter changed to "preset" as is + the name of the flag. ++ Unused javaXmx parameter removed from samtools DictAndFaidx + Add Mosdepth task. + Allow pbmm2 to work with a set output prefix for the BAM file. 
+ Update pbmm2 docker container to version 1.17 diff --git a/samtools.wdl b/samtools.wdl index 2388813e..30e938b4 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -24,11 +24,13 @@ task BgzipAndIndex { input { File inputFile String outputDir - String type = "vcf" + String preset = "vcf" + Int compressLevel = 1 + Int threads = 1 String memory = "2GiB" Int timeMinutes = 1 + ceil(size(inputFile, "GiB")) - String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" + String dockerImage = "quay.io/biocontainers/htslib:1.21--h566b1c6_1" } String outputGz = outputDir + "/" + basename(inputFile) + ".gz" @@ -36,8 +38,15 @@ task BgzipAndIndex { command { set -e mkdir -p "$(dirname ~{outputGz})" - bgzip -c ~{inputFile} > ~{outputGz} - tabix ~{outputGz} -p ~{type} + bgzip \ + --threads ~{threads} \ + --compress-level ~{compressLevel} \ + -c ~{inputFile} > ~{outputGz} + + tabix \ + --preset ~{preset} \ + --threads ~{threads - 1} \ + ~{outputGz} } output { @@ -46,6 +55,7 @@ task BgzipAndIndex { } runtime { + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -55,7 +65,7 @@ task BgzipAndIndex { # inputs inputFile: {description: "The file to be compressed and indexed.", category: "required"} outputDir: {description: "The directory in which the output will be placed.", category: "required"} - type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"} + preset: {description: "The preset for the file (eg. vcf or bed) to be compressed and indexed.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -69,7 +79,6 @@ task BgzipAndIndex { task DictAndFaidx { input { File inputFile - String javaXmx = "2G" String memory = "3GiB" Int timeMinutes = 5 + ceil(size(inputFile, "GiB") * 5) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" @@ -102,7 +111,6 @@ task DictAndFaidx { parameter_meta { # inputs inputFile: {description: "The input fasta file.", category: "required"} - javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -163,7 +171,7 @@ task Fastq { Int? includeFilter Int? excludeFilter Int? excludeSpecificFilter - Int? compressionLevel + Int compressionLevel = 1 Int threads = 1 String memory = "1GiB" @@ -184,8 +192,8 @@ task Fastq { ~{"-G " + excludeSpecificFilter} \ ~{true="-N" false="-n" appendReadNumber} \ ~{true="-O" false="" outputQuality} \ - ~{"-c " + compressionLevel} \ - ~{"--threads " + threads} + -c ~{compressionLevel} \ + "--threads " ~{threads - 1} } output { @@ -276,6 +284,8 @@ task Flagstat { File inputBam String outputPath + Int threads = 1 + String memory = "256MiB" # Only 40.5 MiB used for 150G bam file. 
Int timeMinutes = 1 + ceil(size(inputBam, "G")) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" @@ -284,7 +294,9 @@ task Flagstat { command { set -e mkdir -p "$(dirname ~{outputPath})" - samtools flagstat ~{inputBam} > ~{outputPath} + samtools flagstat \ + --threads ~{threads - 1} + ~{inputBam} > ~{outputPath} } output { @@ -292,6 +304,7 @@ task Flagstat { } runtime { + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -316,6 +329,8 @@ task Index { String? outputBamPath + Int threads = 1 + String memory = "2GiB" Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" @@ -334,7 +349,9 @@ task Index { mkdir -p "$(dirname ~{outputPath})" ln ~{bamFile} ~{outputPath} || cp ~{bamFile} ~{outputPath} fi - samtools index ~{outputPath} ~{bamIndexPath} + samtools index \ + --threads ~{threads -1} \ + ~{outputPath} ~{bamIndexPath} ' } @@ -344,6 +361,7 @@ task Index { } runtime { + cpu: threads memory: memory time_minutes: timeMinutes docker: dockerImage @@ -367,6 +385,7 @@ task Markdup { input { File inputBam String outputBamPath + Int threads = 1 Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" @@ -375,7 +394,9 @@ task Markdup { command { set -e mkdir -p "$(dirname ~{outputBamPath})" - samtools markdup ~{inputBam} ~{outputBamPath} + samtools markdup \ + --threads ~{threads - 1} \ + ~{inputBam} ~{outputBamPath} } output { @@ -383,6 +404,7 @@ task Markdup { } runtime { + cpu: threads docker: dockerImage time_minutes: timeMinutes } @@ -405,6 +427,10 @@ task Merge { String outputBamPath = "merged.bam" Boolean force = true + Boolean combineRGHeaders = false + Boolean combinePGHeaders = false + + Int compressionLevel = 1 Int threads = 1 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 2) @@ -420,6 +446,9 @@ task Merge { samtools merge \ --threads ~{threads - 1} \ 
~{true="-f" false="" force} \ + -l ~{compressionLevel} \ + ~{true="-c" false="" combineRGHeaders} \ + ~{true="-p" false="" combinePGHeaders} \ ~{outputBamPath} ~{sep=' ' bamFiles} samtools index ~{outputBamPath} ~{indexPath} } @@ -514,7 +543,7 @@ task Sort { -o ~{outputPath} \ ~{inputBam} samtools index \ - -@ ~{threads} \ + --threads ~{threads - 1} \ ~{outputPath} ~{bamIndexPath} } @@ -571,7 +600,7 @@ task Split { --output-fmt-option level=~{compressionLevel} \ -f "~{outputPath}/rg/~{filenameFormat}" \ ~{"-u " + unaccountedPath} \ - --threads ~{threads} \ + --threads ~{threads - 1} \ --write-index \ ~{inputBam} } @@ -610,10 +639,10 @@ task Tabix { input { File inputFile String outputFilePath = basename(inputFile) - String type = "vcf" + String preset = "vcf" Int timeMinutes = 1 + ceil(size(inputFile, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0" + String dockerImage = "quay.io/biocontainers/htslib:1.21--h566b1c6_1" } # FIXME: It is better to do the indexing on VCF creation. @@ -625,7 +654,7 @@ task Tabix { then ln ~{inputFile} ~{outputFilePath} || cp ~{inputFile} ~{outputFilePath} fi - tabix ~{outputFilePath} -p ~{type} + tabix ~{outputFilePath} -p ~{preset} } output { @@ -643,7 +672,7 @@ task Tabix { # inputs inputFile: {description: "The file to be indexed.", category: "required"} outputFilePath: {description: "The location where the file should be written to. The index will appear alongside this link to the file.", category: "common"} - type: {description: "The type of file (eg. vcf or bed) to be indexed.", category: "common"} + preset: {description: "The preset for the file (eg. vcf or bed) to be indexed.", category: "common"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -666,6 +695,8 @@ task View { Int? MAPQthreshold File? targetFile + Boolean fast = false # Default should be true, unless a non-BAM format is preferred. So th + Int threads = 1 String memory = "1GiB" Int timeMinutes = 1 + ceil(size(inFile, "GiB") * 5) @@ -682,11 +713,12 @@ task View { ~{"-T " + referenceFasta} \ ~{"-o " + outputFileName} \ ~{true="-u " false="" uncompressedBamOutput} \ + ~{true="--fast" false="" fast} \ ~{"-f " + includeFilter} \ ~{"-F " + excludeFilter} \ ~{"-G " + excludeSpecificFilter} \ ~{"-q " + MAPQthreshold} \ - ~{"--threads " + (threads - 1)} \ + --threads ~{threads - 1} \ ~{"--target-file " + targetFile} \ ~{inFile} samtools index ~{outputFileName} ~{outputIndexPath} From d20b313ea01c0dc3fe318206daac4d976c22bc5b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 09:58:17 +0200 Subject: [PATCH 1145/1208] Increase mosdepth default memory --- mosdepth.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mosdepth.wdl b/mosdepth.wdl index 0f800769..43e95614 100644 --- a/mosdepth.wdl +++ b/mosdepth.wdl @@ -39,7 +39,7 @@ task Mosdepth { Boolean fastMode = false Int threads = 1 - String memory = "1GiB" + String memory = "4GiB" Int timeMinutes = 10 + ceil(size(bam, "G")) * 4 String dockerImage = "quay.io/biocontainers/mosdepth:0.3.10--h4e814b3_1" } From 046eecb3af6887d6aad1c31a4521951822683259 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 09:59:19 +0200 Subject: [PATCH 1146/1208] Allocate more time for merging --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 30e938b4..915bb848 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -433,7 +433,7 @@ task Merge { Int compressionLevel = 1 Int threads = 1 String memory = "4GiB" - Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 2) + Int timeMinutes = 1 + ceil(size(bamFiles, 
"GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" } From f5765ffd1e75964a43da36c500741610e005c554 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 10:20:18 +0200 Subject: [PATCH 1147/1208] Update clair3 image --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index 4d9092f2..57984a32 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -35,7 +35,7 @@ task Clair3 { Boolean includeAllCtgs = false String memory = "~{threads + 16}GiB" Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) - String dockerImage = "quay.io/biocontainers/clair3:1.0.10--py39h46983ab_0" + String dockerImage = "quay.io/biocontainers/clair3:1.0.11--py39hd649744_0" } String modelArg = "~{if defined(modelTar) then basename(select_first([modelTar]), '.tar.gz') else builtinModel}" From bfd433dd4f698bf141c7add6cc42ea58d56ca3a2 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 10:25:45 +0200 Subject: [PATCH 1148/1208] Update deepvariant image --- deepvariant.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index 2d212000..e9e6c18c 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -35,11 +35,11 @@ task RunDeepVariant { String? outputGVcfIndex File? regions String? sampleName - Boolean? 
VCFStatsReport = true + Boolean VCFStatsReport = true String memory = "48GiB" Int timeMinutes = 5000 - String dockerImage = "google/deepvariant:1.6.1" + String dockerImage = "google/deepvariant:1.8.0" } command { From cfbc34deb566ddb2ce0561168c7fb3dd3b0ae1e6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 11:30:12 +0200 Subject: [PATCH 1149/1208] Update several images --- CHANGELOG.md | 1 + modkit.wdl | 2 +- multiqc.wdl | 2 +- picard.wdl | 34 +++++++++++++++++----------------- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b95b904..8c13cacc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ version 6.0.0-dev + samtools BgzipAndIndex and Tabix "type" parameter changed to "preset" as is the name of the flag. + Unused javaXmx parameter removed from samtools DictAndFaidx ++ Update Picard images + Add Mosdepth task. + Allow pbmm2 to work with a set output prefix for the BAM file. + Update pbmm2 docker container to version 1.17 diff --git a/modkit.wdl b/modkit.wdl index 930b6de9..6a7d9b4d 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -41,7 +41,7 @@ task Pileup { Int threads = 8 String memory = "4GiB" Int timeMinutes = 2880 / threads # 2 Days / threads - String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" } diff --git a/multiqc.wdl b/multiqc.wdl index a2e32cdb..fae52178 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -58,7 +58,7 @@ task MultiQC { String? 
memory Int timeMinutes = 10 + ceil(size(reports, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/multiqc:1.25.1--pyhdfd78af_0" + String dockerImage = "quay.io/biocontainers/multiqc:1.28--pyhdfd78af_0" } Int memoryGb = 2 + ceil(size(reports, "GiB")) diff --git a/picard.wdl b/picard.wdl index 6628cf0e..fd072523 100644 --- a/picard.wdl +++ b/picard.wdl @@ -29,7 +29,7 @@ task BedToIntervalList { String javaXmx = "3G" String memory = "4GiB" Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -89,7 +89,7 @@ task CollectHsMetrics { # Additional * 2 because picard multiple metrics reads the # reference fasta twice. Int timeMinutes = 1 + ceil(size(referenceFasta, "GiB") * 3 * 2) + ceil(size(inputBam, "GiB") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -210,7 +210,7 @@ task CollectMultipleMetrics { Int memoryMb = javaXmxMb + 512 # Additional * 2 because picard multiple metrics reads the reference fasta twice. Int timeMinutes = 1 + ceil(size(referenceFasta, "GiB") * 3 * 2) + ceil(size(inputBam, "GiB") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -336,7 +336,7 @@ task CollectRnaSeqMetrics { String memory = "9GiB" # With 6 minutes per G there were several timeouts. 
Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 12) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -394,7 +394,7 @@ task CollectTargetedPcrMetrics { String javaXmx = "3G" String memory = "4GiB" Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 6) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -456,7 +456,7 @@ task CollectVariantCallingMetrics { String javaXmx = "8G" String memory = "9GiB" Int timeMinutes = 1440 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -569,7 +569,7 @@ task CreateSequenceDictionary { String javaXmx = "2G" String memory = "3GiB" - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -621,7 +621,7 @@ task GatherBamFiles { Int memoryMb = javaXmxMb + 512 # One minute per input gigabyte. 
Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 1) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -684,7 +684,7 @@ task GatherVcfs { String javaXmx = "4G" String memory = "5GiB" Int timeMinutes = 1 + ceil(size(inputVcfs, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -753,7 +753,7 @@ task MarkDuplicates { String memoryMb = javaXmxMb + 512 Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 8) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } # Task is assuming query-sorted input so that the Secondary and Supplementary reads get @@ -836,7 +836,7 @@ task MergeVCFs { String javaXmx = "4G" String memory = "5GiB" Int timeMinutes = 1 + ceil(size(inputVCFs, "GiB")) * 2 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } # Using MergeVcfs instead of GatherVcfs so we can create indices. @@ -892,7 +892,7 @@ task SamToFastq { String javaXmx = "16G" # High memory default to avoid crashes. String memory = "17GiB" Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" File? 
noneFile } @@ -953,7 +953,7 @@ task ScatterIntervalList { String javaXmx = "3G" String memory = "4GiB" - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -996,7 +996,7 @@ task SortSam { # GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778 Int XmxGb = ceil(maxRecordsInRam / 125001.0) Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -1058,7 +1058,7 @@ task SortVcf { String javaXmx = "8G" String memory = "9GiB" Int timeMinutes = 1 + ceil(size(vcfFiles, "GiB") * 5) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } @@ -1108,7 +1108,7 @@ task RenameSample { String javaXmx = "8G" String memory = "9GiB" Int timeMinutes = 1 + ceil(size(inputVcf, "GiB") * 2) - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { @@ -1163,7 +1163,7 @@ task UmiAwareMarkDuplicatesWithMateCigar { String javaXmx = "8G" String memory = "9GiB" Int timeMinutes = 360 - String dockerImage = "quay.io/biocontainers/picard:2.26.10--hdfd78af_0" + String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0" } command { From d31f74badd4e6d8f8c1f397c4478ffa20e32437e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 13:02:11 +0200 Subject: [PATCH 1150/1208] Make resource requirements for pbmm2 and minimap2 somewhat equal --- CHANGELOG.md | 2 ++ minimap2.wdl | 8 ++++---- pbmm2.wdl | 32 +++++++++++++++++++++++--------- 3 files changed, 29 insertions(+), 13 deletions(-) diff 
--git a/CHANGELOG.md b/CHANGELOG.md index 8c13cacc..5fa636d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,8 @@ version 6.0.0-dev + Unused javaXmx parameter removed from samtools DictAndFaidx + Update Picard images + Add Mosdepth task. ++ pbmm2 loses the sort parameter. Output is now always sorted. ++ pbmm2 gets an unmapped parameter. + Allow pbmm2 to work with a set output prefix for the BAM file. + Update pbmm2 docker container to version 1.17 + Add VEP task. diff --git a/minimap2.wdl b/minimap2.wdl index da301bd3..a7584beb 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -86,8 +86,6 @@ task Mapping { File queryFile Int compressionLevel = 1 - Int additionalSortThreads = 1 - Int sortMemoryGb = 1 Boolean nameSorted = false # MM, ML, MN -> Methylation flags # Also keep the following flags for Sequali to be able to run on the mapped bam file and get ONT information. @@ -112,6 +110,8 @@ task Mapping { String? howToFindGTAG String? readgroup + Int sortThreads = 2 + Int sortMemoryGb = 1 Int cores = 8 String memory = "24GiB" Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores) @@ -147,7 +147,7 @@ task Mapping { - \ | samtools sort \ ~{true="-N" false="" nameSorted} \ - -@ ~{additionalSortThreads} \ + --threads ~{sortThreads - 1} \ -l ~{compressionLevel} \ -m ~{sortMemoryGb}G \ -o ~{outputPrefix}.bam @@ -184,7 +184,7 @@ task Mapping { tagsToKeep: {description: "Tags to keep from the input unaligned BAM file.", category: "Advanced"} howToFindGTAG: {description: "How to find GT-AG. 
f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} compressionLevel: {description: "compressionLevel for the output file", category: "advanced"} - additionalSortThreads: {description: "Extra sorting threads used for samtools sort", category: "advanced"} + sortThreads: {description: "Extra sorting threads used for samtools sort", category: "advanced"} sortMemoryGb: {description: "Amount of memory set for sorting", category: "advanced"} nameSorted: {description: "Output a name sorted file instead", category: "common"} diff --git a/pbmm2.wdl b/pbmm2.wdl index 73e74c0c..23133278 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -23,25 +23,36 @@ version 1.0 task Mapping { input { String presetOption - Boolean sort=true + Boolean unmapped = false String sample String outputPrefix = "./~{sample}.align" File referenceMMI File queryFile - Int cores = 4 - String memory = "30GiB" - Int timeMinutes = 1 + ceil(size(queryFile, "G") * 2000 / cores) + Int sortMemoryGb = 1 + Int sortThreads = 2 + Int cores = 8 + String memory = "24GiB" + # Slightly higher than minimap2 as compression level can not be set. + Int timeMinutes = 1 + ceil(size(queryFile, "G") * 400 / cores) String dockerImage = "quay.io/biocontainers/pbmm2:1.17.0--h9ee0642_0" } + # Use cores+sortThreads to set the number of threads. Internally pbmm2 + # allocates cores - sortThreads to alignment. This leads to underutilization + # of the requested resources. Sorting uses very little CPU until the point + # comes that the memory is full and the temporary file needs to be written. + # At this point the alignment halts because the pipe is full. 
command { set -e mkdir -p $(dirname ~{outputPrefix}) pbmm2 align \ --preset ~{presetOption} \ - ~{true="--sort" false="" sort} \ - -j ~{cores} \ + --sort \ + ~{true="--unmapped" false="" unmapped} \ + --num-threads ~{cores + sortThreads} \ + --sort-memory ~{sortMemoryGb}G \ + --sort-threads ~{sortThreads} \ ~{referenceMMI} \ ~{queryFile} \ --sample ~{sample} \ @@ -63,15 +74,18 @@ task Mapping { parameter_meta { # inputs presetOption: {description: "This option applies multiple options at the same time.", category: "required"} - sort: {description: "Sort the output bam file.", category: "advanced"} sample: {description: "Name of the sample.", category: "required"} outputPrefix: {description: "The prefix of the output filename before the .bam extension.", category: "advanced"} referenceMMI: {description: "MMI file for the reference.", category: "required"} queryFile: {description: "BAM file with reads to align against the reference.", category: "required"} - cores: {description: "The number of cores to be used.", category: "advanced"} - memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + unmapped: {description: "Include unmapped reads in the output.", category: "common"} + + sortThreads: {description: "Extra sorting threads used for samtools sort", category: "advanced"} + sortMemoryGb: {description: "Amount of memory set for sorting", category: "advanced"} + cores: {description: "The number of cores to be used.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} # output outputAlignmentFile: {description: "Mapped bam file."} From 046947847255c3323524f1c92004a66ec026b7c1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 13:21:51 +0200 Subject: [PATCH 1151/1208] Increase default thread count for samtools merge --- CHANGELOG.md | 1 + samtools.wdl | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5fa636d8..0781e4b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Samtools merge default thread count increased to 8. + Update docker images in samtools.wdl + Add threads and compression levels to applicable tasks. Default to compression level 1. diff --git a/samtools.wdl b/samtools.wdl index 915bb848..7a2223f6 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -431,7 +431,8 @@ task Merge { Boolean combinePGHeaders = false Int compressionLevel = 1 - Int threads = 1 + # Merging is often a bottleneck. Set a high number of threads to decrease wall clock time. 
+ Int threads = 8 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" From b063b9ba79e41f3d20c64ded779a2953a1f7ec55 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 13:53:28 +0200 Subject: [PATCH 1152/1208] more time for clair3 --- clair3.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clair3.wdl b/clair3.wdl index 57984a32..5a6154af 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -34,7 +34,7 @@ task Clair3 { Int threads = 8 Boolean includeAllCtgs = false String memory = "~{threads + 16}GiB" - Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) + Int timeMinutes = 10 + ceil(size(bam, "G") * 400 / threads) String dockerImage = "quay.io/biocontainers/clair3:1.0.11--py39hd649744_0" } From d502298c8ec0e594cace54e573e68b2e7a4d9041 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 14:54:27 +0200 Subject: [PATCH 1153/1208] Make sequali runtime dependent on input file size --- sequali.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sequali.wdl b/sequali.wdl index b43cf281..cbca3653 100644 --- a/sequali.wdl +++ b/sequali.wdl @@ -29,7 +29,7 @@ task Sequali { Int threads = 2 String memory = "4GiB" String dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0" - Int timeMinutes = 59 + Int timeMinutes = 10 + ceil(size(reads, "GiB") + size(mate_reads, "GiB")) * 4 } command <<< From b942c7ed0a833c830aabb227a15d78ca89aecc3e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 2 Apr 2025 14:58:12 +0200 Subject: [PATCH 1154/1208] Slightly higher requirements for pbmm2 than minimap2 --- pbmm2.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pbmm2.wdl b/pbmm2.wdl index 23133278..9155e7b2 100644 --- a/pbmm2.wdl +++ b/pbmm2.wdl @@ -32,7 +32,7 @@ task Mapping { Int sortMemoryGb = 1 Int sortThreads = 2 Int cores = 8 - String memory = "24GiB" + String memory = "30GiB" # 
Slightly higher than minimap2 as compression level can not be set. Int timeMinutes = 1 + ceil(size(queryFile, "G") * 400 / cores) String dockerImage = "quay.io/biocontainers/pbmm2:1.17.0--h9ee0642_0" From d2ac7b2ad030a00d83aa5a0100f79ec5e16dd5d1 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 2 Apr 2025 18:20:33 +0200 Subject: [PATCH 1155/1208] Add filterThreshold, filterPercent to modkit pileup --- modkit.wdl | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/modkit.wdl b/modkit.wdl index 78df28f4..a611a620 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -31,6 +31,8 @@ task Pileup { Int? intervalSize File? includeBed + String? filterThreshold + String? filterPercentile Boolean cpg = false Boolean combineMods = false @@ -57,6 +59,8 @@ task Pileup { ~{true="--cpg" false="" cpg} \ ~{true="--combine-mods" false="" combineMods} \ ~{true="--combine-strands" false="" combineStrands} \ + ~{"--filter-percentile " + filterPercentile} \ + ~{"--filter-threshold " + filterThreshold} \ --log-filepath ~{logFilePath} \ ~{bam} \ - | tee ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10 >> "~{outputBedGraph}_"$4"_"$6".bedGraph"}' @@ -97,12 +101,14 @@ task Pileup { combineStrands: {description: "Whether to combine strands in the output", category: "advanced"} ignore: {description: "Modification type to ignore. For example 'h'.", category: "advanced"} logFilePath: {description: "Path where the log file should be written.", category: "advanced"} + filterThreshold: {description: "Global filter threshold can be specified with by a decimal number (e.g. 0.75). 
Otherwise the automatic filter percentile will be used.", category: "advanced"} + filterPercentile: {description: "This defaults to 0.1, to remove the lowest 10% confidence modification calls, but can be manually adjusted", category: "advanced"} threads: {description: "The number of threads to use for variant calling.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - + # output out: {description: "The output bed files. Not available when bedgraph = true."} outFiles: {description: "Output files when bedgraph = true."} From 204821385c3d176c3425d7052b6f3905ff46541d Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 2 Apr 2025 18:21:11 +0200 Subject: [PATCH 1156/1208] Add a summary task --- CHANGELOG.md | 2 ++ modkit.wdl | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ed79b5b..57519f04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,8 @@ version 6.0.0-dev + New samtools task: split. + Update `bedtools.Intersect` to support `-wa`, `-wb`, and `-s` flags. + Deprecate `modkit.Pileup`'s bedGraph option, it is now output by default. ++ Add support for filterThreshold/filterPercent for `modkit.Pileup`. ++ Add `modkit.Summary` task. 
version 5.2.0 --------------------------- diff --git a/modkit.wdl b/modkit.wdl index a611a620..7546458a 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -115,3 +115,65 @@ task Pileup { logFile: {description: "The generated log file."} } } + +task Summary { + input { + File bam + File bamIndex + + String summary = "modkit.summary.txt" + + Boolean sample = true + Int? numReads # = 10042 + Float? samplingFrac # = 0.1 + Int? seed + + Int threads = 4 + String memory = ceil(size(bam, "GiB") * 0.20) + 10 # Based on a linear model with some fudge (y=-0.13x - 4). + Int timeMinutes = 2880 / threads # 2 Days / threads + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" + } + + command <<< + set -e + mkdir -p $(dirname ~{summary}) + + modkit summary \ + --threads ~{threads} \ + ~{true="" false="--no-sampling" sample} \ + ~{"--num-reads " + numReads} \ + ~{"--sampling-frac " + samplingFrac} \ + ~{"--seed " + seed} \ + ~{bam} > ~{summary} + >>> + + output { + File summaryReport = summary # Normal mode + } + + runtime { + docker: dockerImage + cpu: threads + memory: memory + time_minutes: timeMinutes + } + + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + + sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} + numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} + samplingFrac: {description: "Use a fixed percentage of reads, rather than a fixed number of reads, for sampling.", category: "advanced"} + seed: {description: "A seed can be provided for reproducibility in the sampling fraction case.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", 
category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + summaryReport: {description: "The output modkit summary."} + } +} From a9ec6faf3de64e110209ed2c81b1272e765a6247 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 4 Apr 2025 13:24:46 +0200 Subject: [PATCH 1157/1208] Downgrade deepvariant because of a bug --- deepvariant.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index e9e6c18c..c700416f 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -39,7 +39,9 @@ task RunDeepVariant { String memory = "48GiB" Int timeMinutes = 5000 - String dockerImage = "google/deepvariant:1.8.0" + # Version 1.8.0 has a bug. + # https://github.com/google/deepvariant/issues/912 + String dockerImage = "google/deepvariant:1.6.1" } command { From 741f9708383ff29d0f6f548f9fffad0b8eb7ab37 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 4 Apr 2025 13:26:56 +0200 Subject: [PATCH 1158/1208] Increase time limit for VEP --- vep.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index e99c9fdb..2c1f923b 100644 --- a/vep.wdl +++ b/vep.wdl @@ -36,7 +36,7 @@ task Vep { String memory = "8GiB" # Account time for unpacking the cache. 
- Int timeMinutes = 1 + ceil(size(cacheTar, "GiB")) + ceil(size(inputFile, "MiB") * 3) + Int timeMinutes = 1 + ceil(size(cacheTar, "GiB")) + ceil(size(inputFile, "MiB") * 15) String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0" } From 4fe49b8ef3f1bae978b2fa07ac6e08a282e2f91f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 09:56:43 +0200 Subject: [PATCH 1159/1208] Update samtools parameter_meta --- CHANGELOG.md | 2 +- samtools.wdl | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0781e4b1..1180578a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ version 6.0.0-dev --------------------------- + Samtools merge default thread count increased to 8. + Update docker images in samtools.wdl -+ Add threads and compression levels to applicable tasks. Default to ++ Add threads and compression levels to applicable tasks in samtools. Default to compression level 1. + samtools BgzipAndIndex and Tabix "type" parameter changed to "preset" as is the name of the flag. diff --git a/samtools.wdl b/samtools.wdl index 7a2223f6..cd24e6e9 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -69,6 +69,8 @@ task BgzipAndIndex { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + compressLevel: {description: "Set compression level.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} # outputs compressed: {description: "Compressed input file."} @@ -317,6 +319,7 @@ task Flagstat { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} # outputs flagstat: {description: "The number of alignments for each FLAG type."} @@ -374,6 +377,7 @@ task Index { memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} # outputs indexedBam: {description: "BAM file that was indexed."} @@ -415,6 +419,7 @@ task Markdup { outputBamPath: {description: "The location of the output BAM file.", category: "required"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + threads: {description: "The number of threads to use.", category: "advanced"} # outputs outputBam: {description: "BAM file with duplicate alignments marked."} @@ -471,6 +476,10 @@ task Merge { bamFiles: {description: "The BAM files to merge.", category: "required"} outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"} force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"} + + combineRGHeaders: {description: "Combine @RG headers with colliding IDs", category: "advanced"} + combinePGHeaders: {description: "Combine @PG headers with colliding IDs", category: "advanced"} + threads: {description: "Number of threads to use.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} @@ -567,7 +576,7 @@ task Sort { sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"} compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes.", category: "advanced"} - threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"} + threads: {description: "The number of threads that will be used for this task.", category: "advanced"} memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} @@ -696,7 +705,7 @@ task View { Int? MAPQthreshold File? targetFile - Boolean fast = false # Default should be true, unless a non-BAM format is preferred. So th + Boolean fast = true # Sets compression level to 1. Int threads = 1 String memory = "1GiB" @@ -707,14 +716,15 @@ task View { String outputIndexPath = basename(outputFileName) + ".bai" # Always output to bam and output header. + # -u should be after --fast, and will override it in that case. command { set -e mkdir -p "$(dirname ~{outputFileName})" samtools view -b \ ~{"-T " + referenceFasta} \ ~{"-o " + outputFileName} \ - ~{true="-u " false="" uncompressedBamOutput} \ ~{true="--fast" false="" fast} \ + ~{true="-u " false="" uncompressedBamOutput} \ ~{"-f " + includeFilter} \ ~{"-F " + excludeFilter} \ ~{"-G " + excludeSpecificFilter} \ @@ -741,6 +751,7 @@ task View { # inputs inFile: {description: "A BAM, SAM or CRAM file.", category: "required"} outputFileName: {description: "The location the output BAM file should be written.", category: "common"} + fast: {description: "Sets compression level to 1. 
Set to true by default.", category: "common"} uncompressedBamOutput: {description: "Equivalent to samtools view's `-u` flag.", category: "advanced"} referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"} includeFilter: {description: "Equivalent to samtools view's `-f` option.", category: "advanced"} From 6a78f520a6efee6def3fcc257f5ea3be02daf8cb Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 10:21:57 +0200 Subject: [PATCH 1160/1208] Increase deep variant shards and explain memory usage --- deepvariant.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/deepvariant.wdl b/deepvariant.wdl index c700416f..b0ed2a19 100644 --- a/deepvariant.wdl +++ b/deepvariant.wdl @@ -30,13 +30,17 @@ task RunDeepVariant { String outputVcf = "sample.vcf.gz" String? postprocessVariantsExtraArgs File? customizedModel - Int numShards = 4 + Int numShards = 8 String? outputGVcf String? outputGVcfIndex File? regions String? sampleName Boolean VCFStatsReport = true + # Most of the memory used is at the end, in the step where the variants + # are merged. This is a single-threaded high memory step. The number + # of shards does not influence the memory so much. + # The provided memory here is enough for merge human chromosome 1. String memory = "48GiB" Int timeMinutes = 5000 # Version 1.8.0 has a bug. From ba35d987ca3fe3c27a01034d60cd2ab09369ab31 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 10:36:03 +0200 Subject: [PATCH 1161/1208] Set a lower number of threads for samtools merge to decrease waste --- samtools.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index cd24e6e9..cb8dbd55 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -436,8 +436,9 @@ task Merge { Boolean combinePGHeaders = false Int compressionLevel = 1 - # Merging is often a bottleneck. Set a high number of threads to decrease wall clock time. 
- Int threads = 8 + # Merging is often a bottleneck. With compression level 1 however, + # more than three threads does not add more benefit. + Int threads = 3 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" From 8b41a7feddf6e1f29af7fd825cad6a0ae6811687 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 10:45:14 +0200 Subject: [PATCH 1162/1208] Dynamically set samtools merge threads --- samtools.wdl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index cb8dbd55..7dd9ecc1 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -436,9 +436,8 @@ task Merge { Boolean combinePGHeaders = false Int compressionLevel = 1 - # Merging is often a bottleneck. With compression level 1 however, - # more than three threads does not add more benefit. - Int threads = 3 + # Use one thread per input + one for the output + one for merging + Int threads = length(bamFiles) + 2 String memory = "4GiB" Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 4) String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1" From 17cf284d2c54212b29cdf4e6a347adc0e0a0c458 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 16:38:42 +0200 Subject: [PATCH 1163/1208] Also use threads for faster indexing --- samtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 7dd9ecc1..811f56e0 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -456,7 +456,7 @@ task Merge { ~{true="-c" false="" combineRGHeaders} \ ~{true="-p" false="" combinePGHeaders} \ ~{outputBamPath} ~{sep=' ' bamFiles} - samtools index ~{outputBamPath} ~{indexPath} + samtools index -@ ~{threads - 1} ~{outputBamPath} ~{indexPath} } output { From 1fae30492bdff1af750ac963d565cbb16cc6572b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 16:53:59 +0200 Subject: [PATCH 1164/1208] Add missing 
parameter_meta --- samtools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index 811f56e0..743fce0c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -480,6 +480,8 @@ task Merge { combineRGHeaders: {description: "Combine @RG headers with colliding IDs", category: "advanced"} combinePGHeaders: {description: "Combine @PG headers with colliding IDs", category: "advanced"} + compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"} + threads: {description: "Number of threads to use.", category: "common"} memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From d485e17399c3482aa109e0d1055c2b2bac9d93a4 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Apr 2025 08:30:54 +0200 Subject: [PATCH 1165/1208] Update CHANGELOG.md --- CHANGELOG.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1180578a..96adc8fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,9 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- -+ Samtools merge default thread count increased to 8. ++ MultiQC image updated to version 1.28 ++ Samtools merge now has options added for merging RG and PG headers. ++ Samtools merge default thread count increased based on the number of files. + Update docker images in samtools.wdl + Add threads and compression levels to applicable tasks in samtools. Default to compression level 1. 
From 847ad71a26b3a1ddc1fc06c2fda349fc620ad2b5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 19:40:40 +0200 Subject: [PATCH 1166/1208] Update vt to allow a filter expression and compressed indexed output --- CHANGELOG.md | 1 + vt.wdl | 28 ++++++++++++++++++++++------ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 96adc8fa..dfa40b75 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Update vt task to allow a filter expression and compress and index the output. + MultiQC image updated to version 1.28 + Samtools merge now has options added for merging RG and PG headers. + Samtools merge default thread count increased based on the number of files. diff --git a/vt.wdl b/vt.wdl index 4da2d8cd..4ced1d2a 100644 --- a/vt.wdl +++ b/vt.wdl @@ -27,27 +27,39 @@ task Normalize { File referenceFasta File referenceFastaFai Boolean ignoreMaskedRef = false - String outputPath = "./vt/normalized_decomposed.vcf" + String outputPath = "./vt/normalized_decomposed.vcf.gz" + String? 
filterExpression + + Int compressionLevel = 1 String memory = "4GiB" - Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" + Int timeMinutes = 10 + ceil(size(inputVCF, "GiB") * 240) + String dockerImage = "quay.io/biocontainers/vt:0.57721--h2419454_12" } command { set -eo pipefail mkdir -p "$(dirname ~{outputPath})" - vt normalize ~{inputVCF} \ + vt view -h \ + ~{"-f " + filterExpression} \ + ~{inputVCF} \ + | vt normalize - \ -r ~{referenceFasta} \ ~{true="-m " false="" ignoreMaskedRef} \ - | vt decompose -s - -o ~{outputPath} + | vt decompose -s - \ + | vt view - \ + -c ~{compressionLevel} \ + -o ~{outputPath} + vt index ~{outputPath} } output { File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" } runtime { + cpu: 1 memory: memory time_minutes: timeMinutes docker: dockerImage @@ -61,11 +73,15 @@ task Normalize { referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs.", category: "advanced"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} + filterExpression: {description: "See https://genome.sph.umich.edu/wiki/Vt#Filters for valid expressions.", category: "common"} + compressionLevel: {description: "Compression level for the out vcf.gz file.", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputVcf: {description: "Normalized & decomposed VCF file."} + outputVcf: {description: "Normalized and decomposed VCF file."} + outputVcfIndex: {description: "Index for normalized and decomposed VCF file."} } } From 57018dd55c43af0013f48a61e5119128ccd87d3f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Apr 2025 11:28:56 +0200 Subject: [PATCH 1167/1208] Properly quote vt filter --- vt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vt.wdl b/vt.wdl index 4ced1d2a..635641e9 100644 --- a/vt.wdl +++ b/vt.wdl @@ -41,7 +41,7 @@ task Normalize { set -eo pipefail mkdir -p "$(dirname ~{outputPath})" vt view -h \ - ~{"-f " + filterExpression} \ + ~{"-f '" + filterExpression}~{true="'" false="" defined(filterExpression)} \ ~{inputVCF} \ | vt normalize - \ -r ~{referenceFasta} \ From e39fe10360989d5074580034a4df030e16d27f4c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:08:55 +0200 Subject: [PATCH 1168/1208] do not intermingle singletons --- samtools.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index d724a692..ef89477d 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -157,6 +157,7 @@ task Fastq { String outputRead1 String? outputRead2 String? outputRead0 + String? outputReadS Boolean appendReadNumber = false Boolean outputQuality = false @@ -177,8 +178,10 @@ task Fastq { samtools collate -u -O ~{inputBam} | \ samtools fastq \ ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ + ~{"-1 " + outputRead1} \ ~{"-2 " + outputRead2} \ ~{"-0 " + outputRead0} \ + ~{"-s " + outputReadS} \ ~{"-f " + includeFilter} \ ~{"-F " + excludeFilter} \ ~{"-G " + excludeSpecificFilter} \ @@ -192,6 +195,7 @@ task Fastq { File read1 = outputRead1 File? read2 = outputRead2 File? read0 = outputRead0 + File? 
readS = outputReadS } runtime { @@ -207,6 +211,7 @@ task Fastq { outputRead1: {description: "The location the reads (first reads for pairs, in case of paired-end sequencing) should be written to.", category: "required"} outputRead2: {description: "The location the second reads from pairs should be written to.", category: "common"} outputRead0: {description: "The location the unpaired reads should be written to (in case of paired-end sequenicng).", category: "advanced"} + outputReadS: {description: "The location singleton reads should be written to.", category: "advanced"} appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. Corresponds to `-n/N`.", category: "advanced"} outputQuality: {description: "Equivalent to samtools fastq's `-O` flag.", category: "advanced"} includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`.", category: "advanced"} From b9319418b7a96a0046b9c034649930ccd5cf4fa9 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:09:16 +0200 Subject: [PATCH 1169/1208] The caches in containers caused issues --- samtools.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index ef89477d..315a00b5 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -571,6 +571,9 @@ task Split { command { set -e mkdir -p "~{outputPath}/rg/" + + export XDG_CACHE_HOME=$PWD/.cache/ + export REF_CACHE=$PWD/.cache/hts-ref/%2s/%2s/%s samtools split \ --output-fmt bam \ --output-fmt-option level=~{compressionLevel} \ From 8ede8b774a0296fe484e9f78e25d5d358828099e Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:15:01 +0200 Subject: [PATCH 1170/1208] add biopets validate fastq --- biopet.wdl | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 biopet.wdl diff --git a/biopet.wdl b/biopet.wdl new file mode 100644 index 00000000..ea8a36c8 --- /dev/null +++ b/biopet.wdl @@ -0,0 +1,60 @@ +version 1.0 + +# 
Copyright (c) 2025 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +task ValidateFastq { + input { + File inputRead1 + File? 
inputRead2 + + String memory = "1GiB" + Int timeMinutes = 5 + ceil(size(inputRead1, "GiB")) + String dockerImage = "quay.io/biocontainers/biopet-validatefastq:0.1.1--hdfd78af_3" + } + + command { + set -e + java -jar /usr/local/share/biopet-validatefastq-0.1.1-3/validatefastq-assembly-0.1.1.jar \ + --fastq1 ~{inputRead1} \ + ~{"--fastq2 " + inputRead2} + } + + output { + } + + runtime { + cpu: 1 + memory: memory + docker: dockerImage + time_minutes: timeMinutes + } + + parameter_meta { + # inputs + inputRead1: {description: "The location of the first FASTQ file (first reads for pairs, in case of paired-end sequencing).", category: "required"} + inputRead2: {description: "The location of the paired end reads.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} From 8493c77e477c5522b0947948b47e35be04974fc7 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:19:23 +0200 Subject: [PATCH 1171/1208] require being explicit about locations --- samtools.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 315a00b5..5bb2fb82 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -177,7 +177,6 @@ task Fastq { mkdir -p "$(dirname ~{outputRead1})" samtools collate -u -O ~{inputBam} | \ samtools fastq \ - ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ ~{"-1 " + outputRead1} \ ~{"-2 " + outputRead2} \ ~{"-0 " + outputRead0} \ From 347ed91d4bff4306cea0074ca7f1c7fa2ff517b7 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:20:09 +0200 Subject: [PATCH 1172/1208] Probably unnecessary --- samtools.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 5bb2fb82..0ef1419c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -571,8 +571,6 @@ task Split { set -e mkdir -p "~{outputPath}/rg/" - export XDG_CACHE_HOME=$PWD/.cache/ - export REF_CACHE=$PWD/.cache/hts-ref/%2s/%2s/%s samtools split \ --output-fmt bam \ --output-fmt-option level=~{compressionLevel} \ From 7ff2ac2c1ebab33a3872297beb189e648eb90724 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:22:10 +0200 Subject: [PATCH 1173/1208] documentation --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 986dfd13..f20dc82d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,8 @@ version 6.0.0-dev + Fixed bug whereby `samtools.Fastq` could produce out of sync R1/R2 when used with an unsorted bam input. `samtools collate` is now used by default to group reads by readname in order to avoid this issue. + New samtools task: split. + Update `bedtools.Intersect` to support `-wa`, `-wb`, and `-s` flags. 
++ Add `biopet.ValidateFastq` to check your fastq files for pairing and other correctness. ++ **Breaking**: `samtools.Fastq` now requires defining your singleton read location. This only affects you if you were previously using this task with only a single output read file. version 5.2.0 --------------------------- From 1ee07a657fd46f6dc227573c2c59d7ef4d0cd4b9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 10:49:41 +0200 Subject: [PATCH 1174/1208] add -no-upstream to snpeff task --- snpeff.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/snpeff.wdl b/snpeff.wdl index 0f14e5b5..8718e01b 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -32,6 +32,7 @@ task SnpEff { Boolean hgvs = true Boolean lof = true Boolean noDownstream = false + Boolean noUpstream = false Boolean noIntergenic = false Boolean noShiftHgvs = false Int? upDownStreamLen @@ -39,7 +40,7 @@ task SnpEff { String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/snpeff:5.0--0" + String dockerImage = "quay.io/biocontainers/snpeff:5.2--hdfd78af_1" } command { @@ -55,6 +56,7 @@ task SnpEff { ~{true="-hgvs" false="-noHgvs" hgvs} \ ~{true="-lof" false="-noLof" lof} \ ~{true="-no-downstream" false="" noDownstream} \ + ~{true="-no-upstream" false="" noUpstream} \ ~{true="-no-intergenic" false="" noIntergenic} \ ~{true="-noShiftHgvs" false="" noShiftHgvs} \ ~{"-upDownStreamLen " + upDownStreamLen} \ @@ -82,6 +84,7 @@ task SnpEff { hgvs: {description: "Equivalent to `-hgvs` if true or `-noHgvs` if false.", category: "advanced"} lof: {description: "Equivalent to `-lof` if true or `-noLof` if false.", category: "advanced"} noDownstream: {description: "Equivalent to the `-no-downstream` flag.", category: "advanced"} + noUpstream: {description: "Equivalent to the `-no-upstream` flag.", category: "advanced"} noIntergenic: {description: "Equivalent to the `-no-intergenic` flag.", category: "advanced"} noShiftHgvs: {description: 
"Equivalent to the `-noShiftHgvs` flag.", category: "advanced"} upDownStreamLen: {descriptoin: "Equivalent to the `-upDownStreamLen` option.", category: "advanced"} From d9d989e07649ac3177f6464100e192418e716ce3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 11:22:36 +0200 Subject: [PATCH 1175/1208] Add snpsift filter --- CHANGELOG.md | 2 ++ snpsift.wdl | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 snpsift.wdl diff --git a/CHANGELOG.md b/CHANGELOG.md index dfa40b75..7e209f1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Added a task for SnpSift filter. ++ Updated the snpEff task to allow setting the `-no-upstream` flag. + Update vt task to allow a filter expression and compress and index the output. + MultiQC image updated to version 1.28 + Samtools merge now has options added for merging RG and PG headers. diff --git a/snpsift.wdl b/snpsift.wdl new file mode 100644 index 00000000..0bb413f6 --- /dev/null +++ b/snpsift.wdl @@ -0,0 +1,69 @@ +version 1.0 + +# MIT License +# +# Copyright (c) 2025 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Filter { + input { + File vcf + File? vcfIndex + String filterExpression + String outputPath = "./snpsift_filter.vcf" + + String memory = "9GiB" + String javaXmx = "8G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/snpsift:5.2--hdfd78af_0" + } + + command { + SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + filter \ + "~{filterExpression}" \ + ~{vcf} \ + > ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes # !UnknownRuntimeKey + memory: memory + } + + parameter_meta { + vcf: {description: "A VCF file to filter.", category: "required"} + vcfIndex: {description: "The index for the VCF file.", category: "common"} + filterExpression: {description: "The SnpSift filtering expression.", category: "required"} + outputPath: {description: "The path to write the output to.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From d4eb18d70d68e5c75539c272bf0db065e5f0bf71 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 11:35:56 +0200 Subject: [PATCH 1176/1208] add region input to bcftools view --- CHANGELOG.md | 1 + bcftools.wdl | 5 ++++- snpsift.wdl | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e209f1a..f13ab24b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Updated the bcftools view task to allow specifying a region. + Added a task for SnpSift filter. + Updated the snpEff task to allow setting the `-no-upstream` flag. + Update vt task to allow a filter expression and compress and index the output. diff --git a/bcftools.wdl b/bcftools.wdl index 7df8911d..11864a00 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -349,6 +349,7 @@ task View { String? exclude String? include + String? 
region Array[String] samples = [] String memory = "256MiB" @@ -368,7 +369,8 @@ task View { ~{if length(samples) > 0 then "-s" else ""} ~{sep="," samples} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ - ~{inputFile} + ~{inputFile} \ + ~{region} ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} } @@ -390,6 +392,7 @@ task View { outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} + region: {description: "The region to retrieve from the VCF file.", category: "common"} excludeUncalled: {description: "Exclude sites without a called genotype (see man page for details).", category: "advanced"} samples: {description: "A list of sample names to include.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} diff --git a/snpsift.wdl b/snpsift.wdl index 0bb413f6..5bac6484 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -36,6 +36,8 @@ task Filter { } command { + set -e + mkdir -p "$(dirname ~{outputPath})" SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ filter \ "~{filterExpression}" \ From ecd2242e9a71f352a6b11683a969f9f5804cb18d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 12:14:13 +0200 Subject: [PATCH 1177/1208] add an ipnut for an index file in bcftools view --- CHANGELOG.md | 1 + bcftools.wdl | 2 ++ 2 files changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f13ab24b..7d5ad41d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Updated the bcftools view task with an input for an index file. 
+ Updated the bcftools view task to allow specifying a region. + Added a task for SnpSift filter. + Updated the snpEff task to allow setting the `-no-upstream` flag. diff --git a/bcftools.wdl b/bcftools.wdl index 11864a00..b923781c 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -344,6 +344,7 @@ task Stats { task View { input { File inputFile + File? inputFileIndex String outputPath = "output.vcf" Boolean excludeUncalled = false @@ -389,6 +390,7 @@ task View { parameter_meta { # inputs inputFile: {description: "A vcf or bcf file.", category: "required"} + inputFileIndex: {description: "the index for the input file.", category: "common"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} From abcddcda79a0821ef86bb0d1b40f2e5b7264e829 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 May 2025 11:56:23 +0200 Subject: [PATCH 1178/1208] fix wdlTools parsing issue in bcftools annotate --- CHANGELOG.md | 2 ++ bcftools.wdl | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d5ad41d..7ad69a3e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Fixed an issue with the parameter_meta section of bcftools annotate + which caused wdlTools to error on parsing the file. + Updated the bcftools view task with an input for an index file. + Updated the bcftools view task to allow specifying a region. + Added a task for SnpSift filter. 
diff --git a/bcftools.wdl b/bcftools.wdl index b923781c..6200a1a1 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -111,7 +111,7 @@ task Annotate { collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} headerLines: {description: "Lines to append to the VCF header (see man page for details).", category: "advanced"} - newId: {description: "Assign ID on the fly (e.g. --set-id +'%CHROM\_%POS').", category: "advanced"} + newId: {description: "Assign ID on the fly (e.g. --set-id +'%CHROM\\_%POS').", category: "advanced"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} markSites: {description: "Annotate sites which are present ('+') or absent ('-') in the -a file with a new INFO/TAG flag.", category: "advanced"} regions: {description: "Restrict to comma-separated list of regions.", category: "advanced"} From 748fe367e1964e5014cdb60a3def6976f2846d3c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 May 2025 14:51:52 +0200 Subject: [PATCH 1179/1208] change name of snpsift task --- snpsift.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snpsift.wdl b/snpsift.wdl index 5bac6484..6b6a1feb 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -22,7 +22,7 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -task Filter { +task SnpSiftFilter { input { File vcf File? 
vcfIndex From 2fc90c9790b41781ca35144e0d495f293a614382 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 May 2025 16:05:27 +0200 Subject: [PATCH 1180/1208] add a useless ls to check if a dnanexus error is caused by lazy loading --- bcftools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 6200a1a1..5ab04c1c 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -362,6 +362,8 @@ task View { command { set -e + ls ~{inputFileIndex} + mkdir -p "$(dirname ~{outputPath})" bcftools view \ ~{"--exclude " + exclude} \ From 82a5715109d7c352c016d2672cea27b0ab4eb7f0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 28 May 2025 09:14:43 +0200 Subject: [PATCH 1181/1208] add ls to snpeff, bcftools view and snpsift so I can see the paths when run on dnanexus --- bcftools.wdl | 2 +- snpeff.wdl | 1 + snpsift.wdl | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 5ab04c1c..0381d4cf 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -362,7 +362,7 @@ task View { command { set -e - ls ~{inputFileIndex} + ls ~{inputFile} ~{inputFileIndex} mkdir -p "$(dirname ~{outputPath})" bcftools view \ diff --git a/snpeff.wdl b/snpeff.wdl index 8718e01b..924db8db 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -45,6 +45,7 @@ task SnpEff { command { set -e + ls ~{vcf} ~{vcfIndex} mkdir -p "$(dirname ~{outputPath})" unzip ~{datadirZip} snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ diff --git a/snpsift.wdl b/snpsift.wdl index 6b6a1feb..5daacd36 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -37,6 +37,8 @@ task SnpSiftFilter { command { set -e + ls ~{vcf} ~{vcfIndex} + mkdir -p "$(dirname ~{outputPath})" SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ filter \ From 0513965516fab2b2a6a4c9d146813e65ffa77b19 Mon Sep 17 00:00:00 2001 From: Helena Date: Wed, 28 May 2025 14:04:40 +0200 Subject: [PATCH 1182/1208] Update modkit.wdl --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl 
b/modkit.wdl index 7546458a..424ba755 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -129,7 +129,7 @@ task Summary { Int? seed Int threads = 4 - String memory = ceil(size(bam, "GiB") * 0.20) + 10 # Based on a linear model with some fudge (y=-0.13x - 4). + String memory = ceil(size(bam, "GiB") * 110) + 40 # Based on a linear model with some fudge (memory = 107 * file_size - 1.8). Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } From 58b52865e986970b7c49d10096afbf1d0eec8e84 Mon Sep 17 00:00:00 2001 From: Helena Date: Wed, 28 May 2025 15:37:47 +0200 Subject: [PATCH 1183/1208] Update modkit.wdl More reasonable bounds --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 424ba755..094f0041 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -129,7 +129,7 @@ task Summary { Int? seed Int threads = 4 - String memory = ceil(size(bam, "GiB") * 110) + 40 # Based on a linear model with some fudge (memory = 107 * file_size - 1.8). + String memory = ceil(size(bam, "GiB") * 115) + 4 # Based on a linear model with some fudge (memory = 107 * file_size - 1.8). Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } From 5d4f097ad010fb12c4b7599511eaafc741b64932 Mon Sep 17 00:00:00 2001 From: Helena Date: Wed, 28 May 2025 16:01:43 +0200 Subject: [PATCH 1184/1208] re-correct it. --- modkit.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index 094f0041..4aecb517 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -129,7 +129,7 @@ task Summary { Int? seed Int threads = 4 - String memory = ceil(size(bam, "GiB") * 115) + 4 # Based on a linear model with some fudge (memory = 107 * file_size - 1.8). + String memory = ceil(size(bam, "GiB") * 0.1) + 5 # Based on a linear model with some fudge (memory = 0.07540 * file_size - 0.6). 
Int timeMinutes = 2880 / threads # 2 Days / threads String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.2--hcdda2d0_0" } From eafceb0f98e68feb884f8a947c15c29a2e52eb5b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 3 Jun 2025 15:52:08 +0200 Subject: [PATCH 1185/1208] WIP add option to output compressed VCF files to snpeff and snpsift --- snpeff.wdl | 10 ++++++++-- snpsift.wdl | 5 ++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/snpeff.wdl b/snpeff.wdl index 924db8db..e1b520af 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -40,9 +40,12 @@ task SnpEff { String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/snpeff:5.2--hdfd78af_1" + # Multicontainer with snpeff 5.2 and bgzip/tabix 1.19.1 + String dockerImage = "quay.io/biocontainers/mulled-v2-2fe536b56916bd1d61a6a1889eb2987d9ea0cd2f:c51b2e46bf63786b2d9a7a7d23680791163ab39a-0" } + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") + command { set -e ls ~{vcf} ~{vcfIndex} @@ -61,12 +64,15 @@ task SnpEff { ~{true="-no-intergenic" false="" noIntergenic} \ ~{true="-noShiftHgvs" false="" noShiftHgvs} \ ~{"-upDownStreamLen " + upDownStreamLen} \ - > ~{outputPath} + ~{if compressed then "| bgzip " else ""} > ~{outputPath} + + ~{if compressed then "tabix ~{outputPath}" else ""} rm -r $PWD/data } output { File outputVcf = outputPath + File? outputVcfIndex = outputPath + ".tbi" } runtime { diff --git a/snpsift.wdl b/snpsift.wdl index 5daacd36..d964c255 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -44,11 +44,14 @@ task SnpSiftFilter { filter \ "~{filterExpression}" \ ~{vcf} \ - > ~{outputPath} + ~{if compressed then "| bgzip " else ""} > ~{outputPath} + + ~{if compressed then "tabix ~{outputPath}" else ""} } output { File outputVcf = outputPath + File? 
outputVcfIndex = outputPath + ".tbi" } runtime { From 16656ff77fa9f88577298fd7e8cc00c5eba02004 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 5 Jun 2025 11:38:01 +0200 Subject: [PATCH 1186/1208] update changelog, fix missing variable --- CHANGELOG.md | 1 + snpsift.wdl | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ad69a3e..7de262af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Add support for outputting compressed files to snpeff and snpsift. + Fixed an issue with the parameter_meta section of bcftools annotate which caused wdlTools to error on parsing the file. + Updated the bcftools view task with an input for an index file. diff --git a/snpsift.wdl b/snpsift.wdl index d964c255..4c354f48 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -32,9 +32,12 @@ task SnpSiftFilter { String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/snpsift:5.2--hdfd78af_0" + # Multicontainer with SnpSift 5.2 and bgzip/tabix 1.22 + String dockerImage = "quay.io/biocontainers/mulled-v2-d4bc0c23eb1d95c7ecff7f0e8b3a4255503fd5d4:c51b2e46bf63786b2d9a7a7d23680791163ab39a-0" } + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") + command { set -e ls ~{vcf} ~{vcfIndex} From e9189a7f5d61a46d1deec0108900a11d70630933 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Fri, 6 Jun 2025 10:55:53 +0200 Subject: [PATCH 1187/1208] missing trailing slash breaks samtools flagstat --- samtools.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 743fce0c..ac2e868a 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -296,9 +296,10 @@ task Flagstat { command { set -e mkdir -p "$(dirname ~{outputPath})" + samtools flagstat \ - --threads ~{threads - 1} - ~{inputBam} > ~{outputPath} + --threads 
~{threads - 1} \ + ~{inputBam} > ~{outputPath} } output { From 69a9c0a6751f78cfaa75c325fc49425113e268b1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 11:56:44 +0200 Subject: [PATCH 1188/1208] Add a task for bcftools norm --- CHANGELOG.md | 1 + bcftools.wdl | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7de262af..003aa97a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Add a task for bcftools norm. + Add support for outputting compressed files to snpeff and snpsift. + Fixed an issue with the parameter_meta section of bcftools annotate which caused wdlTools to error on parsing the file. diff --git a/bcftools.wdl b/bcftools.wdl index 0381d4cf..1b00b5fe 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -180,6 +180,67 @@ task Filter { } } +task Norm { + input { + File inputFile + File? inputFileIndex + String outputPath = "output.vcf.gz" + + File? fasta + String? regions + Boolean splitMultiallelicSites = false + + String memory = "2GiB" + Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + } + + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") + + command { + set -e + ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud + + mkdir -p "$(dirname ~{outputPath})" + bcftools norm \ + -o ~{outputPath} \ + -O ~{true="z" false="v" compressed} \ + ~{"--regions " + regions} \ + ~{"--fasta " + fasta} \ + ~{if splitMultiallelicSites then "--multiallelics -both" else ""} + + ~{if compressed then "bcftools index --tbi ~{outputPath}" else ""} + } + + output { + File outputVcf = outputPath + File? 
outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "A vcf or bcf file.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + fasta: {description: "Equivalent to bcftools norm's `--fasta` option.", category: "advanced"} + regions: {description: "Equivalent to bcftools norm's `--regions` option.", category: "advanced"} + splitMultiallelicSites: {description: "Whether multiallelic lines should be split up.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Sorted VCF file."} + outputVcfIndex: {description: "Index of sorted VCF file."} + } +} + task Sort { input { File inputFile From 5d4f5a7fa3846dea7b8a16fce9c47d8674a5f260 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:26:56 +0200 Subject: [PATCH 1189/1208] more time for bcftools norm --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 1b00b5fe..5fb06016 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -191,7 +191,7 @@ task Norm { Boolean splitMultiallelicSites = false String memory = "2GiB" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) + Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 2 String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From c97c55a47411b2395289ed3bf0357d8686dc7350 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:38:14 +0200 Subject: [PATCH 1190/1208] more time for bcftools Norm --- 
bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 5fb06016..56564b17 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -191,7 +191,7 @@ task Norm { Boolean splitMultiallelicSites = false String memory = "2GiB" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 2 + Int timeMinutes = 5 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 88ac2526f86f5a89d6de0fe74077f6bab05baf8d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:52:37 +0200 Subject: [PATCH 1191/1208] reset time for bcftools norm --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 56564b17..1b00b5fe 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -191,7 +191,7 @@ task Norm { Boolean splitMultiallelicSites = false String memory = "2GiB" - Int timeMinutes = 5 + ceil(size(inputFile, "G")) + Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 3c8ec631930d4ec7df1d01ba802d5943257dfd42 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:58:40 +0200 Subject: [PATCH 1192/1208] fix bcftools norm --- bcftools.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 1b00b5fe..ae2dee4e 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -207,7 +207,8 @@ task Norm { -O ~{true="z" false="v" compressed} \ ~{"--regions " + regions} \ ~{"--fasta " + fasta} \ - ~{if splitMultiallelicSites then "--multiallelics -both" else ""} + ~{if splitMultiallelicSites then "--multiallelics -both" else ""} \ + ~{inputFile} ~{if compressed then "bcftools index --tbi ~{outputPath}" else ""} } From 5dab6c7b08f05e831ea110b44acdefedc298f67b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 13:36:41 +0200 Subject: [PATCH 1193/1208] more memory for bcftools norm --- bcftools.wdl | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index ae2dee4e..b48956cc 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -190,7 +190,7 @@ task Norm { String? regions Boolean splitMultiallelicSites = false - String memory = "2GiB" + String memory = "10GiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 99c562c5e8ed51e8a2a04ec5dc72dada5248ff9e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 15:08:05 +0200 Subject: [PATCH 1194/1208] more memory for bcftools norm --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index b48956cc..7e297bc7 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -190,7 +190,7 @@ task Norm { String? regions Boolean splitMultiallelicSites = false - String memory = "10GiB" + String memory = "64GiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 99b9aca2b67d6a5d138c0b4dc9317f6b03bbc395 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 11 Jun 2025 14:24:53 +0200 Subject: [PATCH 1195/1208] Disable ai in multiqc --- CHANGELOG.md | 1 + multiqc.wdl | 3 +++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44f71e4b..e0b036ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ version 6.0.0-dev + Deprecate `modkit.Pileup`'s bedGraph option, it is now output by default. + Add support for filterThreshold/filterPercent for `modkit.Pileup`. + Add `modkit.Summary` task. ++ Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. version 5.2.0 --------------------------- diff --git a/multiqc.wdl b/multiqc.wdl index fae52178..18667b91 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -39,6 +39,7 @@ task MultiQC { # This must be actively enabled in my opinion. # The tools default is to upload. 
Boolean megaQCUpload = false + Boolean enableAi = false Int? dirsDepth String? title @@ -124,6 +125,7 @@ task MultiQC { ~{true="--lint" false="" lint} \ ~{true="--pdf" false="" pdf} \ ~{false="--no-megaqc-upload" true="" megaQCUpload} \ + ~{false="--no-ai" true="" enableAi} \ ~{"--config " + config} \ ~{"--cl-config " + clConfig } \ ~{reportDir} @@ -159,6 +161,7 @@ task MultiQC { lint: {description: "Equivalent to MultiQC's `--lint` flag.", category: "advanced"} pdf: {description: "Equivalent to MultiQC's `--pdf` flag.", category: "advanced"} megaQCUpload: {description: "Opposite to MultiQC's `--no-megaqc-upload` flag.", category: "advanced"} + enableAi: {description: "Opposite to MultiQC's `--no-ai` flag.", category: "advanced"} dirsDepth: {description: "Equivalent to MultiQC's `--dirs-depth` option.", category: "advanced"} title: {description: "Equivalent to MultiQC's `--title` option.", category: "advanced"} comment: {description: "Equivalent to MultiQC's `--comment` option.", category: "advanced"} From c882527a4c6e3c476a2a1ba15319b30d70f6dc53 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 17 Jun 2025 10:46:19 +0200 Subject: [PATCH 1196/1208] Support supplying additional reports/config to multiqc --- CHANGELOG.md | 1 + multiqc.wdl | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0b036ac..42542531 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ version 6.0.0-dev + Add support for filterThreshold/filterPercent for `modkit.Pileup`. + Add `modkit.Summary` task. + Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. ++ Support providing additional reports to MultiQC in workflow configuration. version 5.2.0 --------------------------- diff --git a/multiqc.wdl b/multiqc.wdl index 18667b91..8f05a36e 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -53,6 +53,7 @@ task MultiQC { File? fileList Array[String]+? exclude Array[String]+? module + Array[File]+? 
additionalReports String? dataFormat File? config # A directory String? clConfig @@ -79,13 +80,15 @@ task MultiQC { # strategy. Using python's builtin hash is unique enough # for these purposes. + Array[File] allReports = flatten([reports, select_all([additionalReports])]) + command { python3 < Date: Wed, 18 Jun 2025 08:38:36 +0200 Subject: [PATCH 1197/1208] ahmust be flatter --- multiqc.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/multiqc.wdl b/multiqc.wdl index 8f05a36e..db47ac87 100644 --- a/multiqc.wdl +++ b/multiqc.wdl @@ -80,7 +80,7 @@ task MultiQC { # strategy. Using python's builtin hash is unique enough # for these purposes. - Array[File] allReports = flatten([reports, select_all([additionalReports])]) + Array[File] allReports = flatten([reports, flatten(select_all([additionalReports]))]) command { python3 < Date: Mon, 7 Jul 2025 11:27:17 +0200 Subject: [PATCH 1198/1208] bump bedtools sort --- bedtools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bedtools.wdl b/bedtools.wdl index a5d8aab3..50acd42d 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -209,7 +209,7 @@ task Sort { String memory = "~{512 + ceil(size(inputBed, "MiB"))}MiB" Int timeMinutes = 1 + ceil(size(inputBed, "GiB")) - String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + String dockerImage = "quay.io/biocontainers/bedtools:2.31.1--hf5e1c6e_2" } command { From c8fbb60c214cb3defe74b71aa60df6eefbae594a Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Mon, 7 Jul 2025 11:28:18 +0200 Subject: [PATCH 1199/1208] document --- CHANGELOG.md | 1 + bedtools.wdl | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0b036ac..920993ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ bedtools.Sort: bumped container version to permit use of `faidx`. 
+ Update vt task to allow a filter expression and compress and index the output. + MultiQC image updated to version 1.28 + Samtools merge now has options added for merging RG and PG headers. diff --git a/bedtools.wdl b/bedtools.wdl index 50acd42d..7fcce28f 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -224,7 +224,7 @@ task Sort { ~{true="-chrThenScoreA" false="" chrThenScoreA} \ ~{true="-chrThenScoreD" false="" chrThenScoreD} \ ~{"-g " + genome} \ - ~{"-faidx" + faidx} \ + ~{"-faidx " + faidx} \ > ~{outputBed} } From 633d0bc76ffb27f09b957e7eb26153a8f3d5edac Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Mon, 7 Jul 2025 12:33:54 +0200 Subject: [PATCH 1200/1208] Add no name check support, requiring bumping Intersect as well --- bedtools.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bedtools.wdl b/bedtools.wdl index 7fcce28f..64fccc7b 100644 --- a/bedtools.wdl +++ b/bedtools.wdl @@ -270,10 +270,11 @@ task Intersect { Boolean writeA = false Boolean writeB = false Boolean stranded = false + Boolean nonamecheck = false String memory = "~{512 + ceil(size([regionsA, regionsB], "MiB"))}MiB" Int timeMinutes = 1 + ceil(size([regionsA, regionsB], "GiB")) - String dockerImage = "quay.io/biocontainers/bedtools:2.23.0--hdbcaa40_3" + String dockerImage = "quay.io/biocontainers/bedtools:2.31.1--hf5e1c6e_2" } Boolean sorted = defined(faidx) @@ -289,6 +290,7 @@ task Intersect { ~{true="-wb" false="" writeB} \ ~{true="-s" false="" stranded} \ ~{true="-sorted" false="" sorted} \ + ~{true="-nonamecheck" false="" nonamecheck} \ ~{true="-g sorted.genome" false="" sorted} \ > ~{outputBed} } @@ -313,6 +315,7 @@ task Intersect { writeA: {description: "Write the original entry in A for each overlap.", category: "advanced"} writeB: {description: "Write the original entry in B for each overlap. Useful for knowing what A overlaps.", category: "advanced"} stranded: {description: "Force “strandedness”. 
That is, only report hits in B that overlap A on the same strand. By default, overlaps are reported without respect to strand.", category: "advanced"} + nonamecheck: {description: "Disable the bedtools intersect name check. This is used to catch chr1 vs chr01 or chr1 vs 1 naming inconsistencies. However, it throws an error for GIAB hg38 which has capital letters. https://github.com/arq5x/bedtools2/issues/648", category: "advanced"} memory: {description: "The amount of memory needed for the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} From c676fe2198b18b437a3e279c240290b7227b94d4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 17 Jul 2025 10:37:57 +0200 Subject: [PATCH 1201/1208] address review comments --- bcftools.wdl | 13 ++++++++----- snpeff.wdl | 7 ++++++- snpsift.wdl | 7 ++++++- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 7e297bc7..31c7db13 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -190,8 +190,9 @@ task Norm { String? 
regions Boolean splitMultiallelicSites = false - String memory = "64GiB" + String memory = "4GiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) + Int diskGb = ceil(2.1 * size(inputFile, "G") + size(fasta, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -199,7 +200,7 @@ task Norm { command { set -e - ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud + ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" bcftools norm \ @@ -222,6 +223,7 @@ task Norm { memory: memory time_minutes: timeMinutes docker: dockerImage + disks: "local-disk ~{diskGb} SSD" # Based on an example in dxCompiler docs } parameter_meta { @@ -234,11 +236,12 @@ task Norm { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + diskGb: {description: "The amount of disk space needed for this job in GiB.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputVcf: {description: "Sorted VCF file."} - outputVcfIndex: {description: "Index of sorted VCF file."} + outputVcf: {description: "Normalized VCF file."} + outputVcfIndex: {description: "Index of Normalized VCF file."} } } @@ -424,7 +427,7 @@ task View { command { set -e - ls ~{inputFile} ~{inputFileIndex} + ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" bcftools view \ diff --git a/snpeff.wdl b/snpeff.wdl index e1b520af..b972ab30 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -48,7 +48,7 @@ task SnpEff { command { set -e - ls ~{vcf} ~{vcfIndex} + ls ~{vcf} ~{vcfIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" unzip ~{datadirZip} snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ @@ -82,6 +82,7 @@ task SnpEff { } parameter_meta { + # inputs vcf: {description: "A VCF file to analyse.", category: "required"} vcfIndex: {description: "The index for the VCF file.", category: "required"} genomeVersion: {description: "The version of the genome to be used. The database for this genome must be present in the datadirZip.", category: "required"} @@ -102,5 +103,9 @@ task SnpEff { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Annotated VCF file."} + outputVcfIndex: {description: "Index of annotated VCF file."} } } diff --git a/snpsift.wdl b/snpsift.wdl index 4c354f48..a62f7295 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -40,7 +40,7 @@ task SnpSiftFilter { command { set -e - ls ~{vcf} ~{vcfIndex} + ls ~{vcf} ~{vcfIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ @@ -64,6 +64,7 @@ task SnpSiftFilter { } parameter_meta { + # inputs vcf: {description: "A VCF file to filter.", category: "required"} vcfIndex: {description: "The index for the VCF file.", category: "common"} filterExpression: {description: "The SnpSift filtering expression.", category: "required"} @@ -75,5 +76,9 @@ task SnpSiftFilter { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Filtered VCF file."} + outputVcfIndex: {description: "Index of filtered VCF file."} } } From 9e9ae08503c7c2e10c0fe16d018bfb2810c4f3de Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 11:36:55 +0200 Subject: [PATCH 1202/1208] Update clair3 image --- CHANGELOG.md | 1 + clair3.wdl | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c56b124a..5cabdece 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -54,6 +54,7 @@ version 6.0.0-dev + Add support for filterThreshold/filterPercent for `modkit.Pileup`. + Add `modkit.Summary` task. + Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. 
++ Update clair3 version from 1.0.11 to 1.1.0 version 5.2.0 --------------------------- diff --git a/clair3.wdl b/clair3.wdl index 5a6154af..ae54ef40 100644 --- a/clair3.wdl +++ b/clair3.wdl @@ -34,8 +34,8 @@ task Clair3 { Int threads = 8 Boolean includeAllCtgs = false String memory = "~{threads + 16}GiB" - Int timeMinutes = 10 + ceil(size(bam, "G") * 400 / threads) - String dockerImage = "quay.io/biocontainers/clair3:1.0.11--py39hd649744_0" + Int timeMinutes = 10 + ceil(size(bam, "G") * 200 / threads) + String dockerImage = "quay.io/biocontainers/clair3:1.1.0--py39hd649744_0" } String modelArg = "~{if defined(modelTar) then basename(select_first([modelTar]), '.tar.gz') else builtinModel}" @@ -91,4 +91,4 @@ task Clair3 { vcfIndex: {description: "Output VCF index."} } -} \ No newline at end of file +} From d648745cfeedbc816081547f9772f0ee2d9f1692 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 12:13:43 +0200 Subject: [PATCH 1203/1208] Improve whatshap runtime/memory estimates --- CHANGELOG.md | 1 + whatshap.wdl | 20 ++++++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5cabdece..cfb8f41d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,7 @@ version 6.0.0-dev + Add `modkit.Summary` task. + Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. + Update clair3 version from 1.0.11 to 1.1.0 ++ Improve whatshap runtime/memory usage for our cluster. version 5.2.0 --------------------------- diff --git a/whatshap.wdl b/whatshap.wdl index da86ad82..beef5e99 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -40,12 +40,19 @@ task Phase { String memory = "4GiB" Int timeMinutes = 120 + + String memory = 2 + ceil(size(bam, "G") / 20 ) + Int timeMinutes = 400 + ceil(size(bam, "G") * 0.9 ) + # Whatshap 1.0, tabix 0.2.5. 
String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" } command { set -e + + mkdir -p $(dirname ~{outputVCF}) + whatshap phase \ ~{vcf} \ ~{phaseInput} \ @@ -110,12 +117,16 @@ task Stats { String? chromosome String memory = "4GiB" - Int timeMinutes = 120 + Int timeMinutes = 30 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" } command { + set -e + + mkdir -p $(dirname ~{tsv}) + whatshap stats \ ~{vcf} \ ~{if defined(gtf) then ("--gtf " + '"' + gtf + '"') else ""} \ @@ -169,7 +180,9 @@ task Haplotag { String? regions String? sample - String memory = "4GiB" + String memory = 2 + ceil(size(bam, "G") / 50 ) + Int timeMinutes = 50 + ceil(size(bam, "G") * 2 ) + Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" @@ -177,6 +190,9 @@ task Haplotag { command { set -e + + mkdir -p $(dirname ~{outputFile}) + whatshap haplotag \ ~{vcf} \ ~{alignments} \ From 7e246b01de31489577c434f69a5adbd2ab7cea2c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 12:18:54 +0200 Subject: [PATCH 1204/1208] Add modkit tasks --- CHANGELOG.md | 3 + modkit.wdl | 191 ++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 193 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cfb8f41d..bf9d9238 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,9 @@ version 6.0.0-dev + Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. + Update clair3 version from 1.0.11 to 1.1.0 + Improve whatshap runtime/memory usage for our cluster. 
++ Add `Modkit.SampleProbs` ++ Add `Modkit.DmrMulti` ++ Add `Modkit.DmrMultiInputPrep` to construct the command line for `Modkit.DmrMulti` version 5.2.0 --------------------------- diff --git a/modkit.wdl b/modkit.wdl index 678e326a..a35d8ed2 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -130,7 +130,7 @@ task Summary { Int threads = 4 String memory = ceil(size(bam, "GiB") * 0.1) + 5 # Based on a linear model with some fudge (memory = 0.07540 * file_size - 0.6). - Int timeMinutes = 2880 / threads # 2 Days / threads + Int timeMinutes = 60 # originally this was set at "2 Days / threads" but with 4 threads and that much ram, it's pretty fast. String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" } @@ -177,3 +177,192 @@ task Summary { summaryReport: {description: "The output modkit summary."} } } + +task SampleProbs { + input { + File bam + File bamIndex + + String summary = "modkit-sample-probs" + + Boolean sample = true + Int? numReads # = 10042 + Float? samplingFrac # = 0.1 + Int? 
seed + + Int threads = 4 + String memory = "32G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" + } + + command <<< + set -e + mkdir -p ~{summary} + + modkit sample-probs \ + --threads ~{threads} \ + --out-dir ~{summary} \ + ~{true="" false="--no-sampling" sample} \ + ~{"--num-reads " + numReads} \ + ~{"--sampling-frac " + samplingFrac} \ + ~{"--seed " + seed} \ + --hist \ + ~{bam} + >>> + + output { + File reportCounts = "~{summary}/counts.html" + File reportProportion = "~{summary}/proportion.html" + File reportProbabilitiesTsv = "~{summary}/probabilities.tsv" + File reportThresholdsTsv = "~{summary}/thresholds.tsv" + } + + runtime { + docker: dockerImage + cpu: threads + memory: memory + time_minutes: timeMinutes + } + + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + + sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} + numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} + samplingFrac: {description: "Use a fixed percentage of reads, rather than a fixed number of reads, for sampling.", category: "advanced"} + seed: {description: "A seed can be provided for reproducibility in the sampling fraction case.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + summaryReport: {description: "The output modkit summary."} + } +} + +task DmrMultiInputPrep { + input { + Array[File] control + Array[File] condition + String controlName + String conditionName + + Int threads = 1 + String memory = "1G" + Int timeMinutes = 5 + String dockerImage = "quay.io/biocontainers/multiqc:1.28--pyhdfd78af_0" + } + + command <<< + cat > modkit_dmr.py <<'CODE' + #!/usr/bin/env python3 + import argparse + parser = argparse.ArgumentParser() + parser.add_argument('--control_n', type=str, default='control') + parser.add_argument('--control_f', type=str,nargs='+') + parser.add_argument('--condition_n', type=str, default='condition') + parser.add_argument('--condition_f', type=str,nargs='+') + args = parser.parse_args() + modkit = [] + for i, x in enumerate(args.control_f): + modkit.extend(['-s', x, f'{args.control_n}{i}']) + for i, x in enumerate(args.condition_f): + modkit.extend(['-s', x, f'{args.condition_n}{i}']) + print(' '.join(modkit), end='') + CODE + + python modkit_dmr.py \ + --control_n ~{controlName} \ + --control_f ~{sep=" " control} \ + --condition_n ~{conditionName} \ + --condition_f ~{sep=" " condition} + >>> + + output { + String params = select_first(read_lines(stdout())) + } + + runtime { + docker: dockerImage + cpu: threads + memory: memory + time_minutes: timeMinutes + } +} + + +task DmrMulti { + input { + String dmrMultiArguments + Array[File] control + Array[File] condition + + Array[File] controlIndex + Array[File] conditionIndex + + String controlName + String conditionName + + File referenceFasta + File referenceFastaFai + String dmr_dir = "results" + + File? 
cpg_islands + + Int threads = 4 + String memory = "32G" + Int timeMinutes = 600 + String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" + } + + command <<< + set -e + mkdir -p ~{dmr_dir} + + modkit dmr multi \ + ~{dmrMultiArguments} \ + --out-dir ~{dmr_dir} \ + ~{"--regions-bed " + cpg_islands} \ + --ref ~{referenceFasta} \ + --base C \ + --threads ~{threads} \ + --header \ + --log-filepath dmr_multi.log + >>> + + output { + # TODO: other files + File log = "dmr_multi.log" + } + + runtime { + docker: dockerImage + cpu: threads + memory: memory + time_minutes: timeMinutes + } + + parameter_meta { + # input + bam: {description: "The input alignment file", category: "required"} + bamIndex: {description: "The index for the input alignment file", category: "required"} + + sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} + numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} + samplingFrac: {description: "Use a fixed percentage of reads, rather than a fixed number of reads, for sampling.", category: "advanced"} + seed: {description: "A seed can be provided for reproducibility in the sampling fraction case.", category: "advanced"} + + threads: {description: "The number of threads to use.", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + summaryReport: {description: "The output modkit summary."} + } +} From 8564f8c0a757cecd00155de011e10a1e51ab32cf Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 12:29:51 +0200 Subject: [PATCH 1205/1208] I don't feel like documenting it if it isn't used --- CHANGELOG.md | 2 - modkit.wdl | 122 --------------------------------------------------- 2 files changed, 124 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf9d9238..bb09f4f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,8 +57,6 @@ version 6.0.0-dev + Update clair3 version from 1.0.11 to 1.1.0 + Improve whatshap runtime/memory usage for our cluster. + Add `Modkit.SampleProbs` -+ Add `Modkit.DmrMulti` -+ Add `Modkit.DmrMultiInputPrep` to construct the command line for `Modkit.DmrMulti` version 5.2.0 --------------------------- diff --git a/modkit.wdl b/modkit.wdl index a35d8ed2..b38929f5 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -244,125 +244,3 @@ task SampleProbs { summaryReport: {description: "The output modkit summary."} } } - -task DmrMultiInputPrep { - input { - Array[File] control - Array[File] condition - String controlName - String conditionName - - Int threads = 1 - String memory = "1G" - Int timeMinutes = 5 - String dockerImage = "quay.io/biocontainers/multiqc:1.28--pyhdfd78af_0" - } - - command <<< - cat > modkit_dmr.py <<'CODE' - #!/usr/bin/env python3 - import argparse - parser = argparse.ArgumentParser() - parser.add_argument('--control_n', type=str, default='control') - parser.add_argument('--control_f', type=str,nargs='+') - parser.add_argument('--condition_n', type=str, default='condition') - parser.add_argument('--condition_f', type=str,nargs='+') - args = parser.parse_args() - modkit = [] - for i, x in enumerate(args.control_f): - modkit.extend(['-s', x, f'{args.control_n}{i}']) - for i, x in enumerate(args.condition_f): - 
modkit.extend(['-s', x, f'{args.condition_n}{i}']) - print(' '.join(modkit), end='') - CODE - - python modkit_dmr.py \ - --control_n ~{controlName} \ - --control_f ~{sep=" " control} \ - --condition_n ~{conditionName} \ - --condition_f ~{sep=" " condition} - >>> - - output { - String params = select_first(read_lines(stdout())) - } - - runtime { - docker: dockerImage - cpu: threads - memory: memory - time_minutes: timeMinutes - } -} - - -task DmrMulti { - input { - String dmrMultiArguments - Array[File] control - Array[File] condition - - Array[File] controlIndex - Array[File] conditionIndex - - String controlName - String conditionName - - File referenceFasta - File referenceFastaFai - String dmr_dir = "results" - - File? cpg_islands - - Int threads = 4 - String memory = "32G" - Int timeMinutes = 600 - String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0" - } - - command <<< - set -e - mkdir -p ~{dmr_dir} - - modkit dmr multi \ - ~{dmrMultiArguments} \ - --out-dir ~{dmr_dir} \ - ~{"--regions-bed " + cpg_islands} \ - --ref ~{referenceFasta} \ - --base C \ - --threads ~{threads} \ - --header \ - --log-filepath dmr_multi.log - >>> - - output { - # TODO: other files - File log = "dmr_multi.log" - } - - runtime { - docker: dockerImage - cpu: threads - memory: memory - time_minutes: timeMinutes - } - - parameter_meta { - # input - bam: {description: "The input alignment file", category: "required"} - bamIndex: {description: "The index for the input alignment file", category: "required"} - - sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} - numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} - samplingFrac: {description: "Use a fixed percentage of reads, rather than a fixed number of reads, for sampling.", category: "advanced"} - seed: {description: "A seed can be provided for 
reproducibility in the sampling fraction case.", category: "advanced"} - - threads: {description: "The number of threads to use.", category: "advanced"} - memory: {description: "The amount of memory this job will use.", category: "advanced"} - timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} - - # output - summaryReport: {description: "The output modkit summary."} - } -} From 040a43e6723d672f97f126a1628e773d75fb6515 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 12:29:51 +0200 Subject: [PATCH 1206/1208] I don't feel like documenting it if it isn't used --- modkit.wdl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/modkit.wdl b/modkit.wdl index b38929f5..ddf4dbf7 100644 --- a/modkit.wdl +++ b/modkit.wdl @@ -229,6 +229,7 @@ task SampleProbs { # input bam: {description: "The input alignment file", category: "required"} bamIndex: {description: "The index for the input alignment file", category: "required"} + summary: {description: "A folder for the outputs", category: "required"} sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"} numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"} @@ -241,6 +242,9 @@ task SampleProbs { dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # output - summaryReport: {description: "The output modkit summary."} + reportCounts: {description: "The output html report of counts"} + reportProportion: {description: "The output html report of proportions"} + reportProbabilitiesTsv: {description: "The output TSV of Probabilities"} + reportThresholdsTsv: {description: "The output TSV of thresholds"} } } From bd54aeb0bcdd67db3fb180b890954bca92000287 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 13:13:43 +0200 Subject: [PATCH 1207/1208] incorrect inputs --- whatshap.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/whatshap.wdl b/whatshap.wdl index beef5e99..3b2bd1d3 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -41,8 +41,8 @@ task Phase { String memory = "4GiB" Int timeMinutes = 120 - String memory = 2 + ceil(size(bam, "G") / 20 ) - Int timeMinutes = 400 + ceil(size(bam, "G") * 0.9 ) + String memory = 2 + ceil(size(phaseInput, "G") / 20 ) + Int timeMinutes = 400 + ceil(size(phaseInput, "G") * 0.9 ) # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" @@ -180,8 +180,8 @@ task Haplotag { String? regions String? sample - String memory = 2 + ceil(size(bam, "G") / 50 ) - Int timeMinutes = 50 + ceil(size(bam, "G") * 2 ) + String memory = 2 + ceil(size(alignments, "G") / 50 ) + Int timeMinutes = 50 + ceil(size(alignments, "G") * 2 ) Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. 
From e7061594546ceac5e7bbcdc48877bc78b5ec795c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Tue, 22 Jul 2025 15:46:06 +0200 Subject: [PATCH 1208/1208] Fix duplicate declarations --- whatshap.wdl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/whatshap.wdl b/whatshap.wdl index 3b2bd1d3..b491f566 100644 --- a/whatshap.wdl +++ b/whatshap.wdl @@ -38,9 +38,6 @@ task Phase { String? threshold String? ped - String memory = "4GiB" - Int timeMinutes = 120 - String memory = 2 + ceil(size(phaseInput, "G") / 20 ) Int timeMinutes = 400 + ceil(size(phaseInput, "G") * 0.9 ) @@ -183,7 +180,6 @@ task Haplotag { String memory = 2 + ceil(size(alignments, "G") / 50 ) Int timeMinutes = 50 + ceil(size(alignments, "G") * 2 ) - Int timeMinutes = 120 # Whatshap 1.0, tabix 0.2.5. String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0" }