From 1ee07a657fd46f6dc227573c2c59d7ef4d0cd4b9 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 10:49:41 +0200 Subject: [PATCH 01/18] add -no-upstream to snpeff task --- snpeff.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/snpeff.wdl b/snpeff.wdl index 0f14e5b5..8718e01b 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -32,6 +32,7 @@ task SnpEff { Boolean hgvs = true Boolean lof = true Boolean noDownstream = false + Boolean noUpstream = false Boolean noIntergenic = false Boolean noShiftHgvs = false Int? upDownStreamLen @@ -39,7 +40,7 @@ task SnpEff { String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/snpeff:5.0--0" + String dockerImage = "quay.io/biocontainers/snpeff:5.2--hdfd78af_1" } command { @@ -55,6 +56,7 @@ task SnpEff { ~{true="-hgvs" false="-noHgvs" hgvs} \ ~{true="-lof" false="-noLof" lof} \ ~{true="-no-downstream" false="" noDownstream} \ + ~{true="-no-upstream" false="" noUpstream} \ ~{true="-no-intergenic" false="" noIntergenic} \ ~{true="-noShiftHgvs" false="" noShiftHgvs} \ ~{"-upDownStreamLen " + upDownStreamLen} \ @@ -82,6 +84,7 @@ task SnpEff { hgvs: {description: "Equivalent to `-hgvs` if true or `-noHgvs` if false.", category: "advanced"} lof: {description: "Equivalent to `-lof` if true or `-noLof` if false.", category: "advanced"} noDownstream: {description: "Equivalent to the `-no-downstream` flag.", category: "advanced"} + noUpstream: {description: "Equivalent to the `-no-upstream` flag.", category: "advanced"} noIntergenic: {description: "Equivalent to the `-no-intergenic` flag.", category: "advanced"} noShiftHgvs: {description: "Equivalent to the `-noShiftHgvs` flag.", category: "advanced"} upDownStreamLen: {descriptoin: "Equivalent to the `-upDownStreamLen` option.", category: "advanced"} From d9d989e07649ac3177f6464100e192418e716ce3 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 11:22:36 +0200 Subject: [PATCH 02/18] Add snpsift 
filter --- CHANGELOG.md | 2 ++ snpsift.wdl | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 snpsift.wdl diff --git a/CHANGELOG.md b/CHANGELOG.md index dfa40b75..7e209f1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Added a task for SnpSift filter. ++ Updated the snpEff task to allow setting the `-no-upstream` flag. + Update vt task to allow a filter expression and compress and index the output. + MultiQC image updated to version 1.28 + Samtools merge now has options added for merging RG and PG headers. diff --git a/snpsift.wdl b/snpsift.wdl new file mode 100644 index 00000000..0bb413f6 --- /dev/null +++ b/snpsift.wdl @@ -0,0 +1,69 @@ +version 1.0 + +# MIT License +# +# Copyright (c) 2025 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +task Filter { + input { + File vcf + File? vcfIndex + String filterExpression + String outputPath = "./snpsift_filter.vcf" + + String memory = "9GiB" + String javaXmx = "8G" + Int timeMinutes = 60 + String dockerImage = "quay.io/biocontainers/snpsift:5.2--hdfd78af_0" + } + + command { + SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ + filter \ + "~{filterExpression}" \ + ~{vcf} \ + > ~{outputPath} + } + + output { + File outputVcf = outputPath + } + + runtime { + docker: dockerImage + time_minutes: timeMinutes # !UnknownRuntimeKey + memory: memory + } + + parameter_meta { + vcf: {description: "A VCF file to filter.", category: "required"} + vcfIndex: {description: "The index for the VCF file.", category: "common"} + filterExpression: {description: "The SnpSift filtering expression.", category: "required"} + outputPath: {description: "The path to write the output to.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", + category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", + category: "advanced"} + } +} From d4eb18d70d68e5c75539c272bf0db065e5f0bf71 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 11:35:56 +0200 Subject: [PATCH 03/18] add region input to bcftools view --- CHANGELOG.md | 1 + bcftools.wdl | 5 ++++- snpsift.wdl | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e209f1a..f13ab24b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. 
version 6.0.0-dev --------------------------- ++ Updated the bcftools view task to allow specifying a region. + Added a task for SnpSift filter. + Updated the snpEff task to allow setting the `-no-upstream` flag. + Update vt task to allow a filter expression and compress and index the output. diff --git a/bcftools.wdl b/bcftools.wdl index 7df8911d..11864a00 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -349,6 +349,7 @@ task View { String? exclude String? include + String? region Array[String] samples = [] String memory = "256MiB" @@ -368,7 +369,8 @@ task View { ~{if length(samples) > 0 then "-s" else ""} ~{sep="," samples} \ -o ~{outputPath} \ -O ~{true="z" false="v" compressed} \ - ~{inputFile} + ~{inputFile} \ + ~{region} ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} } @@ -390,6 +392,7 @@ task View { outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} + region: {description: "The region to retrieve from the VCF file.", category: "common"} excludeUncalled: {description: "Exclude sites without a called genotype (see man page for details).", category: "advanced"} samples: {description: "A list of sample names to include.", category: "advanced"} memory: {description: "The amount of memory this job will use.", category: "advanced"} diff --git a/snpsift.wdl b/snpsift.wdl index 0bb413f6..5bac6484 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -36,6 +36,8 @@ task Filter { } command { + set -e + mkdir -p "$(dirname ~{outputPath})" SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ filter \ "~{filterExpression}" \ From ecd2242e9a71f352a6b11683a969f9f5804cb18d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Mon, 26 May 2025 12:14:13 +0200 Subject: [PATCH 04/18] add an 
input for an index file in bcftools view --- CHANGELOG.md | 1 + bcftools.wdl | 2 ++ 2 files changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f13ab24b..7d5ad41d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Updated the bcftools view task with an input for an index file. + Updated the bcftools view task to allow specifying a region. + Added a task for SnpSift filter. + Updated the snpEff task to allow setting the `-no-upstream` flag. diff --git a/bcftools.wdl b/bcftools.wdl index 11864a00..b923781c 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -344,6 +344,7 @@ task Stats { task View { input { File inputFile + File? inputFileIndex String outputPath = "output.vcf" Boolean excludeUncalled = false @@ -389,6 +390,7 @@ task View { parameter_meta { # inputs inputFile: {description: "A vcf or bcf file.", category: "required"} + inputFileIndex: {description: "the index for the input file.", category: "common"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} From abcddcda79a0821ef86bb0d1b40f2e5b7264e829 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 May 2025 11:56:23 +0200 Subject: [PATCH 05/18] fix wdlTools parsing issue in bcftools annotate --- CHANGELOG.md | 2 ++ bcftools.wdl | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7d5ad41d..7ad69a3e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ that users understand how the changes affect the new version. 
version 6.0.0-dev --------------------------- ++ Fixed an issue with the parameter_meta section of bcftools annotate + which caused wdlTools to error on parsing the file. + Updated the bcftools view task with an input for an index file. + Updated the bcftools view task to allow specifying a region. + Added a task for SnpSift filter. diff --git a/bcftools.wdl b/bcftools.wdl index b923781c..6200a1a1 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -111,7 +111,7 @@ task Annotate { collapse: {description: "Treat as identical records with , see man page for details.", category: "advanced"} exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"} headerLines: {description: "Lines to append to the VCF header (see man page for details).", category: "advanced"} - newId: {description: "Assign ID on the fly (e.g. --set-id +'%CHROM\_%POS').", category: "advanced"} + newId: {description: "Assign ID on the fly (e.g. --set-id +'%CHROM\\_%POS').", category: "advanced"} include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"} markSites: {description: "Annotate sites which are present ('+') or absent ('-') in the -a file with a new INFO/TAG flag.", category: "advanced"} regions: {description: "Restrict to comma-separated list of regions.", category: "advanced"} From 748fe367e1964e5014cdb60a3def6976f2846d3c Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 May 2025 14:51:52 +0200 Subject: [PATCH 06/18] change name of snpsift task --- snpsift.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snpsift.wdl b/snpsift.wdl index 5bac6484..6b6a1feb 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -22,7 +22,7 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -task Filter { +task SnpSiftFilter { input { File vcf File? 
vcfIndex From 2fc90c9790b41781ca35144e0d495f293a614382 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 27 May 2025 16:05:27 +0200 Subject: [PATCH 07/18] add a useless ls to check if a dnanexus error is caused by lazy loading --- bcftools.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bcftools.wdl b/bcftools.wdl index 6200a1a1..5ab04c1c 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -362,6 +362,8 @@ task View { command { set -e + ls ~{inputFileIndex} + mkdir -p "$(dirname ~{outputPath})" bcftools view \ ~{"--exclude " + exclude} \ From 82a5715109d7c352c016d2672cea27b0ab4eb7f0 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Wed, 28 May 2025 09:14:43 +0200 Subject: [PATCH 08/18] add ls to snpeff, bcftools view and snpsift so I can see the paths when run on dnanexus --- bcftools.wdl | 2 +- snpeff.wdl | 1 + snpsift.wdl | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 5ab04c1c..0381d4cf 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -362,7 +362,7 @@ task View { command { set -e - ls ~{inputFileIndex} + ls ~{inputFile} ~{inputFileIndex} mkdir -p "$(dirname ~{outputPath})" bcftools view \ diff --git a/snpeff.wdl b/snpeff.wdl index 8718e01b..924db8db 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -45,6 +45,7 @@ task SnpEff { command { set -e + ls ~{vcf} ~{vcfIndex} mkdir -p "$(dirname ~{outputPath})" unzip ~{datadirZip} snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ diff --git a/snpsift.wdl b/snpsift.wdl index 6b6a1feb..5daacd36 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -37,6 +37,8 @@ task SnpSiftFilter { command { set -e + ls ~{vcf} ~{vcfIndex} + mkdir -p "$(dirname ~{outputPath})" SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ filter \ From eafceb0f98e68feb884f8a947c15c29a2e52eb5b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 3 Jun 2025 15:52:08 +0200 Subject: [PATCH 09/18] WIP add option to output compressed VCF files to snpeff and snpsift --- snpeff.wdl | 10 ++++++++-- snpsift.wdl | 5 ++++- 2 
files changed, 12 insertions(+), 3 deletions(-) diff --git a/snpeff.wdl b/snpeff.wdl index 924db8db..e1b520af 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -40,9 +40,12 @@ task SnpEff { String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/snpeff:5.2--hdfd78af_1" + # Multicontainer with snpeff 5.2 and bgzip/tabix 1.19.1 + String dockerImage = "quay.io/biocontainers/mulled-v2-2fe536b56916bd1d61a6a1889eb2987d9ea0cd2f:c51b2e46bf63786b2d9a7a7d23680791163ab39a-0" } + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") + command { set -e ls ~{vcf} ~{vcfIndex} @@ -61,12 +64,15 @@ task SnpEff { ~{true="-no-intergenic" false="" noIntergenic} \ ~{true="-noShiftHgvs" false="" noShiftHgvs} \ ~{"-upDownStreamLen " + upDownStreamLen} \ - > ~{outputPath} + ~{if compressed then "| bgzip " else ""} > ~{outputPath} + + ~{if compressed then "tabix ~{outputPath}" else ""} rm -r $PWD/data } output { File outputVcf = outputPath + File? outputVcfIndex = outputPath + ".tbi" } runtime { diff --git a/snpsift.wdl b/snpsift.wdl index 5daacd36..d964c255 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -44,11 +44,14 @@ task SnpSiftFilter { filter \ "~{filterExpression}" \ ~{vcf} \ - > ~{outputPath} + ~{if compressed then "| bgzip " else ""} > ~{outputPath} + + ~{if compressed then "tabix ~{outputPath}" else ""} } output { File outputVcf = outputPath + File? outputVcfIndex = outputPath + ".tbi" } runtime { From 16656ff77fa9f88577298fd7e8cc00c5eba02004 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 5 Jun 2025 11:38:01 +0200 Subject: [PATCH 10/18] update changelog, fix missing variable --- CHANGELOG.md | 1 + snpsift.wdl | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ad69a3e..7de262af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. 
version 6.0.0-dev --------------------------- ++ Add support for outputting compressed files to snpeff and snpsift. + Fixed an issue with the parameter_meta section of bcftools annotate which caused wdlTools to error on parsing the file. + Updated the bcftools view task with an input for an index file. diff --git a/snpsift.wdl b/snpsift.wdl index d964c255..4c354f48 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -32,9 +32,12 @@ task SnpSiftFilter { String memory = "9GiB" String javaXmx = "8G" Int timeMinutes = 60 - String dockerImage = "quay.io/biocontainers/snpsift:5.2--hdfd78af_0" + # Multicontainer with SnpSift 5.2 and bgzip/tabix 1.22 + String dockerImage = "quay.io/biocontainers/mulled-v2-d4bc0c23eb1d95c7ecff7f0e8b3a4255503fd5d4:c51b2e46bf63786b2d9a7a7d23680791163ab39a-0" } + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") + command { set -e ls ~{vcf} ~{vcfIndex} From 69a9c0a6751f78cfaa75c325fc49425113e268b1 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 11:56:44 +0200 Subject: [PATCH 11/18] Add a task for bcftools norm --- CHANGELOG.md | 1 + bcftools.wdl | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7de262af..003aa97a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Add a task for bcftools norm. + Add support for outputting compressed files to snpeff and snpsift. + Fixed an issue with the parameter_meta section of bcftools annotate which caused wdlTools to error on parsing the file. diff --git a/bcftools.wdl b/bcftools.wdl index 0381d4cf..1b00b5fe 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -180,6 +180,67 @@ task Filter { } } +task Norm { + input { + File inputFile + File? inputFileIndex + String outputPath = "output.vcf.gz" + + File? fasta + String? 
regions + Boolean splitMultiallelicSites = false + + String memory = "2GiB" + Int timeMinutes = 1 + ceil(size(inputFile, "G")) + String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" + } + + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") + + command { + set -e + ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud + + mkdir -p "$(dirname ~{outputPath})" + bcftools norm \ + -o ~{outputPath} \ + -O ~{true="z" false="v" compressed} \ + ~{"--regions " + regions} \ + ~{"--fasta " + fasta} \ + ~{if splitMultiallelicSites then "--multiallelics -both" else ""} + + ~{if compressed then "bcftools index --tbi ~{outputPath}" else ""} + } + + output { + File outputVcf = outputPath + File? outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + inputFile: {description: "A vcf or bcf file.", category: "required"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + fasta: {description: "Equivalent to bcftools norm's `--fasta` option.", category: "advanced"} + regions: {description: "Equivalent to bcftools norm's `--regions` option.", category: "advanced"} + splitMultiallelicSites: {description: "Whether multiallelic lines should be split up.", category: "advanced"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Sorted VCF file."} + outputVcfIndex: {description: "Index of sorted VCF file."} + } +} + task Sort { input { File inputFile From 5d4f5a7fa3846dea7b8a16fce9c47d8674a5f260 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:26:56 +0200 Subject: [PATCH 12/18] more time for bcftools norm --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 1b00b5fe..5fb06016 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -191,7 +191,7 @@ task Norm { Boolean splitMultiallelicSites = false String memory = "2GiB" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) + Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 2 String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From c97c55a47411b2395289ed3bf0357d8686dc7350 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:38:14 +0200 Subject: [PATCH 13/18] more time for bcftools Norm --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 5fb06016..56564b17 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -191,7 +191,7 @@ task Norm { Boolean splitMultiallelicSites = false String memory = "2GiB" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) * 2 + Int timeMinutes = 5 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 88ac2526f86f5a89d6de0fe74077f6bab05baf8d Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:52:37 +0200 Subject: [PATCH 14/18] reset time for bcftools norm --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 56564b17..1b00b5fe 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -191,7 +191,7 @@ task Norm { Boolean splitMultiallelicSites = false String memory = "2GiB" - Int 
timeMinutes = 5 + ceil(size(inputFile, "G")) + Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 3c8ec631930d4ec7df1d01ba802d5943257dfd42 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 12:58:40 +0200 Subject: [PATCH 15/18] fix bcftools norm --- bcftools.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index 1b00b5fe..ae2dee4e 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -207,7 +207,8 @@ task Norm { -O ~{true="z" false="v" compressed} \ ~{"--regions " + regions} \ ~{"--fasta " + fasta} \ - ~{if splitMultiallelicSites then "--multiallelics -both" else ""} + ~{if splitMultiallelicSites then "--multiallelics -both" else ""} \ + ~{inputFile} ~{if compressed then "bcftools index --tbi ~{outputPath}" else ""} } From 5dab6c7b08f05e831ea110b44acdefedc298f67b Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 13:36:41 +0200 Subject: [PATCH 16/18] more memory for bcftools norm --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index ae2dee4e..b48956cc 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -190,7 +190,7 @@ task Norm { String? regions Boolean splitMultiallelicSites = false - String memory = "2GiB" + String memory = "10GiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From 99c562c5e8ed51e8a2a04ec5dc72dada5248ff9e Mon Sep 17 00:00:00 2001 From: DavyCats Date: Tue, 10 Jun 2025 15:08:05 +0200 Subject: [PATCH 17/18] more memory for bcftools norm --- bcftools.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bcftools.wdl b/bcftools.wdl index b48956cc..7e297bc7 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -190,7 +190,7 @@ task Norm { String? 
regions Boolean splitMultiallelicSites = false - String memory = "10GiB" + String memory = "64GiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } From c676fe2198b18b437a3e279c240290b7227b94d4 Mon Sep 17 00:00:00 2001 From: DavyCats Date: Thu, 17 Jul 2025 10:37:57 +0200 Subject: [PATCH 18/18] address review comments --- bcftools.wdl | 13 ++++++++----- snpeff.wdl | 7 ++++++- snpsift.wdl | 7 ++++++- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/bcftools.wdl b/bcftools.wdl index 7e297bc7..31c7db13 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -190,8 +190,9 @@ task Norm { String? regions Boolean splitMultiallelicSites = false - String memory = "64GiB" + String memory = "4GiB" Int timeMinutes = 1 + ceil(size(inputFile, "G")) + Int diskGb = ceil(2.1 * size(inputFile, "G") + size(fasta, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" } @@ -199,7 +200,7 @@ task Norm { command { set -e - ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud + ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" bcftools norm \ @@ -222,6 +223,7 @@ task Norm { memory: memory time_minutes: timeMinutes docker: dockerImage + disks: "local-disk ~{diskGb} SSD" # Based on an example in dxCompiler docs } parameter_meta { @@ -234,11 +236,12 @@ task Norm { memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + diskGb: {description: "The amount of disk space needed for this job in GiB.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputVcf: {description: "Sorted VCF file."} - outputVcfIndex: {description: "Index of sorted VCF file."} + outputVcf: {description: "Normalized VCF file."} + outputVcfIndex: {description: "Index of Normalized VCF file."} } } @@ -424,7 +427,7 @@ task View { command { set -e - ls ~{inputFile} ~{inputFileIndex} + ls ~{inputFile} ~{inputFileIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" bcftools view \ diff --git a/snpeff.wdl b/snpeff.wdl index e1b520af..b972ab30 100644 --- a/snpeff.wdl +++ b/snpeff.wdl @@ -48,7 +48,7 @@ task SnpEff { command { set -e - ls ~{vcf} ~{vcfIndex} + ls ~{vcf} ~{vcfIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" unzip ~{datadirZip} snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ @@ -82,6 +82,7 @@ task SnpEff { } parameter_meta { + # inputs vcf: {description: "A VCF file to analyse.", category: "required"} vcfIndex: {description: "The index for the VCF file.", category: "required"} genomeVersion: {description: "The version of the genome to be used. The database for this genome must be present in the datadirZip.", category: "required"} @@ -102,5 +103,9 @@ task SnpEff { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Annotated VCF file."} + outputVcfIndex: {description: "Index of annotated VCF file."} } } diff --git a/snpsift.wdl b/snpsift.wdl index 4c354f48..a62f7295 100644 --- a/snpsift.wdl +++ b/snpsift.wdl @@ -40,7 +40,7 @@ task SnpSiftFilter { command { set -e - ls ~{vcf} ~{vcfIndex} + ls ~{vcf} ~{vcfIndex} # dxCompiler localization workaroud mkdir -p "$(dirname ~{outputPath})" SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \ @@ -64,6 +64,7 @@ task SnpSiftFilter { } parameter_meta { + # inputs vcf: {description: "A VCF file to filter.", category: "required"} vcfIndex: {description: "The index for the VCF file.", category: "common"} filterExpression: {description: "The SnpSift filtering expression.", category: "required"} @@ -75,5 +76,9 @@ task SnpSiftFilter { timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + outputVcf: {description: "Filtered VCF file."} + outputVcfIndex: {description: "Index of filtered VCF file."} } }