From 63dceb22e11e16a45f8ac04f1c466100e8a263f6 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 18 Feb 2025 16:24:21 +0100 Subject: [PATCH 01/13] Start on a VEP task --- vep.wdl | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 vep.wdl diff --git a/vep.wdl b/vep.wdl new file mode 100644 index 00000000..83eeac4e --- /dev/null +++ b/vep.wdl @@ -0,0 +1,74 @@ +version 1.0 + +# Copyright (c) 2017 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Vep { + input { + File inputFile + String outputPath = "vep.annotated.vcf.gz" + File cacheTar + File? pluginsTar + String? species + Array[String] plugins = [] + Boolean refseq = false + Boolean merged = false + + Boolean everything = false + Boolean symbol = false + + } + + command <<< + set -e + mkdir vep_cache + tar -x --directory vep_cache -f ~{cacheTar} + ~{"tar -x --directory vep_cache -f " + pluginsTar} + + # Output all stats files by default for MultiQC integration + vep \ + --input_file ~{inputFile} \ + ~{"--species " + species} \ + --stats_html --stats_text \ + --dir vep_cache \ # Output all stats files by default for MultiQC integration + + --offline \ + ~{true="--plugin" false="" length(plugins) > 0} {sep=" --plugin " plugins} \ + --vcf \ + --compress-output bgzip \ + ~{true="--refseq" false="" refseq} \ + ~{true="--merged" false="" merged} \ + \ + ~{true="--everything" false="" everything} \ + ~{true="--symbol" false="" symbol} \ + + + # Cleanup the tar extract to save filesystem space + rm -rf vep_cache + + + >>> + + output { + File outputFile = outputPath + File statsHtml = outputPath + "_summary.html" + } + +} \ No newline at end of file From 405395d512611775ed38021d79b3f4f570d0f23e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 14:31:23 +0100 Subject: [PATCH 02/13] Add runtime requirements --- vep.wdl | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/vep.wdl b/vep.wdl index 83eeac4e..496a6b8f 100644 --- a/vep.wdl +++ b/vep.wdl @@ -34,7 +34,10 @@ task Vep { Boolean everything = false Boolean symbol = false - } + String memory = "8GiB" + Int timeMinutes = 5 + ceil(size(inputFile, "MiB") * 3) + String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0" + } command <<< set -e @@ -71,4 +74,15 @@ task Vep { File statsHtml = outputPath + "_summary.html" } -} \ No newline at end of file + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} From b6107be5cdfaf396e53f25f2d93b6220d1f14eb7 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:06:54 +0100 Subject: [PATCH 03/13] Take into account cache tar size for runtime --- vep.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index 496a6b8f..4cec3fa3 100644 --- a/vep.wdl +++ b/vep.wdl @@ -35,7 +35,8 @@ task Vep { Boolean symbol = false String memory = "8GiB" - Int timeMinutes = 5 + ceil(size(inputFile, "MiB") * 3) + # Account time for unpacking the cache. + Int timeMinutes = 1 + ceil(size(cacheTar, GiB)) + ceil(size(inputFile, "MiB") * 3) String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0" } From 5401a6050c9c288f20569b1ffb943f1a05b19d19 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:20:41 +0100 Subject: [PATCH 04/13] Cleanup command --- vep.wdl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/vep.wdl b/vep.wdl index 4cec3fa3..f9e7a4a0 100644 --- a/vep.wdl +++ b/vep.wdl @@ -36,7 +36,7 @@ task Vep { String memory = "8GiB" # Account time for unpacking the cache. - Int timeMinutes = 1 + ceil(size(cacheTar, GiB)) + ceil(size(inputFile, "MiB") * 3) + Int timeMinutes = 1 + ceil(size(cacheTar, "GiB")) + ceil(size(inputFile, "MiB") * 3) String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0" } @@ -51,8 +51,7 @@ task Vep { --input_file ~{inputFile} \ ~{"--species " + species} \ --stats_html --stats_text \ - --dir vep_cache \ # Output all stats files by default for MultiQC integration - + --dir vep_cache \ --offline \ ~{true="--plugin" false="" length(plugins) > 0} {sep=" --plugin " plugins} \ --vcf \ From 701b819d7bebab81385dbd3c159f31ab37e5961b Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:41:20 +0100 Subject: [PATCH 05/13] Add missing ~ --- vep.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index f9e7a4a0..636a8ce0 100644 --- a/vep.wdl +++ b/vep.wdl @@ -53,7 +53,7 @@ task Vep { --stats_html --stats_text \ --dir vep_cache \ --offline \ - ~{true="--plugin" false="" length(plugins) > 0} {sep=" --plugin " plugins} \ + ~{true="--plugin" false="" length(plugins) > 0} ~{sep=" --plugin " plugins} \ --vcf \ --compress-output bgzip \ ~{true="--refseq" false="" refseq} \ From e4654bc7be895cdf5fc80c02fdbfb84b8941d2aa Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 15:51:53 +0100 Subject: [PATCH 06/13] properly format commandline option --- vep.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vep.wdl b/vep.wdl index 636a8ce0..626257a3 100644 --- a/vep.wdl +++ b/vep.wdl @@ -55,7 +55,7 @@ task Vep { --offline \ ~{true="--plugin" false="" length(plugins) > 0} ~{sep=" --plugin " plugins} \ --vcf \ - --compress-output bgzip \ + --compress_output bgzip \ ~{true="--refseq" false="" refseq} \ ~{true="--merged" false="" merged} \ \ From bda5ff43ad460a51adcfa9daeb3432ec2156c80d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 16:21:23 +0100 Subject: [PATCH 07/13] Fix trailing whitespace --- vep.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vep.wdl b/vep.wdl index 626257a3..f2ca4a6e 100644 --- a/vep.wdl +++ b/vep.wdl @@ -49,7 +49,7 @@ task Vep { # Output all stats files by default for MultiQC integration vep \ --input_file ~{inputFile} \ - ~{"--species " + species} \ + ~{"--species " + species} \ --stats_html --stats_text \ --dir vep_cache \ --offline \ @@ -58,7 +58,6 @@ task Vep { --compress_output bgzip \ ~{true="--refseq" false="" refseq} \ ~{true="--merged" false="" merged} \ - \ ~{true="--everything" false="" everything} \ ~{true="--symbol" false="" symbol} \ From 967934c2fd0a4a4f29e4ad87475cd9c68a22298a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 16:39:43 +0100 Subject: [PATCH 08/13] Add missing output file param --- vep.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vep.wdl b/vep.wdl index f2ca4a6e..064cf41a 100644 --- a/vep.wdl +++ b/vep.wdl @@ -49,6 +49,7 @@ task Vep { # Output all stats files by default for MultiQC integration vep \ --input_file ~{inputFile} \ + --output_file ~{outputPath} \ ~{"--species " + species} \ --stats_html --stats_text \ --dir vep_cache \ @@ -71,6 +72,7 @@ task Vep { output { File outputFile = outputPath File statsHtml = outputPath + "_summary.html" + File statsTxt = outputPath + "_summary.txt" } runtime { From 115f3cfc0da031309a42a5a02d0825a06e1d3e85 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 19 Feb 2025 17:03:00 +0100 Subject: [PATCH 09/13] Make sure output directory is made --- vep.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/vep.wdl b/vep.wdl index 064cf41a..7fb6a660 100644 --- a/vep.wdl +++ b/vep.wdl @@ -43,6 +43,7 @@ task Vep { command <<< set -e mkdir vep_cache + mkdir -p "$(dirname ~{outputPath})" tar -x --directory vep_cache -f ~{cacheTar} ~{"tar -x --directory vep_cache -f " + pluginsTar} From f29492641550c6d2247a40d216d53c5030d7983d Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 14:22:49 +0100 Subject: [PATCH 10/13] Complete VEP task --- vep.wdl | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/vep.wdl b/vep.wdl index 7fb6a660..8a5a443b 100644 --- a/vep.wdl +++ b/vep.wdl @@ -41,12 +41,14 @@ task Vep { } command <<< - set -e + set -eu mkdir vep_cache mkdir -p "$(dirname ~{outputPath})" tar -x --directory vep_cache -f ~{cacheTar} ~{"tar -x --directory vep_cache -f " + pluginsTar} + # Make sure vep can error, so the removal always succeeds. + set +e # Output all stats files by default for MultiQC integration vep \ --input_file ~{inputFile} \ @@ -61,13 +63,14 @@ task Vep { ~{true="--refseq" false="" refseq} \ ~{true="--merged" false="" merged} \ ~{true="--everything" false="" everything} \ - ~{true="--symbol" false="" symbol} \ - + ~{true="--symbol" false="" symbol} + VEP_EXIT_CODE=$? + set -e # Cleanup the tar extract to save filesystem space rm -rf vep_cache - + exit $VEP_EXIT_CODE >>> output { @@ -83,8 +86,23 @@ task Vep { } parameter_meta { + # input + inputFile: {description: "The VCF to annotate.", category: "required"} + outputPath: {description: "Where to put the output file", category: "advanced"} + cacheTar: {description: "A TAR archive containing the cache. The TAR archives from the VEP website work.", category: "required"} + pluginsTar: {description: "A TAR file with custom plugins.", category: "advanced"} + refseq: {description: "Use the refseq cache", category: "common"} + merged: {description: "Use the merged cache", category: "common"} + everything: {description: "Use all annotation sources bundeld with vep.", category: "common"} + symbol: {description: "Add the gene symbol to the output where available", category: "advanced"} + memory: {description: "The amount of memory this job will use.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # output + outputFile: {description: "The annotated VEP VCF file."} + statsHtml: {description: "The VEP summary stats HTML file."} + statsTxt: {description: "The VEP summary stats TXT file."} } } From eca4681a0baf841dc2fffc2ca3f22930822740a5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 14:45:48 +0100 Subject: [PATCH 11/13] Add VEP to the changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1276efaa..378731bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Add VEP task. + Add Sequali task. + Add Clair3 task. + Add Modkit task. From 203d178e3ea80abef927e7f1ac67d00fec93ff75 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 15:15:50 +0100 Subject: [PATCH 12/13] Add missing parameter_meta for VEP --- vep.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vep.wdl b/vep.wdl index 8a5a443b..349242fb 100644 --- a/vep.wdl +++ b/vep.wdl @@ -91,6 +91,8 @@ task Vep { outputPath: {description: "Where to put the output file", category: "advanced"} cacheTar: {description: "A TAR archive containing the cache. The TAR archives from the VEP website work.", category: "required"} pluginsTar: {description: "A TAR file with custom plugins.", category: "advanced"} + species: {description: "Which species cache to use", category: "common"} + plugins: {description: "Which plugins to use", category: "common"} refseq: {description: "Use the refseq cache", category: "common"} merged: {description: "Use the merged cache", category: "common"} everything: {description: "Use all annotation sources bundeld with vep.", category: "common"} From 117e5317fbb50c5989b1afd668d469569b78127e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 5 Mar 2025 15:20:15 +0100 Subject: [PATCH 13/13] Add missing Minimap2 parameter_meta --- minimap2.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/minimap2.wdl b/minimap2.wdl index 18127cb1..da301bd3 100644 --- a/minimap2.wdl +++ b/minimap2.wdl @@ -183,6 +183,11 @@ task Mapping { mismatchPenalty: {description: "Mismatch penalty.", category: "advanced"} tagsToKeep: {description: "Tags to keep from the input unaligned BAM file.", category: "Advanced"} howToFindGTAG: {description: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"} + compressionLevel: {description: "compressionLevel for the output file", category: "advanced"} + additionalSortThreads: {description: "Extra sorting threads used for samtools sort", category: "advanced"} + sortMemoryGb: {description: "Amount of memory set for sorting", category: "advanced"} + nameSorted: {description: "Output a name sorted file instead", category: "common"} + cores: {description: "The number of cores to be used.", category: "advanced"} memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}