From 847ad71a26b3a1ddc1fc06c2fda349fc620ad2b5 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 9 Apr 2025 19:40:40 +0200 Subject: [PATCH 1/2] Update vt to allow a filter expression and compressed indexed output --- CHANGELOG.md | 1 + vt.wdl | 28 ++++++++++++++++++++++------ 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 96adc8fa..dfa40b75 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ that users understand how the changes affect the new version. version 6.0.0-dev --------------------------- ++ Update vt task to allow a filter expression and compress and index the output. + MultiQC image updated to version 1.28 + Samtools merge now has options added for merging RG and PG headers. + Samtools merge default thread count increased based on the number of files. diff --git a/vt.wdl b/vt.wdl index 4da2d8cd..4ced1d2a 100644 --- a/vt.wdl +++ b/vt.wdl @@ -27,27 +27,39 @@ task Normalize { File referenceFasta File referenceFastaFai Boolean ignoreMaskedRef = false - String outputPath = "./vt/normalized_decomposed.vcf" + String outputPath = "./vt/normalized_decomposed.vcf.gz" + String? filterExpression + + Int compressionLevel = 1 String memory = "4GiB" - Int timeMinutes = 30 - String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2" + Int timeMinutes = 10 + ceil(size(inputVCF, "GiB") * 240) + String dockerImage = "quay.io/biocontainers/vt:0.57721--h2419454_12" } command { set -eo pipefail mkdir -p "$(dirname ~{outputPath})" - vt normalize ~{inputVCF} \ + vt view -h \ + ~{"-f " + filterExpression} \ + ~{inputVCF} \ + | vt normalize - \ -r ~{referenceFasta} \ ~{true="-m " false="" ignoreMaskedRef} \ - | vt decompose -s - -o ~{outputPath} + | vt decompose -s - \ + | vt view - \ + -c ~{compressionLevel} \ + -o ~{outputPath} + vt index ~{outputPath} } output { File outputVcf = outputPath + File outputVcfIndex = outputPath + ".tbi" } runtime { + cpu: 1 memory: memory time_minutes: timeMinutes docker: dockerImage @@ -61,11 +73,15 @@ task Normalize { referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"} ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs.", category: "advanced"} outputPath: {description: "The location the output VCF file should be written.", category: "common"} + filterExpression: {description: "See https://genome.sph.umich.edu/wiki/Vt#Filters for valid expressions.", category: "common"} + compressionLevel: {description: "Compression level for the out vcf.gz file.", category: "advanced"} + memory: {description: "The memory required to run the programs.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} # outputs - outputVcf: {description: "Normalized & decomposed VCF file."} + outputVcf: {description: "Normalized and decomposed VCF file."} + outputVcfIndex: {description: "Index for normalized and decomposed VCF file."} } } From 57018dd55c43af0013f48a61e5119128ccd87d3f Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 11 Apr 2025 11:28:56 +0200 Subject: [PATCH 2/2] Properly quote vt filter --- vt.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vt.wdl b/vt.wdl index 4ced1d2a..635641e9 100644 --- a/vt.wdl +++ b/vt.wdl @@ -41,7 +41,7 @@ task Normalize { set -eo pipefail mkdir -p "$(dirname ~{outputPath})" vt view -h \ - ~{"-f " + filterExpression} \ + ~{"-f '" + filterExpression}~{true="'" false="" defined(filterExpression)} \ ~{inputVCF} \ | vt normalize - \ -r ~{referenceFasta} \