biowdl · DavyCats · Jul 17, 2025 · May 26, 2025 · May 26, 2025 · May 26, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,14 @@ that users understand how the changes affect the new version.
 
 version 6.0.0-dev
 ---------------------------
++ Add a task for bcftools norm.
++ Add support for bgzip-compressed (and tabix-indexed) output to the snpEff and SnpSift tasks.
++ Fixed an issue with the parameter_meta section of bcftools annotate
+  which caused wdlTools to error on parsing the file.
++ Updated the bcftools view task with an input for an index file.
++ Updated the bcftools view task to allow specifying a region.
++ Added a task for SnpSift filter.
++ Updated the snpEff task to allow setting the `-no-upstream` flag.
 + Update vt task to allow a filter expression and compress and index the output.
 + MultiQC image updated to version 1.28
 + Samtools merge now has options added for merging RG and PG headers.

diff --git a/bcftools.wdl b/bcftools.wdl
@@ -111,7 +111,7 @@ task Annotate {
         collapse: {description: "Treat as identical records with <snps|indels|both|all|some|none>, see man page for details.", category: "advanced"}
         exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"}
         headerLines: {description: "Lines to append to the VCF header (see man page for details).", category: "advanced"}
-        newId: {description: "Assign ID on the fly (e.g. --set-id +'%CHROM\_%POS').", category: "advanced"}
+        newId: {description: "Assign ID on the fly (e.g. --set-id +'%CHROM\\_%POS').", category: "advanced"}
         include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"}
         markSites: {description: "Annotate sites which are present ('+') or absent ('-') in the -a file with a new INFO/TAG flag.", category: "advanced"}
         regions: {description: "Restrict to comma-separated list of regions.", category: "advanced"}
@@ -180,6 +180,71 @@ task Filter {
     }
 }
 
+task Norm {
+    input {
+        File inputFile
+        File? inputFileIndex
+        String outputPath = "output.vcf.gz"
+
+        File? fasta
+        String? regions
+        Boolean splitMultiallelicSites = false
+
+        String memory = "4GiB"
+        Int timeMinutes = 1 + ceil(size(inputFile, "G"))
+        Int diskGb = ceil(2.1 * size(inputFile, "G") + size(fasta, "G"))
+        String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2"
+    }
+
+    Boolean compressed = basename(outputPath) != basename(outputPath, ".gz")  # True when outputPath ends in ".gz".
+
+    command {
+        set -e
+        ls ~{inputFile} ~{inputFileIndex}  # dxCompiler localization workaround
+        mkdir -p "$(dirname ~{outputPath})"
+        bcftools norm \
+        -o ~{outputPath} \
+        -O ~{true="z" false="v" compressed} \
+        ~{"--regions " + regions} \
+        ~{"--fasta-ref " + fasta} \
+        ~{if splitMultiallelicSites then "--multiallelics -both" else ""} \
+        ~{inputFile}
+
+        ~{if compressed then "bcftools index --tbi ~{outputPath}" else ""}
+    }
+
+    output {
+        File outputVcf = outputPath
+        File? outputVcfIndex = outputPath + ".tbi"  # Only created when the output is compressed.
+    }
+
+    runtime {
+        memory: memory
+        time_minutes: timeMinutes
+        docker: dockerImage
+        disks: "local-disk ~{diskGb} SSD" # Based on an example in dxCompiler docs
+    }
+
+    parameter_meta {
+        # inputs
+        inputFile: {description: "A vcf or bcf file.", category: "required"}
+        inputFileIndex: {description: "The index for the input file.", category: "common"}
+        outputPath: {description: "The location the output VCF file should be written.", category: "common"}
+        fasta: {description: "Equivalent to bcftools norm's `--fasta-ref` option.", category: "advanced"}
+        regions: {description: "Equivalent to bcftools norm's `--regions` option.", category: "advanced"}
+        splitMultiallelicSites: {description: "Whether multiallelic lines should be split up.", category: "advanced"}
+
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        diskGb: {description: "The amount of disk space needed for this job in GiB.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+        # outputs
+        outputVcf: {description: "Normalized VCF file."}
+        outputVcfIndex: {description: "Index of Normalized VCF file."}
+    }
+}
+
 task Sort {
     input {
         File inputFile
@@ -344,11 +409,13 @@ task Stats {
 task View {
     input {
         File inputFile
+        File? inputFileIndex
         String outputPath = "output.vcf"
         Boolean excludeUncalled = false
 
         String? exclude
         String? include
+        String? region
         Array[String] samples = []
 
         String memory = "256MiB"
@@ -360,6 +427,8 @@ task View {
 
     command {
         set -e
+        ls ~{inputFile} ~{inputFileIndex}  # dxCompiler localization workaround
+
         mkdir -p "$(dirname ~{outputPath})"
         bcftools view \
         ~{"--exclude " + exclude} \
@@ -368,7 +437,8 @@ task View {
         ~{if length(samples) > 0 then "-s" else ""} ~{sep="," samples} \
         -o ~{outputPath} \
         -O ~{true="z" false="v" compressed} \
-        ~{inputFile}
+        ~{inputFile} \
+        ~{region}
 
         ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''}
     }
@@ -387,9 +457,11 @@ task View {
     parameter_meta {
         # inputs
         inputFile: {description: "A vcf or bcf file.", category: "required"}
+        inputFileIndex: {description: "the index for the input file.", category: "common"}
         outputPath: {description: "The location the output VCF file should be written.", category: "common"}
         include: {description: "Select sites for which the expression is true (see man page for details).", category: "advanced"}
         exclude: {description: "Exclude sites for which the expression is true (see man page for details).", category: "advanced"}
+        region: {description: "The region to retrieve from the VCF file.", category: "common"}
         excludeUncalled: {description: "Exclude sites without a called genotype (see man page for details).", category: "advanced"}
         samples: {description: "A list of sample names to include.", category: "advanced"}
         memory: {description: "The amount of memory this job will use.", category: "advanced"}

diff --git a/snpeff.wdl b/snpeff.wdl
@@ -32,18 +32,23 @@ task SnpEff {
         Boolean hgvs = true
         Boolean lof = true
         Boolean noDownstream = false
+        Boolean noUpstream = false
         Boolean noIntergenic = false
         Boolean noShiftHgvs = false
         Int? upDownStreamLen
 
         String memory = "9GiB"
         String javaXmx = "8G"
         Int timeMinutes = 60
-        String dockerImage = "quay.io/biocontainers/snpeff:5.0--0"
+        # Multicontainer with snpeff 5.2 and bgzip/tabix 1.19.1
+        String dockerImage = "quay.io/biocontainers/mulled-v2-2fe536b56916bd1d61a6a1889eb2987d9ea0cd2f:c51b2e46bf63786b2d9a7a7d23680791163ab39a-0"
     }
 
+    Boolean compressed = basename(outputPath) != basename(outputPath, ".gz")
+
     command {
         set -e
+        ls ~{vcf} ~{vcfIndex}  # dxCompiler localization workaround
         mkdir -p "$(dirname ~{outputPath})"
         unzip ~{datadirZip}
         snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
@@ -55,15 +60,19 @@ task SnpEff {
         ~{true="-hgvs" false="-noHgvs" hgvs} \
         ~{true="-lof" false="-noLof" lof} \
         ~{true="-no-downstream" false="" noDownstream} \
+        ~{true="-no-upstream" false="" noUpstream} \
         ~{true="-no-intergenic" false="" noIntergenic} \
         ~{true="-noShiftHgvs" false="" noShiftHgvs} \
         ~{"-upDownStreamLen " + upDownStreamLen} \
-        > ~{outputPath}
+        ~{if compressed then "| bgzip " else ""} > ~{outputPath}
+
+        ~{if compressed then "tabix ~{outputPath}" else ""}
         rm -r $PWD/data
     }
 
     output {
         File outputVcf = outputPath
+        File? outputVcfIndex = outputPath + ".tbi"
     }
 
     runtime {
@@ -73,6 +82,7 @@ task SnpEff {
     }
 
     parameter_meta {
+        # inputs
         vcf: {description: "A VCF file to analyse.", category: "required"}
         vcfIndex: {description: "The index for the VCF file.", category: "required"}
         genomeVersion: {description: "The version of the genome to be used. The database for this genome must be present in the datadirZip.", category: "required"}
@@ -82,6 +92,7 @@ task SnpEff {
         hgvs: {description: "Equivalent to `-hgvs` if true or `-noHgvs` if false.", category: "advanced"}
         lof: {description: "Equivalent to `-lof` if true or `-noLof` if false.", category: "advanced"}
         noDownstream: {description: "Equivalent to the `-no-downstream` flag.", category: "advanced"}
+        noUpstream: {description: "Equivalent to the `-no-upstream` flag.", category: "advanced"}
         noIntergenic: {description: "Equivalent to the `-no-intergenic` flag.", category: "advanced"}
         noShiftHgvs: {description: "Equivalent to the `-noShiftHgvs` flag.", category: "advanced"}
         upDownStreamLen: {descriptoin: "Equivalent to the `-upDownStreamLen` option.", category: "advanced"}
@@ -92,5 +103,9 @@ task SnpEff {
         timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
         dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
                       category: "advanced"}
+
+        # outputs
+        outputVcf: {description: "Annotated VCF file."}
+        outputVcfIndex: {description: "Index of annotated VCF file."}
     }
 }
diff --git a/snpsift.wdl b/snpsift.wdl
@@ -0,0 +1,84 @@
+version 1.0
+
+# MIT License
+#
+# Copyright (c) 2025 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task SnpSiftFilter {
+    input {
+        File vcf
+        File? vcfIndex
+        String filterExpression
+        String outputPath = "./snpsift_filter.vcf"
+
+        String memory = "9GiB"
+        String javaXmx = "8G"
+        Int timeMinutes = 60
+        # Multicontainer with SnpSift 5.2 and bgzip/tabix 1.22
+        String dockerImage = "quay.io/biocontainers/mulled-v2-d4bc0c23eb1d95c7ecff7f0e8b3a4255503fd5d4:c51b2e46bf63786b2d9a7a7d23680791163ab39a-0"
+    }
+
+    Boolean compressed = basename(outputPath) != basename(outputPath, ".gz")  # True when outputPath ends in ".gz".
+
+    command {
+        set -e -o pipefail  # pipefail: a SnpSift failure must not be masked by bgzip's exit status
+        ls ~{vcf} ~{vcfIndex}  # dxCompiler localization workaround
+
+        mkdir -p "$(dirname ~{outputPath})"
+        SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+        filter \
+        "~{filterExpression}" \
+        ~{vcf} \
+        ~{if compressed then "| bgzip " else ""} > ~{outputPath}
+
+        ~{if compressed then "tabix ~{outputPath}" else ""}
+    }
+
+    output {
+        File outputVcf = outputPath
+        File? outputVcfIndex = outputPath + ".tbi"  # Only created when the output is compressed.
+    }
+
+    runtime {
+        docker: dockerImage
+        time_minutes: timeMinutes # !UnknownRuntimeKey
+        memory: memory
+    }
+
+    parameter_meta {
+        # inputs
+        vcf: {description: "A VCF file to filter.", category: "required"}
+        vcfIndex: {description: "The index for the VCF file.", category: "common"}
+        filterExpression: {description: "The SnpSift filtering expression.", category: "required"}
+        outputPath: {description: "The path to write the output to.", category: "common"}
+
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+                  category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+
+        # outputs
+        outputVcf: {description: "Filtered VCF file."}
+        outputVcfIndex: {description: "Index of filtered VCF file."}
+    }
+}