From e39fe10360989d5074580034a4df030e16d27f4c Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:08:55 +0200 Subject: [PATCH 1/6] do not intermingle singletons --- samtools.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index d724a692..ef89477d 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -157,6 +157,7 @@ task Fastq { String outputRead1 String? outputRead2 String? outputRead0 + String? outputReadS Boolean appendReadNumber = false Boolean outputQuality = false @@ -177,8 +178,10 @@ task Fastq { samtools collate -u -O ~{inputBam} | \ samtools fastq \ ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ + ~{"-1 " + outputRead1} \ ~{"-2 " + outputRead2} \ ~{"-0 " + outputRead0} \ + ~{"-s " + outputReadS} \ ~{"-f " + includeFilter} \ ~{"-F " + excludeFilter} \ ~{"-G " + excludeSpecificFilter} \ @@ -192,6 +195,7 @@ task Fastq { File read1 = outputRead1 File? read2 = outputRead2 File? read0 = outputRead0 + File? readS = outputReadS } runtime { @@ -207,6 +211,7 @@ task Fastq { outputRead1: {description: "The location the reads (first reads for pairs, in case of paired-end sequencing) should be written to.", category: "required"} outputRead2: {description: "The location the second reads from pairs should be written to.", category: "common"} outputRead0: {description: "The location the unpaired reads should be written to (in case of paired-end sequenicng).", category: "advanced"} + outputReadS: {description: "The location singleton reads should be written to.", category: "advanced"} appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. Corresponds to `-n/N`.", category: "advanced"} outputQuality: {description: "Equivalent to samtools fastq's `-O` flag.", category: "advanced"} includeFilter: {description: "Include reads with ALL of these flags. 
Corresponds to `-f`.", category: "advanced"} From b9319418b7a96a0046b9c034649930ccd5cf4fa9 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:09:16 +0200 Subject: [PATCH 2/6] The caches in containers caused issues --- samtools.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/samtools.wdl b/samtools.wdl index ef89477d..315a00b5 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -571,6 +571,9 @@ task Split { command { set -e mkdir -p "~{outputPath}/rg/" + + export XDG_CACHE_HOME=$PWD/.cache/ + export REF_CACHE=$PWD/.cache/hts-ref/%2s/%2s/%s samtools split \ --output-fmt bam \ --output-fmt-option level=~{compressionLevel} \ From 8ede8b774a0296fe484e9f78e25d5d358828099e Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:15:01 +0200 Subject: [PATCH 3/6] add biopets validate fastq --- biopet.wdl | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 biopet.wdl diff --git a/biopet.wdl b/biopet.wdl new file mode 100644 index 00000000..ea8a36c8 --- /dev/null +++ b/biopet.wdl @@ -0,0 +1,60 @@ +version 1.0 + +# Copyright (c) 2025 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + + +task ValidateFastq { + input { + File inputRead1 + File? inputRead2 + + String memory = "1GiB" + Int timeMinutes = 5 + ceil(size(inputRead1, "GiB")) + String dockerImage = "quay.io/biocontainers/biopet-validatefastq:0.1.1--hdfd78af_3" + } + + command { + set -e + java -jar /usr/local/share/biopet-validatefastq-0.1.1-3/validatefastq-assembly-0.1.1.jar \ + --fastq1 ~{inputRead1} \ + ~{"--fastq2 " + inputRead2} + } + + output { + } + + runtime { + cpu: 1 + memory: memory + docker: dockerImage + time_minutes: timeMinutes + } + + parameter_meta { + # inputs + inputRead1: {description: "The location of the first FASTQ file (first reads for pairs, in case of paired-end sequencing).", category: "required"} + inputRead2: {description: "The location of the paired end reads.", category: "common"} + + memory: {description: "The amount of memory this job will use.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. 
Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + } +} From 8493c77e477c5522b0947948b47e35be04974fc7 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:19:23 +0200 Subject: [PATCH 4/6] require being explicit about locations --- samtools.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/samtools.wdl b/samtools.wdl index 315a00b5..5bb2fb82 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -177,7 +177,6 @@ task Fastq { mkdir -p "$(dirname ~{outputRead1})" samtools collate -u -O ~{inputBam} | \ samtools fastq \ - ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \ ~{"-1 " + outputRead1} \ ~{"-2 " + outputRead2} \ ~{"-0 " + outputRead0} \ From 347ed91d4bff4306cea0074ca7f1c7fa2ff517b7 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:20:09 +0200 Subject: [PATCH 5/6] Probably unnecessary --- samtools.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/samtools.wdl b/samtools.wdl index 5bb2fb82..0ef1419c 100644 --- a/samtools.wdl +++ b/samtools.wdl @@ -571,8 +571,6 @@ task Split { set -e mkdir -p "~{outputPath}/rg/" - export XDG_CACHE_HOME=$PWD/.cache/ - export REF_CACHE=$PWD/.cache/hts-ref/%2s/%2s/%s samtools split \ --output-fmt bam \ --output-fmt-option level=~{compressionLevel} \ From 7ff2ac2c1ebab33a3872297beb189e648eb90724 Mon Sep 17 00:00:00 2001 From: Helena Rasche Date: Wed, 23 Apr 2025 13:22:10 +0200 Subject: [PATCH 6/6] documentation --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 986dfd13..f20dc82d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,8 @@ version 6.0.0-dev + Fixed bug whereby `samtools.Fastq` could produce out of sync R1/R2 when used with an unsorted bam input. `samtools collate` is now used by default to group reads by readname in order to avoid this issue. + New samtools task: split. + Update `bedtools.Intersect` to support `-wa`, `-wb`, and `-s` flags. 
++ Add `biopet.ValidateFastq` to check FASTQ files for correct pairing and other validity problems. ++ **Breaking**: `samtools.Fastq` now always writes `outputRead1` with `-1`, and singleton reads are only written to a separate file when the new optional `outputReadS` input is set. This only affects you if you previously used this task with a single output read file, in which case `outputRead1` used to be passed to `-s`. version 5.2.0 ---------------------------