diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 199344f5..372071ee 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,4 +1,5 @@ - ### Checklist -- [ ] Pull request details were added to CHANGELOG.md -- [ ] `parameter_meta` for each task is up to date. +- [ ] Pull request details were added to CHANGELOG.md. +- [ ] Documentation was updated (if required). +- [ ] `parameter_meta` was added/updated (if required). +- [ ] Submodule branches are on develop or a tagged commit. diff --git a/.github/lint-environment.yml b/.github/lint-environment.yml new file mode 100644 index 00000000..63b538fc --- /dev/null +++ b/.github/lint-environment.yml @@ -0,0 +1,9 @@ +name: biowdl-lint +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - cromwell + - wdl-aid + - miniwdl diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..7ef19e58 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,93 @@ +name: Linting + +on: + pull_request: + paths_ignore: + - "docs/**" + +defaults: + run: + # This is needed for miniconda, see: + # https://github.com/marketplace/actions/setup-miniconda#important + shell: bash -l {0} + +jobs: + lint: + runs-on: ubuntu-latest + name: Linting checks + steps: + - uses: actions/checkout@v2.3.4 + with: + submodules: recursive + + - name: Set cache date + run: echo "DATE=$(date +'%Y%m%d')" >> $GITHUB_ENV + + - name: Cache conda environment + # Use an always upload cache to prevent solving conda environment again and again on failing linting. + uses: pat-s/always-upload-cache@v2.1.5 + env: + # Increase this value to manually invalidate the cache + CACHE_NUMBER: 0 + with: + path: /usr/share/miniconda/envs/biowdl-lint + key: + ${{runner.os}}-biowdl-lint-${{ env.CACHE_NUMBER }}-${{env.DATE}}-${{ hashFiles('.github/lint-environment.yml') }} + id: env_cache + + # Use the builtin conda. This is the fastest installation. 
It may not be + # the fastest for resolving, but the package cache mitigates that problem. + # Since this installs fastest, it is fastest for all runs where a cache + # hit occurs. + - name: install miniconda + uses: conda-incubator/setup-miniconda@v2.1.1 + with: + channels: conda-forge,bioconda,defaults + channel-priority: strict + auto-activate-base: false + use-only-tar-bz2: true # Needed for proper caching according to the documentation. + # activate-environment is broken! This always seems to create a new environment. + # Activation is therefore done separately. + + - name: Create test environment if no cache is present + run: conda env create -n biowdl-lint -f .github/lint-environment.yml + if: steps.env_cache.outputs.cache-hit != 'true' + + - name: Activate test environment + # The new PATH should be passed to the environment, otherwise it won't register. + run: | + conda activate biowdl-lint + echo "PATH=$PATH" >> $GITHUB_ENV + + - name: Fetch develop branch for comparisons + run: git fetch --depth=1 origin develop + + - name: run womtool validate + # Only check files that have changed from the base reference. + # Womtool validate checks very slowly, so this saves a lot of time. 
+ run: | + set -x + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + womtool validate $WDL_FILE + done + - name: run miniwdl check + run: | + set -x + bash -c 'miniwdl check $(git ls-files *.wdl)' + + - name: Check copyright headers + run: | + set -x + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + grep Copyright $WDL_FILE || bash -c "echo No copyright header in $WDL_FILE && exit 1" + done + - name: Check parameter_meta for inputs + run: | + set -x + for WDL_FILE in $(git diff --name-only origin/${{github.base_ref}} | grep -E '*.wdl$'); do + wdl-aid --strict $WDL_FILE > /dev/null 2> wdl-aid_stderr || + if grep -z 'ValueError: Missing parameter_meta for inputs:' wdl-aid_stderr + then + exit 1 + fi + done diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 396b998f..00000000 --- a/.travis.yml +++ /dev/null @@ -1,22 +0,0 @@ -# We use conda to install cromwell. - -language: python - -python: - - 3.6 - -before_install: - # Install conda - - export MINICONDA=${HOME}/miniconda - - export PATH=${MINICONDA}/bin:${PATH} - - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh - - bash miniconda.sh -b -f -p ${MINICONDA} - - conda config --set always_yes yes - - conda config --add channels defaults - - conda config --add channels bioconda - - conda config --add channels conda-forge - -install: - - conda install --file requirements-test.txt - -script: bash scripts/biowdl_lint.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 96f4559c..1c5c35aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,19 +2,293 @@ Changelog ========== +version 6.0.0-dev +--------------------------- ++ bedtools.Sort: bumped container version to permit use of `faidx`. ++ Add a task for bcftools norm. ++ Add support for outputting compressed files to snpeff and snpsift. 
++ Fixed an issue with the parameter_meta section of bcftools annotate + which caused wdlTools to error on parsing the file. ++ Updated the bcftools view task with an input for an index file. ++ Updated the bcftools view task to allow specifying a region. ++ Added a task for SnpSift filter. ++ Updated the snpEff task to allow setting the `-no-upstream` flag. ++ Update vt task to allow a filter expression and compress and index the output. ++ MultiQC image updated to version 1.28 ++ Samtools merge now has options added for merging RG and PG headers. ++ Samtools merge default thread count increased based on the number of files. ++ Update docker images in samtools.wdl ++ Add threads and compression levels to applicable tasks in samtools. Default to + compression level 1. ++ samtools BgzipAndIndex and Tabix "type" parameter changed to "preset" as is + the name of the flag. ++ Unused javaXmx parameter removed from samtools DictAndFaidx ++ Update Picard images ++ Add Mosdepth task. ++ pbmm2 loses the sort parameter. Output is now always sorted. ++ pbmm2 gets an unmapped parameter. ++ Allow pbmm2 to work with a set output prefix for the BAM file. ++ Update pbmm2 docker container to version 1.17 ++ Add VEP task. ++ Add Sequali task. ++ Add Clair3 task. ++ Add Modkit task. ++ Modify minimap2 task to accept ubam input, including transfer of methylation + tags. Also sort the BAM output file by coordinate. ++ Update DeepVariant container and update resource requirements. ++ rtg Format and VcfEval tasks now handle reference as an array of files to enable caching. ++ Added --select-genotype and --exclude-filtered flags to GATK SelectVariants ++ Use softlinks to localise the database for centrifuge. ++ Added the FastqFilter task. ++ Added a new input `revcomp` to cutadapt to set the `--revcomp` flag, defaults to `false`. ++ Added `samtools.Quickcheck` to allow failing on truncated files early. 
++ Fixed bug whereby `samtools.Fastq` could produce out of sync R1/R2 when used with an unsorted bam input. `samtools collate` is now used by default to group reads by readname in order to avoid this issue. ++ New samtools task: split. ++ Update `bedtools.Intersect` to support `-wa`, `-wb`, and `-s` flags. ++ Add `biopet.ValidateFastq` to check your fastq files for pairing and other correctness. ++ **Breaking**: `samtools.Fastq` now requires defining your singleton read location. This only affects you if you were previously using this task with only a single output read file. ++ Deprecate `modkit.Pileup`'s bedGraph option, it is now output by default. ++ Add support for filterThreshold/filterPercent for `modkit.Pileup`. ++ Add `modkit.Summary` task. ++ Disable the one-click GDPR dataleak button in MultiQC `--no-ai` by default. ++ Support providing additional reports to MultiQC in workflow configuration. ++ Update clair3 version from 1.0.11 to 1.1.0 ++ Improve whatshap runtime/memory usage for our cluster. ++ Add `Modkit.SampleProbs` + +version 5.2.0 +--------------------------- ++ Update cutadapt version to 4.4 ++ Update FastQC version to 0.12.1 + +version 5.1.0 +--------------------------- ++ Add the `highlyDiploidPercentage` and `somaticMinPuritySpread` inputs to the + hmtools PURPLE task. ++ Add a task for fastp. ++ Add a task for picard CollectInsertSizeMetrics. ++ Increased the timeMinutes runtime attribute for manta (somatic and germline) to `2880`. ++ Add a task for GRIDSS somatic filtering. ++ Add a task to generate a panel of normals BED and BEDPE file for GRIDSS. ++ Add a task to filter a GRIDSS PON. ++ Add a task for delly somatic filtering. ++ Delly CallSV's `bamFile` and `bamIndex` inputs are not arrays of files, allowing + for multiple samples to be included. ++ Add `samples` input to bcftools view to select samples included in the output vcf. ++ Add a separatorChar input to the tagUmi task. 
++ Bug fix: Add space between flag and the value provided for macs2 ++ Add optional inputs to macs2, aiming to allow adhering to Encode ATACs-seq. Inputs added: + + nomodel + + gensz + + extsize + + shiftsize + + pval_thres + + bdg + + keepdup + + callsummits ++ Update samtools image to version 1.16. ++ Add targetsFile input for samtools View. ++ Mateclever's runtime attribute defaults were changed to: + + memory: `"250GiB"` + + timeMinutes: `2880` ++ Clever's Prediction task's runtime attribute defaults were changed to: + + memory: `"80GiB"` + + timeMinutes: `2200` ++ The GRIDSS AnnotateSvTypes task now also removes the second breakend of + the breakpoints and single breakends. This will prepare the output better + to be passed into survivor. ++ Updated SURVIVOR version to 1.0.7 ++ Add a combined samtools dict and samtools faidx task. ++ Add a BWA index task. ++ Move all memory notation to `KiB`, `MiB` and `GiB` from `K`, `M` and `G` + previously. The WDL spec clearly distuingishes between SI and binary + notations. Since Java always takes `K`, `M` and `G` to mean `KiB`, `MiB` and + `GiB` this means java tasks such as GATK, FastQC and Picard will always + receive enough memory now. ++ Purple's `somaticRainfallPlot` output is now optional and included in + the `plots` output as well. ++ Bedtools coverage's timeMinutes now defaults to `320`. ++ Gridss' runtime attribute defaults were changed to: + + jvmHeapSizeGb: `64` + + nonJvmMemoryGb: `10` + + threads: `12` ++ Virusbreakend's runtime attribute defaults were changed to: + + threads: `12` + + timeMinutes: `320` ++ Cobalt's timeMinutes now defaults to `480`. ++ Orange's timeMinutes now defaults to 10. ++ Sage's runtime attributes were changed to: + + threads: `32` + + javaXmx: `"16G"` + + memory: `"20G"` + + timeMinutes: `720` ++ Sambamba's runtimeMinutes nor defaults to `320`. ++ Added a task for CupGenerateReport. ++ Updated Cuppa to version 1.6. ++ Added a task for Gripss. 
++ Fixed the HealthChecker task's determination of the `succeeded` output + value. ++ Updated Linx to version 1.18. ++ Added a task for LinxVisualization. ++ Added a task for HMFtools Orange. ++ Added a task for HMFtools Pave. ++ Updated Purple to version 3.2. ++ Added plot and table outputs of Sage to task outputs. ++ Updated virus-interpreter to version 1.2. ++ Updated Peach to version 1.5. ++ Added a task to add SVTYPE annotations to GRIDSS results + (`AnnotateSvTypes`). ++ The GRIDSS task will now run tabix separately if GRIDSS doesn't + produce a vcf index. ++ Add a script to subtract UMI's from the read name and add them as + a BAM tag for each BAM record. The script is in umi.BamReadNameToUmiTag. ++ Add fgbio.AnnotateBamWithUmis. ++ Add picard.UmiAwareMarkDuplicatesWithMateCigar. ++ Added a task for SnpEff. ++ Adjusted runtime settings for sambamba Markdup. ++ Added a task for sambamba Flagstat. ++ Added a task for Picard CollectWgsMetrics. ++ Added a task for Peach. ++ Added tasks for HMFtools: + + Amber + + Cobalt + + Cuppa + + CuppaChart + + GripssApplicationKt + + GripssHardFilterApplicationKt + + HealthChecker + + Linx + + Protect + + Purple + + Sage + + VirusInterpreter ++ Added a task for VirusBreakend. ++ Added a task for GridssAnnotateVcfRepeatmasker. ++ Bumped GRIDSS version to 2.12.2. ++ Adjusted GRIDSS runtime settings. ++ Added optional inputs to GRIDSS: + + blacklistBed + + gridssProperties ++ Added a task for GRIDSS AnnotateInsertedSequence. ++ Added a task for ExtractSigPredictHRD. ++ Added a task for DeconstructSigs. ++ Added option useSoftclippingForSupplementary (default false) to + BWA mem. ++ Adjusted BWA mem runtime settings. ++ Added a task for bedtools coverage. ++ Added a task for bcftools filter. ++ Adjusted runtime settings for bcftools annotate. ++ Added optional inputs to bcftools annotate: + + inputFileIndex + + annsFileIndex ++ Update parameter_meta for macs2 ++ Add sample position in array task. 
+ +version 5.0.2 +--------------------------- ++ bumped ScatterRegions container to 1.0.0 + +version 5.0.1 +--------------------------- ++ Smoove: enable genotyping ++ add runtime memory to number of tasks. + +version 5.0.0 +--------------------------- ++ Update CPAT to version 3.0.4. + + Changed the `outFilePath` input to `outputPrefix`. ++ Survivor: Change integer to string literal in boolean parameters. ++ Samtools: Add mkdir line to `Fastq` task. ++ Add new parameters from CCS version 6.0.0 and add two new outputs: + `ccs_report.txt` & `zmw_metrics.json.gz`. ++ Change CutAdapt memory to `5G`. ++ Increase multiqc base time from 5 to 10. ++ Update biowdl-input-converter to version 0.3. ++ Update minimap2 to version 2.20. ++ Update lima to version 2.2.0. ++ Update ccs to version 6.0.0. ++ Update bam2fastx to version 1.3.1. ++ Add memory values to GffCompare, GffRead and CPAT. ++ GffCompare: Make the `referenceAnnotation` input optional. ++ Stringtie: Add the `minimumCoverage` input. ++ UMI-tools: Update default dockerImage to use umitools v1.1.1 with correct + samtools version (1.10). ++ UMI-tools: Re-introduce samtools indexing. ++ UMI-tools: Update default dockerImage to use umitools v1.1.1. ++ UMI-tools dedup: Add tempdir. ++ Bcftools view: Add options for filtering (include, exclude, excludeUncalled). ++ Duphold: Add `duphold.wdl`. ++ Add new wdl file prepareShiny.wdl for creating input files for shiny app. ++ mergePacBio: Rename `mergedReport` to `outputPathMergedReport`. ++ Lima: Fix copy commands. ++ Fixed the `size` call in the default for gffread's timeMinutes, to retrieve + GBs instead of bytes. ++ Update stringtie to version 1.3.6. ++ Update Lima to version 2.0.0. ++ Update IsoSeq3 to version 3.4.0. ++ Update samtools to version 1.11. ++ Update Picard to version 2.23.8. ++ Update NanoPlot to version 1.32.1. ++ Update MultiQC to version 1.9. ++ ~Update StringTie to version 2.1.4.~ ++ Complete `parameter_meta` for tasks missing the outputs. 
++ DeepVariant: Add an optional input for the gvcf index. ++ Samtools: `Sort` task now has `threads` in runtime instead of `1`. ++ Picard: Add parameter_meta to `SortSam`. ++ pbmm2: Add parameter_meta for `sample`. ++ Centrifuge: Rename output in task `KReport` to `KrakenReport` to resolve + name collision with task name. ++ Bwa & bwa-mem2: Add parameter_meta for `outputHla`. ++ Multiqc: Removed WDL_AID excludes of "finished" & "dependencies" inputs. ++ Bam2fastx: Add localisation of input files to Bam2Fasta task. ++ Lima: `cores` input has been renamed to `threads` to match tool naming. ++ isoseq3: `cores` input has been renamed to `threads` to match tool naming. ++ CCS: `cores` input has been renamed to `threads` to match tool naming. ++ Add PacBio preprocessing specific tasks `mergePacBio` & `ccsChunks`. ++ CCS: Update CCS to version 5. ++ deepvariant: Add task for DeepVariant. ++ gatk: Make intervals optional for GenotypeGVCFs. ++ isoseq3: Add required bam index input to isoseq3. ++ pbbam: Add task for indexing PacBio bam files. ++ picard: Add CollectHsMetrics and CollectVariantCallingMetrics. ++ Samtools: Add `threads` to parameter meta for Merge task. ++ bcftools: add tmpDir input to specify temporary directory when sorting. ++ bcftools: remove outputType and implement indexing based on output + file extension. ++ NanoPack: Add parameter_meta to NanoPlot task. ++ Centrifuge: Remove metrics file from classification (which causes the + summary report to be empty). + https://github.com/DaehwanKimLab/centrifuge/issues/83 ++ Add NanoPlot and NanoQC tasks. ++ Centrifuge: Add `timeMinutes` to `Classify` task and remove unnecessary + downloading tasks (alternative is refseqtools). ++ collect-columns: updated docker image to version 1.0.0 and added the + `sumOnDuplicateId` input (defaults to false). ++ survivor: replace integer boolean type to logical true or false value. ++ vt: Add option to ignore masked reference. ++ bcftools: add sorting and annotation. 
++ Bam2fastx: Input bam and index are now arrays. ++ Lima: Remove globs from outputs. ++ Updated task gridss.wdl: add --jvmheap parameter. ++ A bwa-mem2 task was created with the same interface (including usePostalt) + as the bwa mem task. ++ bwa mem and bwa kit are now one task. The usePostalt boolean can be used to + switch the postalt script on and off. ++ Added a task for GRIDSS. ++ Add wdl file for pacbio's bam2fastx tool. + version 4.0.0 --------------------------- + Picard MergeVcf now uses compression level 1 by default. + bwa mem, bwa mem+kit and hisat2 have their samtools sort threads tweaked. The number of threads is now related to the number of threads on the aligner. - Using more threads reduces the chance of the samtools sort pipe getting + Using more threads reduces the chance of the samtools sort pipe getting blocked if it's full. + Renamed a few inputs in centrifuge.wdl, isoseq3.wdl, talon.wdl, transcriptclean.wdl to be more descriptive. @@ -43,7 +317,7 @@ version 4.0.0 file. + Added sambamba markdup and sambamba sort. NOTE: samtools sort is more efficient and is recommended. -+ Correctly represent samtools inconsistent use of the threads flag. ++ Correctly represent samtools inconsistent use of the threads flag. Sometimes it means 'threads' sometimes it means 'additional threads'. BioWDL tasks now use only threads. The `threads - 1` conversion is applied where necessary for samtools tools that use additional threads. @@ -78,8 +352,8 @@ version 4.0.0 + Change MultiQC inputs. It now accepts an array of reports files. It does not need access to a folder with the reports anymore. MultiQC can now be used as a normal WDL task without hacks. -+ Picard: Make all outputs in `CollectMultipleMetrics` optional. This will make sure the - task will not fail if one of the metrics is set to false. ++ Picard: Make all outputs in `CollectMultipleMetrics` optional. This will + make sure the task will not fail if one of the metrics is set to false. 
+ The struct `BowtieIndex` was removed, as it has become obsolete. + The task `ReorderGlobbedScatters` was removed, as it has become obsolete. + Adjusted the memory settings of many tools, especially java tools. @@ -101,7 +375,8 @@ version 4.0.0 + Add faidx task to samtools. + Isoseq3: Remove dirname command from output folder creation step. + Isoseq3: Requires more memory by default, is now 2G. -+ Isoseq3: Remove cp commands and other bash magic, file naming is now solved by pipeline. ++ Isoseq3: Remove cp commands and other bash magic, file naming is now + solved by pipeline. + Lima: Replace mv command with cp. + Add WDL task for smoove (lumpy) sv-caller. @@ -112,7 +387,8 @@ version 3.1.0 + Lima: Add missing output to parameter_meta. + Lima: Remove outputPrefix variable from output section. + Isoseq3: Make sure stderr log file from Refine is unique and not overwritten. -+ Isoseq3: Add workaround in Refine for glob command not locating files in output directory. ++ Isoseq3: Add workaround in Refine for glob command not locating files + in output directory. + Isoseq3: Fix --min-polya-length argument syntax. + Lima: Add workaround for glob command not locating files in output directory. + CCS: Add missing backslash. @@ -156,10 +432,13 @@ version 3.0.0 + Rename HaplotypeCallerGVCF to HaplotypeCaller. Add `gvcf` option to set whether output should be a GVCF. + Centrifuge: Add Krona task specific to Centrifuge. -+ Centrifuge: Fix Centrifuge tests, where sometimes the index files could still not be located. ++ Centrifuge: Fix Centrifuge tests, where sometimes the index files could + still not be located. + Update parameter_meta for TALON, Centrifuge and Minimap2. -+ Centrifuge: Fix issue where Centrifuge Inspect did not get the correct index files location. -+ Add `minimumContigLength` input to PlotDenoisedCopyRatios and PlotModeledSegments. ++ Centrifuge: Fix issue where Centrifuge Inspect did not get the correct + index files location. 
++ Add `minimumContigLength` input to PlotDenoisedCopyRatios + and PlotModeledSegments. + Add `commonVariantSitesIndex` input to CollectAllelicCounts. + Centrifuge: Fix issue where Centrifuge could not locate index files. + Increase default memory of BWA mem to 32G (was 16G). @@ -182,7 +461,7 @@ version 3.0.0 from going unnoticed. + Centrifuge: Fix -1/-U options for single end data. + Add bedtools.Complement, bedtools.Merge, and add a task to combine multiple - bed files called bedtools.MergeBedFiles. This task combines bedtools merge + bed files called bedtools.MergeBedFiles. This task combines bedtools merge and sort. + Change `g` parameter on bedtools.Sort to `genome`. + Add `ploidity` and `excludeIntervalList` to gatk.HaplotypeCallerGvcf. @@ -195,11 +474,13 @@ version 3.0.0 + Removed the "extraArgs" input from FilterMutectCalls. + Removed unused "verbose" and "quiet" inputs from multiqc. + Added parameter_meta sections to a variety of tasks. -+ Picard's BedToIntervalList outputPath input is now optional (with a default of "regions.interval_list"). ++ Picard's BedToIntervalList outputPath input is now + optional (with a default of "regions.interval_list"). + TALON: Fix SQLite error concerning database/disk space being full. + Update htseq to default image version 0.11.2. + Update biowdl-input-converter in common.wdl to version 0.2.1. -+ Update TALON section to now include the new annotation file output, and add config file creation to the TALON task. ++ Update TALON section to now include the new annotation file output, and + add config file creation to the TALON task. + Removed unused inputs (trimPrimer and format) for cutadapt. + Various minor command tweaks to increase stability. + Fixed unused inputs in bedtools sort (inputs are now used). @@ -212,7 +493,8 @@ version 2.1.0 + Updated biowdl-input-converter version. + GATK CombineGVCFs memory was tripled to prevent it from using a lot of CPU in Garbage Collection mode. 
-+ Updated parameter_meta sections for Minimap2 and TranscriptClean to wdl-aid format. ++ Updated parameter_meta sections for Minimap2 and TranscriptClean to + wdl-aid format. + Updated cores variable for TALON, the default is now 4. + Updated TALON to version 4.4. + Added parameter_meta sections to the following tools: @@ -229,11 +511,15 @@ version 2.1.0 version 2.0.0 --------------------------- + TranscriptClean: Update TranscriptClean to version 2.0.2. -+ Memory runtime attributes are now Strings indicating total memory, as opposed to Ints indicating memory per core. -+ Memory inputs for most tasks are now Strings, remaining Int memory inputs are renamed to "memoryGb". -+ Use the biowdl-input-converter container for JsonToYaml, to reduce the amount of containers needed. -+ Add biowdl-input-converter and remove SampleConfigToSampleReadgroupLists which it replaces. -+ GATK.GenotypeGVCFs: Increased memoryMultiplier from 2.0 to 3.0 . ++ Memory runtime attributes are now Strings indicating total memory, as + opposed to Ints indicating memory per core. ++ Memory inputs for most tasks are now Strings, remaining Int memory inputs + are renamed to "memoryGb". ++ Use the biowdl-input-converter container for JsonToYaml, to reduce the + amount of containers needed. ++ Add biowdl-input-converter and remove SampleConfigToSampleReadgroupLists + which it replaces. ++ GATK.GenotypeGVCFs: Increased memoryMultiplier from 2.0 to 3.0. + Minimap2: Add -k option to minimap2 mapping. + Added bwakit task. + Minimap2: Add the option for --MD tag. @@ -243,10 +529,10 @@ version 1.0.0 --------------------------- + Common: Add "SampleConfigToSampleReadgroupLists" task. + MultiQC: the "interactive" input is now set to true by default. -+ Removed deprecated tasks: - + bioconda.installPrefix - + mergecounts.MergeCounts -+ GATK.BaseRecalibrator: "knownIndelsSitesVCFs" and "knownIndelsSitesVCFIndexes" are no longer optional, but now have a default of "[]". 
++ Removed deprecated tasks: bioconda.installPrefix, mergecounts.MergeCounts ++ GATK.BaseRecalibrator: "knownIndelsSitesVCFs" + and "knownIndelsSitesVCFIndexes" are no longer optional, but + now have a default of "[]". + Removed BWA index task. + Removed unused "picardJar" input from bwa.wdl. + All inputs to bedtools Sort are now reflected in the generated command. @@ -262,17 +548,25 @@ version 1.0.0 + Fastqsplitter: use version 1.1. + Picard: Use version 2.20.5 of the biocontainer as this includes the R dependency. + Common: Update dockerTag to dockerImage. -+ GATK: Add CombineVariants task that allows, e.g., to merge VCFs from different callers. -+ Mutect2: Add GATK tasks related to variant filtering (LearnReadOrientationModel, MergeStats, GetPileupSummaries, CalculateContamination and FilterMutectCalls). -+ Mutect2: Add "--germline-resource" and "--f1r2-tar-gz" inputs, requiring an update to GATK 4.1.2.0. ++ GATK: Add CombineVariants task that allows, e.g., to merge VCFs + from different callers. ++ Mutect2: Add GATK tasks related to variant + filtering (LearnReadOrientationModel, MergeStats, GetPileupSummaries, + CalculateContamination and FilterMutectCalls). ++ Mutect2: Add "--germline-resource" and "--f1r2-tar-gz" inputs, requiring + an update to GATK 4.1.2.0. + Mutect2: Add necessary missing index attribute for panel of normals. + MultiQC: Add memory variable to multiqc task. -+ GATK: SplitNCigarReads, BaseRecalibration and ApplyBQSR do no longer need regions files as required inputs. -+ VarDict: Add user definable flags (-M, -A, -Q, -d, -v, -f) to the paired VCF filtering script. -+ Cutadapt: If the output is a gzipped file, compress with level 1 (instead of default 6). ++ GATK: SplitNCigarReads, BaseRecalibration and ApplyBQSR do no longer need + regions files as required inputs. ++ VarDict: Add user definable flags (-M, -A, -Q, -d, -v, -f) to the paired + VCF filtering script. 
++ Cutadapt: If the output is a gzipped file, compress with + level 1 (instead of default 6). + Cutadapt: Fix issues with read2output when using single-end reads. + Add feature type, idattr and additional attributes to htseq-count. + Added allow-contain option to bowtie. + Added a changelog to keep track of changes. -+ Added sortByName task in samtools to support more memory efficient execution of HTSeqCount. ++ Added sortByName task in samtools to support more memory efficient + execution of HTSeqCount. + Removed the bam index from HTSeqCount's inputs. diff --git a/CPAT.wdl b/CPAT.wdl index 3b542e4f..b96ea0d7 100644 --- a/CPAT.wdl +++ b/CPAT.wdl @@ -23,26 +23,32 @@ version 1.0 task CPAT { input { File gene - String outFilePath + String outputPrefix File hex File logitModel + File? referenceGenome - File? referenceGenomeIndex # Should be added as input if - # CPAT should not index the reference genome. + # Should be added as input if CPAT should not index the + # reference genome. + File? referenceGenomeIndex Array[String]? startCodons Array[String]? stopCodons - Int timeMinutes = 10 + ceil(size(gene, "G") * 30) - String dockerImage = "biocontainers/cpat:v1.2.4_cv1" + + String memory = "4GiB" + Int timeMinutes = 10 + ceil(size(gene, "GiB") * 30) + String dockerImage = "quay.io/biocontainers/cpat:3.0.4--py39hcbe4a3b_0" } - # Some WDL magic in the command section to properly output the start and stopcodons to the command. - # select_first is needed in order to convert the optional arrays to non-optionals. + # Some WDL magic in the command section to properly output the start and + # stopcodons to the command. + # select_first is needed in order to convert the optional arrays + # to non-optionals. 
command { set -e - mkdir -p "$(dirname ~{outFilePath})" + mkdir -p "$(dirname ~{outputPrefix})" cpat.py \ --gene ~{gene} \ - --outfile ~{outFilePath} \ + --outfile ~{outputPrefix} \ --hex ~{hex} \ --logitModel ~{logitModel} \ ~{"--ref " + referenceGenome} \ @@ -51,27 +57,32 @@ task CPAT { } output { - File outFile = outFilePath + File orfSeqs = "~{outputPrefix}.ORF_seqs.fa" + File orfProb = "~{outputPrefix}.ORF_prob.tsv" + File orfProbBest = "~{outputPrefix}.ORF_prob.best.tsv" + File noOrf = "~{outputPrefix}.no_ORF.txt" + File rScript = "~{outputPrefix}.r" } runtime { - docker: dockerImage + memory: memory time_minutes: timeMinutes + docker: dockerImage } parameter_meta { + # inputs gene: {description: "Equivalent to CPAT's `--gene` option.", category: "required"} - outFilePath: {description: "Equivalent to CPAT's `--outfile` option.", category: "required"} + outputPrefix: {description: "Equivalent to CPAT's `--outfile` option.", category: "required"} hex: {description: "Equivalent to CPAT's `--hex` option.", category: "required"} logitModel: {description: "Equivalent to CPAT's `--logitModel` option.", category: "required"} referenceGenome: {description: "Equivalent to CPAT's `--ref` option.", category: "advanced"} - referenceGenomeIndex: {description: "The index of the reference. Should be added as input if CPAT should not index the reference genome.", - category: "advanced"} + referenceGenomeIndex: {description: "The index of the reference. 
Should be added as input if CPAT should not index the reference genome.", category: "advanced"} startCodons: {description: "Equivalent to CPAT's `--start` option.", category: "advanced"} stopCodons: {description: "Equivalent to CPAT's `--stop` option.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} - dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", - category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} } } diff --git a/LICENSE b/LICENSE index 37eeade5..b1f2b679 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,3 @@ -MIT License - Copyright (c) 2017 Leiden University Medical Center Permission is hereby granted, free of charge, to any person obtaining a copy @@ -9,8 +7,8 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, diff --git a/README.md b/README.md index 246e3814..2c80e317 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,21 @@ # Tasks - -This repository contains the WDL task definitions used in the various +This repository contains the WDL task definitions used in the various [Biowdl](https://github.com/biowdl) workflows and pipelines. - ## Documentation - -Documentation for this workflow can be found -[here](https://biowdl.github.io/tasks/). +Documentation for this repository can be +found [here](https://biowdl.github.io/tasks/). ## About -These tasks are part of [Biowdl](https://github.com/biowdl) -developed by [the SASC team](http://sasc.lumc.nl/). +These tasks are part of [Biowdl](https://github.com/biowdl) developed by the +SASC team at [Leiden University Medical Center](https://www.lumc.nl/). ## Contact -
-For any question related to these tasks, please use the -github issue tracker -or contact - the SASC team directly at: +For any question related to Tasks, please use the +github issue tracker +or contact the SASC team directly at: + sasc@lumc.nl.
diff --git a/VERSION b/VERSION index 944880fa..03f488b0 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.2.0 +5.3.0 diff --git a/bam2fastx.wdl b/bam2fastx.wdl new file mode 100644 index 00000000..62827fd9 --- /dev/null +++ b/bam2fastx.wdl @@ -0,0 +1,157 @@ +version 1.0 + +# Copyright (c) 2020 Leiden University Medical Center +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +task Bam2Fasta { + input { + Array[File]+ bam + Array[File]+ bamIndex + String outputPrefix + Int compressionLevel = 1 + Boolean splitByBarcode = false + + String? seqIdPrefix + + String memory = "2GiB" + Int timeMinutes = 15 + String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + + # Localise the bam and pbi files so they are next to each other in the + # current folder. + bamFiles="" + for bamFile in ~{sep=" " bam} + do + ln $bamFile . 
+ bamFiles=$bamFiles" $(basename $bamFile)" + done + + for index in ~{sep=" " bamIndex} + do + ln $index . + done + + bam2fasta \ + --output ~{outputPrefix} \ + -c ~{compressionLevel} \ + ~{true="--split-barcodes" false="" splitByBarcode} \ + ~{"--seqid-prefix " + seqIdPrefix} \ + $bamFiles + } + + output { + File fastaFile = outputPrefix + ".fasta.gz" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + bam: {description: "The input pacbio bam file(s).", category: "required"} + bamIndex: {description: "The .pbi index for the input file(s).", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} + splitByBarcode: {description: "Split output into multiple fasta files, by barcode pairs.", category: "advanced"} + seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + fastaFile: {description: "The fasta output file."} + } +} + +task Bam2Fastq { + input { + Array[File]+ bam + Array[File]+ bamIndex + String outputPrefix + Int compressionLevel = 1 + Boolean splitByBarcode = false + + String? seqIdPrefix + + String memory = "2GiB" + Int timeMinutes = 15 + String dockerImage = "quay.io/biocontainers/bam2fastx:1.3.1--hf05d43a_1" + } + + command { + set -e + mkdir -p "$(dirname ~{outputPrefix})" + + # Localise the bam and pbi files so they are next to each other in the + # current folder. 
+ bamFiles="" + for bamFile in ~{sep=" " bam} + do + ln $bamFile . + bamFiles=$bamFiles" $(basename $bamFile)" + done + + for index in ~{sep=" " bamIndex} + do + ln $index . + done + + bam2fastq \ + --output ~{outputPrefix} \ + -c ~{compressionLevel} \ + ~{true="--split-barcodes" false="" splitByBarcode} \ + ~{"--seqid-prefix " + seqIdPrefix} \ + $bamFiles + } + + output { + File fastqFile = outputPrefix + ".fastq.gz" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + bam: {description: "The input pacbio bam file(s).", category: "required"} + bamIndex: {description: "The .pbi index for the input file(s).", category: "required"} + outputPrefix: {description: "Output directory path + output file prefix.", category: "required"} + compressionLevel: {description: "Gzip compression level [1-9]", category: "advanced"} + splitByBarcode: {description: "Split output into multiple fastq files, by barcode pairs.", category: "advanced"} + seqIdPrefix: {description: "Prefix for sequence IDs in headers.", category: "advanced"} + memory: {description: "The amount of memory available to the job.", category: "advanced"} + timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"} + dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"} + + # outputs + fastqFile: {description: "The fastq output file."} + } +} diff --git a/bcftools.wdl b/bcftools.wdl index 2677899b..31c7db13 100644 --- a/bcftools.wdl +++ b/bcftools.wdl @@ -1,7 +1,5 @@ version 1.0 -# MIT License -# # Copyright (c) 2018 Leiden University Medical Center # # Permission is hereby granted, free of charge, to any person obtaining a copy @@ -22,25 +20,140 @@ version 1.0 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
-task View { +task Annotate { input { + Array[String] columns = [] + Boolean force = false + Boolean keepSites = false + Boolean noVersion = false + Array[String] samples = [] + Boolean singleOverlaps = false + Array[String] removeAnns = [] File inputFile + File? inputFileIndex String outputPath = "output.vcf.gz" - String memory = "256M" - Int timeMinutes = 1 + ceil(size(inputFile, "G")) + + File? annsFile + File? annsFileIndex + String? collapse + String? exclude + File? headerLines + String? newId + String? include + String? markSites + String? regions + File? regionsFile + File? renameChrs + File? samplesFile + + Int threads = 0 + String memory = "4GiB" + Int timeMinutes = 60 + ceil(size(inputFile, "G")) String dockerImage = "quay.io/biocontainers/bcftools:1.10.2--h4f4756c_2" - String outputType = "z" - Int compressionLevel = 1 } + Boolean compressed = basename(outputPath) != basename(outputPath, ".gz") + command { set -e mkdir -p "$(dirname ~{outputPath})" - bcftools view \ + bcftools annotate \ -o ~{outputPath} \ - -O ~{outputType} \ - -l ~{compressionLevel} \ + -O ~{true="z" false="v" compressed} \ + ~{"--annotations " + annsFile} \ + ~{"--collapse " + collapse} \ + ~{true="--columns" false="" length(columns) > 0} ~{sep="," columns} \ + ~{"--exclude " + exclude} \ + ~{true="--force" false="" force} \ + ~{"--header-lines " + headerLines} \ + ~{"--set-id " + newId} \ + ~{"--include " + include} \ + ~{true="--keep-sites" false="" keepSites} \ + ~{"--mark-sites " + markSites} \ + ~{true="--no-version" false="" noVersion} \ + ~{"--regions " + regions} \ + ~{"--regions-file " + regionsFile} \ + ~{"--rename-chrs " + renameChrs} \ + ~{true="--samples" false="" length(samples) > 0} ~{sep="," samples} \ + ~{"--samples-file " + samplesFile} \ + ~{true="--single-overlaps" false="" singleOverlaps} \ + ~{true="--remove" false="" length(removeAnns) > 0} ~{sep="," removeAnns} \ ~{inputFile} + + ~{if compressed then 'bcftools index --tbi ~{outputPath}' else ''} + } + + output 
{ + File outputVcf = outputPath + File? outputVcfIndex = outputPath + ".tbi" + } + + runtime { + memory: memory + time_minutes: timeMinutes + docker: dockerImage + } + + parameter_meta { + # inputs + columns: {description: "Comma-separated list of columns or tags to carry over from the annotation file (see man page for details).", category: "advanced"} + force: {description: "Continue even when parsing errors, such as undefined tags, are encountered.", category: "advanced"} + keepSites: {description: "Keep sites which do not pass -i and -e expressions instead of discarding them.", category: "advanced"} + noVersion: {description: "Do not append version and command line information to the output VCF header.", category: "advanced"} + samples: {description: "List of samples for sample stats, \"-\" to include all samples.", category: "advanced"} + singleOverlaps: {description: "keep memory requirements low with very large annotation files.", category: "advanced"} + removeAnns: {description: "List of annotations to remove (see man page for details).", category: "advanced"} + inputFile: {description: "A vcf or bcf file.", category: "required"} + inputFileIndex: {description: "The index for the input vcf or bcf.", category: "common"} + outputPath: {description: "The location the output VCF file should be written.", category: "common"} + annsFile: {description: "Bgzip-compressed and tabix-indexed file with annotations (see man page for details).", category: "common"} + annsFileIndex: {description: "The index for annsFile.", category: "common"} + collapse: {description: "Treat as identical records with>>
+
+ output {
+ Int position = read_int(stdout())
+ }
+
+ runtime {
+ # 4 gigs of memory to be able to build the docker image in singularity.
+ memory: "4GiB"
+ docker: dockerImage
+ timeMinutes: 5
+ }
+
+ parameter_meta {
+ # inputs
+ sampleIds: {description: "A list of sample ids.", category: "required"}
+ sample: {description: "The sample for which the position is wanted.", category: "required"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ position: {description: ""}
+ }
}
task MapMd5 {
input {
Map[String,String] map
+ String memory = "1GiB"
String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa"
}
@@ -150,7 +204,7 @@ task MapMd5 {
}
runtime {
- memory: "1G"
+ memory: memory
docker: dockerImage
}
}
@@ -160,6 +214,7 @@ task StringArrayMd5 {
input {
Array[String] stringArray
+ String memory = "1GiB"
String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa"
}
@@ -173,7 +228,7 @@ task StringArrayMd5 {
}
runtime {
- memory: "1G"
+ memory: memory
docker: dockerImage
}
}
@@ -182,6 +237,8 @@ task TextToFile {
input {
String text
String outputFile = "out.txt"
+
+ String memory = "1GiB"
Int timeMinutes = 1
String dockerImage = "debian@sha256:f05c05a218b7a4a5fe979045b1c8e2a9ec3524e5611ebfdd0ef5b8040f9008fa"
}
@@ -194,18 +251,22 @@ task TextToFile {
File out = outputFile
}
- parameter_meta {
- text: {description: "The text to print", category: "required"}
- outputFile: {description: "The name of the output file", category: "common"}
- timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
- }
runtime {
- memory: "1G"
+ memory: memory
time_minutes: timeMinutes
docker: dockerImage
}
+
+ parameter_meta {
+ # inputs
+ text: {description: "The text to print.", category: "required"}
+ outputFile: {description: "The name of the output file.", category: "common"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ out: {description: "File containing input text."}
+ }
}
task YamlToJson {
@@ -213,11 +274,12 @@ task YamlToJson {
File yaml
String outputJson = basename(yaml, "\.ya?ml$") + ".json"
+ String memory = "128MiB"
Int timeMinutes = 1
- String memory = "128M"
# biowdl-input-converter has python and pyyaml.
- String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.2.1--py_0"
+ String dockerImage = "quay.io/biocontainers/biowdl-input-converter:0.3.0--pyhdfd78af_0"
}
+
command {
set -e
mkdir -p "$(dirname ~{outputJson})"
@@ -230,6 +292,7 @@ task YamlToJson {
json.dump(content, output_json)
CODE
}
+
output {
File json = outputJson
}
@@ -241,12 +304,15 @@ task YamlToJson {
}
parameter_meta {
+ # inputs
yaml: {description: "The YAML file to convert.", category: "required"}
outputJson: {description: "The location the output JSON file should be written to.", category: "advanced"}
memory: {description: "The maximum amount of memory the job will need.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ json: {description: "JSON file version of input YAML."}
}
}
diff --git a/cutadapt.wdl b/cutadapt.wdl
index 7faeaff1..c695c08e 100644
--- a/cutadapt.wdl
+++ b/cutadapt.wdl
@@ -32,6 +32,14 @@ task Cutadapt {
Array[String] adapterRead2 = []
Array[String] frontRead2 = []
Array[String] anywhereRead2 = []
+ String reportPath = "cutadapt_report.txt"
+ # Cutadapt compresses the zipped output files with a ridiculously
+ # high compression level (5 or 6).
+ # This is not the fast compression preset. It takes up to 400% more
+ # CPU time for a 20% reduction in file size.
+ # Hence we use compression level 1 here.
+ Int compressionLevel = 1 # This only affects outputs with the .gz suffix.
+
Boolean? interleaved
String? pairFilter
Float? errorRate
@@ -52,7 +60,7 @@ task Cutadapt {
String? stripSuffix
String? prefix
String? suffix
- Int? minimumLength = 2 # Necessary to prevent creation of empty reads or 1 base reads.
+ Int? minimumLength = 2 # Necessary to prevent creation of empty reads or 1 base reads.
Int? maximumLength
Int? maxN
Boolean? discardTrimmed
@@ -73,15 +81,12 @@ task Cutadapt {
Boolean? bwa
Boolean? zeroCap
Boolean? noZeroCap
- String reportPath = "cutadapt_report.txt"
- # Cutadapt compresses the zipped output files with a ridiculously high compression level (5 or 6).
- # This is not the fast compression preset. It takes up to 400% more CPU time for a 20% reduction in file size.
- # Hence we use compression level 1 here.
- Int compressionLevel = 1 # This only affects outputs with the .gz suffix.
+ Boolean revcomp = false
+
Int cores = 4
- String memory = "~{300 + 100 * cores}M"
+ String memory = "5GiB"
Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores)
- String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1"
+ String dockerImage = "quay.io/biocontainers/cutadapt:4.4--py310h1425a21_0"
}
String realRead2output = select_first([read2output, "cut_r2.fq.gz"])
@@ -145,6 +150,7 @@ task Cutadapt {
~{true="--bwa" false="" bwa} \
~{true="--zero-cap" false="" zeroCap} \
~{true="--no-zero-cap" false="" noZeroCap} \
+ ~{if revcomp then "--revcomp" else ""} \
~{read1} \
~{read2} \
~{"> " + reportPath}
@@ -152,8 +158,8 @@ task Cutadapt {
output{
File cutRead1 = read1output
- File? cutRead2 = read2output
File report = reportPath
+ File? cutRead2 = read2output
File? tooLongOutput=tooLongOutputPath
File? tooShortOutput=tooShortOutputPath
File? untrimmedOutput=untrimmedOutputPath
@@ -173,22 +179,19 @@ task Cutadapt {
}
parameter_meta {
+ # inputs
read1: {description: "The first or single end fastq file to be run through cutadapt.", category: "required"}
read2: {description: "An optional second end fastq file to be run through cutadapt.", category: "common"}
read1output: {description: "The name of the resulting first or single end fastq file.", category: "common"}
read2output: {description: "The name of the resulting second end fastq file.", category: "common"}
- adapter: {description: "A list of 3' ligated adapter sequences to be cut from the given first or single end fastq file.",
- category: "common"}
- front: {description: "A list of 5' ligated adapter sequences to be cut from the given first or single end fastq file.",
- category: "advanced"}
- anywhere: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given first or single end fastq file.",
- category: "advanced"}
- adapterRead2: {description: "A list of 3' ligated adapter sequences to be cut from the given second end fastq file.",
- category: "common"}
- frontRead2: {description: "A list of 5' ligated adapter sequences to be cut from the given second end fastq file.",
- category: "advanced"}
- anywhereRead2: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given second end fastq file.",
- category: "advanced"}
+ adapter: {description: "A list of 3' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "common"}
+ front: {description: "A list of 5' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "advanced"}
+ anywhere: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "advanced"}
+ adapterRead2: {description: "A list of 3' ligated adapter sequences to be cut from the given second end fastq file.", category: "common"}
+ frontRead2: {description: "A list of 5' ligated adapter sequences to be cut from the given second end fastq file.", category: "advanced"}
+ anywhereRead2: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given second end fastq file.", category: "advanced"}
+        reportPath: {description: "The name of the file to write cutadapt's stdout to, this contains some metrics.", category: "common"}
+ compressionLevel: {description: "The compression level if gzipped output is used.", category: "advanced"}
interleaved: {description: "Equivalent to cutadapt's --interleaved flag.", category: "advanced"}
pairFilter: {description: "Equivalent to cutadapt's --pair-filter option.", category: "advanced"}
errorRate: {description: "Equivalent to cutadapt's --error-rate option.", category: "advanced"}
@@ -230,13 +233,24 @@ task Cutadapt {
bwa: {description: "Equivalent to cutadapt's --bwa flag.", category: "advanced"}
zeroCap: {description: "Equivalent to cutadapt's --zero-cap flag.", category: "advanced"}
noZeroCap: {description: "Equivalent to cutadapt's --no-zero-cap flag.", category: "advanced"}
- reportPath: {description: "The name of the file to write cutadapts's stdout to, this contains some metrics.",
- category: "common"}
- compressionLevel: {description: "The compression level if gzipped output is used.", category: "advanced"}
+ revcomp: {description: "Equivalent to cutadapt's --revcomp flag.", category: "advanced"}
cores: {description: "The number of cores to use.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ cutRead1: {description: "Trimmed read one."}
+ report: {description: "Per-adapter statistics file."}
+ cutRead2: {description: "Trimmed read two in pair."}
+ tooLongOutput: {description: "Reads that are too long according to -M."}
+ tooShortOutput: {description: "Reads that are too short according to -m."}
+ untrimmedOutput: {description: "All reads without adapters (instead of the regular output file)."}
+        tooLongPairedOutput: {description: "The second reads of pairs that were too long according to -M."}
+        tooShortPairedOutput: {description: "The second reads of pairs that were too short according to -m."}
+ untrimmedPairedOutput: {description: "The second reads in a pair that were not trimmed."}
+ infoFile: {description: "Detailed information about where adapters were found in each read."}
+ restFile: {description: "The rest file."}
+ wildcardFile: {description: "The wildcard file."}
}
}
diff --git a/deconstructsigs.wdl b/deconstructsigs.wdl
new file mode 100644
index 00000000..c44bf9c0
--- /dev/null
+++ b/deconstructsigs.wdl
@@ -0,0 +1,66 @@
+# Copyright (c) 2021 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+version 1.0
+
+task DeconstructSigs {
+ input {
+ File signaturesMatrix
+ File signaturesReference
+ String outputPath = "./signatures.rds"
+
+ Int timeMinutes = 15
+ String memory = "4GiB"
+ String dockerImage = "quay.io/biocontainers/r-deconstructsigs:1.9.0--r41hdfd78af_1"
+ }
+
+ command {
+ R --no-echo << EOF
+ library(deconstructSigs)
+ tumor <- read.table("~{signaturesMatrix}", check.names=F)
+ ref <- data.frame(t(read.table("~{signaturesReference}", check.names=F, header=T, row.names="Type")), check.names=F)
+ tumor <- tumor[,colnames(ref)]
+
+ sigs <- whichSignatures(tumor.ref=tumor, row.names(tumor), signatures.ref=ref, contexts.needed=T)
+ saveRDS(sigs, "~{outputPath}")
+ EOF
+ }
+
+ output {
+ File signatureRDS = outputPath
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ }
+
+ parameter_meta {
+        signaturesMatrix: {description: "A table containing columns representing mutation types (matching the types in the signatures reference) and one row with the counts for each of these types for the sample of interest.",
+ category: "required"}
+ signaturesReference: {description: "A table describing the mutational signatures, formatted like those provided by COSMIC.",
+ category: "required"}
+ outputPath: {description: "The location the output will be written to.", category: "common"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ memory: {description: "The amount of memory available to the job.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+ }
+}
\ No newline at end of file
diff --git a/deepvariant.wdl b/deepvariant.wdl
new file mode 100644
index 00000000..b0ed2a19
--- /dev/null
+++ b/deepvariant.wdl
@@ -0,0 +1,109 @@
+version 1.0
+
+# Copyright (c) 2018 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task RunDeepVariant {
+ input {
+ File referenceFasta
+ File referenceFastaIndex
+ File inputBam
+ File inputBamIndex
+ String modelType
+ String outputVcf = "sample.vcf.gz"
+ String? postprocessVariantsExtraArgs
+ File? customizedModel
+ Int numShards = 8
+ String? outputGVcf
+ String? outputGVcfIndex
+ File? regions
+ String? sampleName
+ Boolean VCFStatsReport = true
+
+ # Most of the memory used is at the end, in the step where the variants
+ # are merged. This is a single-threaded high memory step. The number
+ # of shards does not influence the memory so much.
+ # The provided memory here is enough for merge human chromosome 1.
+ String memory = "48GiB"
+ Int timeMinutes = 5000
+ # Version 1.8.0 has a bug.
+ # https://github.com/google/deepvariant/issues/912
+ String dockerImage = "google/deepvariant:1.6.1"
+ }
+
+ command {
+ set -e
+ /opt/deepvariant/bin/run_deepvariant \
+ --ref ~{referenceFasta} \
+ --reads ~{inputBam} \
+ --model_type ~{modelType} \
+ --output_vcf ~{outputVcf} \
+ ~{"--output_gvcf " + outputGVcf} \
+ ~{"--customized_model " + customizedModel} \
+ ~{"--num_shards " + numShards} \
+ ~{"--regions " + regions} \
+ ~{"--sample_name " + sampleName} \
+ ~{"--postprocess_variants_extra_args " + postprocessVariantsExtraArgs} \
+ ~{true="--vcf_stats_report" false="--novcf_stats_report" VCFStatsReport}
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
+ cpu: numShards
+ }
+
+ output {
+ File outputVCF = outputVcf
+ File outputVCFIndex = outputVcf + ".tbi"
+ Array[File] outputVCFStatsReport = glob("*.visual_report.html")
+ File? outputGVCF = outputGVcf
+ File? outputGVCFIndex = outputGVcfIndex
+ }
+
+ parameter_meta {
+ # inputs
+ referenceFasta: {description: "Genome reference to use.", category: "required"}
+ referenceFastaIndex: {description: "Index for the genome reference file.", category: "required"}
+ inputBam: {description: "Aligned, sorted, indexed BAM file containing the reads we want to call.", category: "required"}
+ inputBamIndex: {description: "Index for the input bam file.", category: "required"}
+        modelType: {description: "Type of model to use for variant calling. Each model_type has an associated default model, which can be overridden by the --customized_model flag.", category: "required"}
+ outputVcf: {description: "Path where we should write VCF file.", category: "required"}
+        postprocessVariantsExtraArgs: {description: "A comma-separated list of flag_name=flag_value. 'flag_name' has to be valid flags for postprocess_variants.py.", category: "advanced"}
+ customizedModel: {description: "A path to a model checkpoint to load for the `call_variants` step. If not set, the default for each --model_type will be used.", category: "advanced"}
+ numShards: {description: "Number of shards for make_examples step.", category: "common"}
+ outputGVcf: {description: "Path where we should write gVCF file.", category: "common"}
+ outputGVcfIndex: {description: "Path to where the gVCF index file will be written. This is needed as a workaround, set it to `outputGVcf + '.tbi.'`", category: "common"}
+ regions: {description: "List of regions we want to process, in BED/BEDPE format.", category: "advanced"}
+ sampleName: {description: "Sample name to use instead of the sample name from the input reads BAM (SM tag in the header).", category: "common"}
+ VCFStatsReport: {description: "Output a visual report (HTML) of statistics about the output VCF.", category: "common"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ outputVCF: {description: "Output VCF file."}
+ outputVCFIndex: {description: "Index of output VCF file."}
+ outputVCFStatsReport: {description: "Statistics file."}
+ outputGVCF: {description: "GVCF version of VCF file(s)."}
+ outputGVCFIndex: {description: "Index of GVCF file(s)."}
+ }
+}
diff --git a/delly.wdl b/delly.wdl
index efa1bf60..b952da7e 100644
--- a/delly.wdl
+++ b/delly.wdl
@@ -1,7 +1,5 @@
version 1.0
-# MIT License
-#
# Copyright (c) 2018 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -24,15 +22,18 @@ version 1.0
task CallSV {
input {
- File bamFile
- File bamIndex
+ Array[File]+ bamFile
+ Array[File]+ bamIndex
File referenceFasta
File referenceFastaFai
- String outputPath = "./delly/delly.vcf"
+ String outputPath = "./delly/delly.bcf"
- String memory = "15G"
- Int timeMinutes = 300
- String dockerImage = "quay.io/biocontainers/delly:0.8.1--h4037b6b_1"
+ File? genotypeBcf
+ File? genotypeBcfIndex
+
+ String memory = "15GiB"
+ Int timeMinutes = 600
+ String dockerImage = "quay.io/biocontainers/delly:1.1.6--ha41ced6_0"
}
command {
@@ -41,11 +42,13 @@ task CallSV {
delly call \
-o ~{outputPath} \
-g ~{referenceFasta} \
- ~{bamFile}
+ ~{"-v " + genotypeBcf} \
+ ~{sep=" " bamFile}
}
output {
File dellyBcf = outputPath
+ File dellyBcfIndex = outputPath + ".csi"
}
runtime {
@@ -56,13 +59,68 @@ task CallSV {
parameter_meta {
# inputs
- bamFile: {description: "The bam file to process.", category: "required"}
- bamIndex: {description: "The index bam file.", category: "required"}
+ bamFile: {description: "The bam files to process.", category: "required"}
+ bamIndex: {description: "The indexes for the bam files.", category: "required"}
referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"}
- referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" }
- outputPath: {description: "The location the output VCF file should be written.", category: "common"}
- memory: {description: "The memory required to run the programs", category: "advanced"}
+ referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" }
+ outputPath: {description: "The location the output BCF file should be written.", category: "common"}
+ genotypeBcf: {description: "A BCF with SVs to get genotyped in the samples.", category: "advanced"}
+ genotypeBcfIndex: {description: "The index for the genotype BCF file.", category: "advanced"}
+ memory: {description: "The memory required to run the programs.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ dellyBcf: {description: "File containing structural variants."}
}
}
+
+
+task SomaticFilter {
+ input {
+ File dellyBcf
+ File dellyBcfIndex
+ Array[String]+ normalSamples
+ Array[String]+ tumorSamples
+ String outputPath = "./delly/delly_filter.bcf"
+
+ String memory = "15GiB"
+ Int timeMinutes = 300
+ String dockerImage = "quay.io/biocontainers/delly:1.1.6--ha41ced6_0"
+ }
+
+ command <<<
+ set -e
+ mkdir -p "$(dirname ~{outputPath})"
+ for SAMPLE in ~{sep=" " normalSamples}; do echo -e "${SAMPLE}\tcontrol" >> samples.tsv; done
+ for SAMPLE in ~{sep=" " tumorSamples}; do echo -e "${SAMPLE}\ttumor" >> samples.tsv; done
+
+ delly filter \
+ -f somatic \
+ -o ~{outputPath} \
+ -s samples.tsv \
+ ~{dellyBcf}
+ >>>
+
+ output {
+ File filterBcf = outputPath
+ File filterBcfIndex = outputPath + ".csi"
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ dellyBcf: {description: "The BCF file produced by delly.", category: "required"}
+ dellyBcfIndex: {description: "The index for the delly BCF file.", category: "required"}
+ normalSamples: {description: "The names for the normal samples as used in the delly BCF file.", category: "required"}
+ tumorSamples: {description: "The names for the tumor samples as used in the delly BCF file.", category: "required"}
+ outputPath: {description: "The location the output BCF file should be written.", category: "common"}
+ memory: {description: "The memory required to run the programs.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+ }
+}
\ No newline at end of file
diff --git a/duphold.wdl b/duphold.wdl
new file mode 100644
index 00000000..0426da56
--- /dev/null
+++ b/duphold.wdl
@@ -0,0 +1,75 @@
+version 1.0
+
+# Copyright (c) 2020 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task Duphold {
+ input {
+ File inputVcf
+ File bamFile
+ File bamIndex
+ File referenceFasta
+ File referenceFastaFai
+ String sample
+ String outputPath = "./duphold.vcf"
+
+ String memory = "15GiB"
+ Int timeMinutes = 1440
+ String dockerImage = "quay.io/biocontainers/duphold:0.2.1--h516909a_1"
+ }
+
+ command {
+ set -e
+ mkdir -p "$(dirname ~{outputPath})"
+ export DUPHOLD_SAMPLE_NAME=~{sample}
+ duphold \
+ -v ~{inputVcf} \
+ -b ~{bamFile} \
+ -f ~{referenceFasta} \
+ -o ~{outputPath}
+ }
+
+ output {
+ File outputVcf = outputPath
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ # inputs
+ inputVcf: {description: "The VCF file to process.", category: "required"}
+ bamFile: {description: "The bam file to process.", category: "required"}
+ bamIndex: {description: "The index of the bam file.", category: "required"}
+ referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"}
+ referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" }
+ sample: {description: "The name of the sample.", category: "required"}
+ outputPath: {description: "The location the output VCF file should be written.", category: "common"}
+ memory: {description: "The memory required to run the programs.", category: "advanced"}
+ timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ outputVcf: {description: "Duphold annotated VCF file."}
+ }
+}
diff --git a/extractSigPredictHRD.wdl b/extractSigPredictHRD.wdl
new file mode 100644
index 00000000..1520b608
--- /dev/null
+++ b/extractSigPredictHRD.wdl
@@ -0,0 +1,71 @@
+version 1.0
+
+# Copyright (c) 2021 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task ExtractSigPredictHRD {
+ input {
+ String outputDir = "."
+ String sampleName
+ File snvIndelVcf
+ File snvIndelVcfIndex
+ File svVcf
+ File svVcfIndex
+ Boolean hg38 = false
+
+ String memory = "3GiB"
+ Int timeMinutes = 10
+ String dockerImage = "quay.io/biowdl/chord-mutsigextractor:2.00_1.14"
+ }
+
+ command {
+ extractSigPredictHRD.R \
+ ~{outputDir} \
+ ~{sampleName} \
+ ~{snvIndelVcf} \
+ ~{svVcf} \
+ ~{if hg38 then "RG_38" else "RG_37"}
+ }
+
+ output {
+ File chordPrediction = "~{outputDir}/~{sampleName}_chord_prediction.txt"
+ File chordSignatures = "~{outputDir}/~{sampleName}_chord_signatures.txt"
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ outputDir: {description: "The directory the output will be written to.", category: "required"}
+ sampleName: {description: "The name of the sample.", category: "required"}
+ snvIndelVcf: {description: "A VCF file with SNVs and indels.", category: "required"}
+ snvIndelVcfIndex: {description: "The index for the SNV/indel VCF file.", category: "required"}
+ svVcf: {description: "A VCF file with SVs.", category: "required"}
+ svVcfIndex: {description: "The index for the SV VCF file.", category: "required"}
+
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
\ No newline at end of file
diff --git a/fastp.wdl b/fastp.wdl
new file mode 100644
index 00000000..9849738b
--- /dev/null
+++ b/fastp.wdl
@@ -0,0 +1,124 @@
+version 1.0
+
+# MIT License
+#
+# Copyright (c) 2022 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task Fastp {
+ input {
+ File read1
+ File read2
+ String outputPathR1
+ String outputPathR2
+ String htmlPath
+ String jsonPath
+
+ Int compressionLevel = 1
+ Boolean correction = false
+ Int lengthRequired = 15
+ Int? split
+ Boolean performAdapterTrimming = true
+ Boolean performQualityFiltering = true
+ Boolean performLengthFiltering = true
+ Boolean? performPolyGTrimming
+
+ Int threads = 4
+ String memory = "50GiB"
+ Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 6.0 / threads)
+ String dockerImage = "quay.io/biocontainers/fastp:0.23.2--h5f740d0_3"
+
+ Int? noneInt
+ }
+
+ String outputDirR1 = sub(outputPathR1, basename(outputPathR1), "")
+ String outputDirR2 = sub(outputPathR2, basename(outputPathR2), "")
+
+ String polyGTrimmingFlag = if defined(performPolyGTrimming)
+ then
+ if select_first([performPolyGTrimming]) then "--trim_poly_g" else "--disable_trim_poly_g"
+ else ""
+
+ Int? effectiveSplit = if select_first([split, 1]) > 1 then split else noneInt
+
+ command <<<
+ set -e
+ mkdir -p $(dirname ~{outputPathR1})
+ mkdir -p $(dirname ~{outputPathR2})
+ mkdir -p $(dirname ~{htmlPath})
+ mkdir -p $(dirname ~{jsonPath})
+
+ # predict output paths
+ seq 1 ~{if defined(effectiveSplit) then effectiveSplit else "2"} | awk '{print "~{outputDirR1}/"$0".~{basename(outputPathR1)}"}' > r1_paths
+ seq 1 ~{if defined(effectiveSplit) then effectiveSplit else "2"} | awk '{print "~{outputDirR2}/"$0".~{basename(outputPathR2)}"}' > r2_paths
+ fastp \
+ -i ~{read1} \
+ ~{"-I " + read2} \
+ -o ~{outputPathR1} \
+ ~{"-O " + outputPathR2} \
+ -h ~{htmlPath} \
+ -j ~{jsonPath} \
+ -z ~{compressionLevel} \
+ ~{if correction then "--correction" else ""} \
+ --length_required ~{lengthRequired} \
+ --thread ~{select_first([effectiveSplit, threads])} \
+ ~{"--split " + effectiveSplit} \
+ ~{if defined(effectiveSplit) then "-d 0" else ""} \
+ ~{if performAdapterTrimming then "" else "--disable_adapter_trimming"} \
+ ~{if performQualityFiltering then "" else "--disable_quality_filtering"} \
+ ~{if performLengthFiltering then "" else "--disable_length_filtering"} \
+ ~{polyGTrimmingFlag}
+ >>>
+
+ output {
+ File htmlReport = htmlPath
+ File jsonReport = jsonPath
+ Array[File] clippedR1 = if defined(effectiveSplit) then read_lines("r1_paths") else [outputPathR1]
+ Array[File] clippedR2 = if defined(effectiveSplit) then read_lines("r2_paths") else [outputPathR2]
+ }
+
+ runtime {
+ cpu: select_first([effectiveSplit, threads])
+ memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ read1: {description: "The R1 fastq file.", category: "required"}
+ read2: {description: "The R2 fastq file.", category: "required"}
+ outputPathR1: {description: "The output path for the R1 file.", category: "required"}
+ outputPathR2: {description: "The output path for the R2 file.", category: "required"}
+ htmlPath: {description: "The path to write the html report to.", category: "required"}
+ jsonPath: {description: "The path to write the json report to.", category: "required"}
+ compressionLevel: {description: "The compression level to use for the output.", category: "advanced"}
+ correction: {description: "Whether or not to apply overlap based correction.", category: "advanced"}
+ lengthRequired: {description: "The minimum read length.", category: "advanced"}
+ split: {description: "The number of chunks to split the files into. Number of threads will be set equal to the amount of splits.", category: "common"}
+ performAdapterTrimming: {description: "Whether adapter trimming should be performed or not.", category: "advanced"}
+ performQualityFiltering: {description: "Whether reads should be filtered based on quality scores.", category: "advanced"}
+ performLengthFiltering: {description: "Whether reads should be filtered based on lengths.", category: "advanced"}
+ performPolyGTrimming: {description: "Whether or not poly-G-tail trimming should be performed. If undefined fastp's default behaviour will be used, ie. enabled for NextSeq/NovaSeq data as detected from read headers.", category: "advanced"}
+ threads: {description: "The number of threads to use. Only used if the split input is not set.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+ }
+}
\ No newline at end of file
diff --git a/fastqFilter.wdl b/fastqFilter.wdl
new file mode 100644
index 00000000..3701b8aa
--- /dev/null
+++ b/fastqFilter.wdl
@@ -0,0 +1,66 @@
+version 1.0
+
+# MIT License
+#
+# Copyright (c) 2023 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task FastqFilter {
+ input {
+ Array[File]+ fastq
+ Array[String]+ outputPaths
+ Int? minLength
+ Int? maxLength
+
+ String memory = "1GiB"
+ Int timeMinutes = 1 + ceil(size(fastq, "G"))
+ String dockerImage = "quay.io/biocontainers/fastq-filter:0.3.0--py39hf95cd2a_1"
+ }
+
+ command {
+ set -e
+ mkdir -p $(dirname ~{sep=" " outputPaths})
+ fastq-filter \
+ -o ~{sep=" -o " outputPaths} \
+ ~{"-l " + minLength} \
+ ~{"-L " + maxLength} \
+ ~{sep=" " fastq}
+ }
+
+ output {
+ Array[File] filtered = outputPaths
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ fastq: {description: "A list of fastq files to filter.", category: "required"}
+ outputPaths: {description: "A list containing the output paths for each input fastq file.", category: "required"}
+ minLength: {description: "Equivalent to fastq-filter's `--min-length` option.", category: "common"}
+ maxLength: {description: "Equivalent to fastq-filter's `--max-length` option.", category: "common"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+ }
+}
\ No newline at end of file
diff --git a/fastqc.wdl b/fastqc.wdl
index 04b6813f..da31882c 100644
--- a/fastqc.wdl
+++ b/fastqc.wdl
@@ -29,6 +29,7 @@ task Fastqc {
Boolean noFilter = false
Boolean extract = false
Boolean nogroup = false
+
Int? minLength
String? format
File? contaminants
@@ -37,33 +38,36 @@ task Fastqc {
Int? kmers
String? dir
- Int threads = 1
# Set javaXmx a little high. Equal to fastqc default with 7 threads.
# This is because some fastq files need more memory. 2G per core
# is a nice cluster default, so we use all the rest of the memory for
# fastqc so we should have as little OOM crashes as possible even with
# weird edge case fastq's.
- String javaXmx="1750M"
- String memory = "2G"
+ String javaXmx="1750M"
+ Int threads = 1
+ String memory = "2GiB"
Int timeMinutes = 1 + ceil(size(seqFile, "G")) * 4
- String dockerImage = "quay.io/biocontainers/fastqc:0.11.9--0"
- Array[File]? NoneArray
- File? NoneFile
+ String dockerImage = "quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0"
+
+ Array[File]? noneArray
+ File? noneFile
}
# Chops of the .gz extension if present.
- # The Basename needs to be taken here. Otherwise paths might differ between similar jobs.
+ # The Basename needs to be taken here. Otherwise paths might differ
+ # between similar jobs.
String name = basename(sub(seqFile, "\.gz$",""))
- # This regex chops of the extension and replaces it with _fastqc for the reportdir.
+ # This regex chops off the extension and replaces it with _fastqc for
+ # the reportdir.
# Just as fastqc does it.
String reportDir = outdirPath + "/" + sub(name, "\.[^\.]*$", "_fastqc")
- # We reimplement the perl wrapper here. This has the advantage that it gives
- # us more control over the amount of memory used.
+ # We reimplement the perl wrapper here. This has the advantage that it
+ # gives us more control over the amount of memory used.
command <<<
set -e
- mkdir -p ~{outdirPath}
- FASTQC_DIR="/usr/local/opt/fastqc-0.11.9"
+ mkdir -p "~{outdirPath}"
+ FASTQC_DIR="/usr/local/opt/fastqc-0.12.1"
export CLASSPATH="$FASTQC_DIR:$FASTQC_DIR/sam-1.103.jar:$FASTQC_DIR/jbzip2-0.9.jar:$FASTQC_DIR/cisd-jhdf5.jar"
java -Djava.awt.headless=true -XX:ParallelGCThreads=1 \
-Xms200M -Xmx~{javaXmx} \
@@ -86,23 +90,24 @@ task Fastqc {
>>>
output {
- File? rawReport = if extract then reportDir + "/fastqc_data.txt" else NoneFile
File htmlReport = reportDir + ".html"
File reportZip = reportDir + ".zip"
- File? summary = if extract then reportDir + "/summary.txt" else NoneFile
- Array[File]? images = if extract then glob(reportDir + "/Images/*.png") else NoneArray
+ File? summary = if extract then reportDir + "/summary.txt" else noneFile
+ File? rawReport = if extract then reportDir + "/fastqc_data.txt" else noneFile
+ Array[File]? images = if extract then glob(reportDir + "/Images/*.png") else noneArray
}
runtime {
cpu: threads
memory: memory
- docker: dockerImage
time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
seqFile: {description: "A fastq file.", category: "required"}
- outdirPath: {description: "The path to write the output to", catgory: "required"}
+ outdirPath: {description: "The path to write the output to.", category: "required"}
casava: {description: "Equivalent to fastqc's --casava flag.", category: "advanced"}
nano: {description: "Equivalent to fastqc's --nano flag.", category: "advanced"}
noFilter: {description: "Equivalent to fastqc's --nofilter flag.", category: "advanced"}
@@ -115,24 +120,30 @@ task Fastqc {
limits: {description: "Equivalent to fastqc's --limits option.", category: "advanced"}
kmers: {description: "Equivalent to fastqc's --kmers option.", category: "advanced"}
dir: {description: "Equivalent to fastqc's --dir option.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
threads: {description: "The number of cores to use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ htmlReport: {description: "HTML report file."}
+ reportZip: {description: "Source data file."}
+ summary: {description: "Summary file."}
+ rawReport: {description: "Raw report file."}
+ images: {description: "Images in report file."}
}
meta {
WDL_AID: {
- exclude: ["NoneFile", "NoneArray"]
+ exclude: ["noneFile", "noneArray"]
}
}
}
task GetConfiguration {
input {
+ String memory = "2G" # Needs more than 1 to pull the docker image.
Int timeMinutes = 1
String dockerImage = "quay.io/biocontainers/fastqc:0.11.7--4"
}
@@ -155,14 +166,20 @@ task GetConfiguration {
}
runtime {
- memory: "2G" # Needs more than 1 to pull the docker image
+ memory: memory
time_minute: timeMinutes
docker: dockerImage
}
parameter_meta {
+ # inputs
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ memory: {description: "The amount of memory available to the job.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ adapterList: {description: "List of adapters found."}
+ contaminantList: {description: "List of contaminants found."}
+ limits: {description: "Limits file."}
}
}
diff --git a/fastqsplitter.wdl b/fastqsplitter.wdl
index c523cf8a..4a02697c 100644
--- a/fastqsplitter.wdl
+++ b/fastqsplitter.wdl
@@ -1,7 +1,5 @@
version 1.0
-# MIT License
-#
# Copyright (c) 2019 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -26,19 +24,24 @@ task Fastqsplitter {
input {
File inputFastq
Array[String]+ outputPaths
- String dockerImage = "quay.io/biocontainers/fastqsplitter:1.1.0--py37h516909a_1"
+
Int? compressionLevel
Int? threadsPerFile
- # fastqplitter utilizes one thread per input file and one or more threads per output file + one thread for the application.
- # Since a compression level of 1 is used, each output file uses approx 0.5 cores.
+
+ # fastqsplitter utilizes one thread per input file and one or
+ # more threads per output file + one thread for the application.
+ # Since a compression level of 1 is used, each output file
+ # uses approx 0.5 cores.
Int cores = 1 + ceil(0.5 * length(outputPaths))
+ String dockerImage = "quay.io/biocontainers/fastqsplitter:1.1.0--py37h516909a_1"
}
# Busybox mkdir does not accept multiple paths.
command <<<
set -e
for FILE in ~{sep=' ' outputPaths}
- do mkdir -p "$(dirname $FILE)"
+ do
+ mkdir -p "$(dirname ${FILE})"
done
fastqsplitter \
~{"-c " + compressionLevel} \
@@ -51,15 +54,16 @@ task Fastqsplitter {
Array[File] chunks = outputPaths
}
- # Using very safe margins here. 10MB/300MB per outputfile is used for single-threaded/multi-threaded compression.
+ # Using very safe margins here. 10MB/300MB per outputfile is used for
+ # single-threaded/multi-threaded compression.
Float memoryPerFile = if select_first([threadsPerFile, 1]) > 1 then 0.40 else 0.02
Int fastqsplitterMemory = ceil(0.100 + memoryPerFile * length(outputPaths))
- # Make sure a minimum of 2 GB is present to pull the singularity image
+ # Make sure a minimum of 2 GB is present to pull the singularity image.
Int memory = if fastqsplitterMemory <= 2 then 2 else fastqsplitterMemory
runtime {
- memory: "~{memory}G"
- docker: dockerImage
cpu: cores
+ memory: "~{memory}GiB"
+ docker: dockerImage
}
}
diff --git a/fgbio.wdl b/fgbio.wdl
new file mode 100644
index 00000000..15fb0ea4
--- /dev/null
+++ b/fgbio.wdl
@@ -0,0 +1,68 @@
+version 1.0
+
+# Copyright (c) 2017 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task AnnotateBamWithUmis {
+ input {
+ File inputBam
+ File inputUmi
+ String outputPath
+
+ String memory = "120GiB"
+ Int timeMinutes = 360
+ String javaXmx="100G"
+ String dockerImage = "quay.io/biocontainers/fgbio:1.4.0--hdfd78af_0"
+ }
+
+ command {
+ set -e
+ mkdir -p "$(dirname ~{outputPath})"
+ fgbio -Xmx~{javaXmx} \
+ AnnotateBamWithUmis \
+ -i ~{inputBam} \
+ -f ~{inputUmi} \
+ -o ~{outputPath}
+ }
+
+ output {
+ File outputBam = outputPath
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ # inputs
+ inputBam: {description: "The input BAM file.", category: "required"}
+ inputUmi: {description: "The input fastq file with UMIs.", category: "required"}
+ outputPath: {description: "Output directory path + output file.", category: "required"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ outputBam: {description: "UMI-annotated output BAM file."}
+ }
+}
diff --git a/flash.wdl b/flash.wdl
index 6e704921..7b50e0d7 100644
--- a/flash.wdl
+++ b/flash.wdl
@@ -24,16 +24,17 @@ import "common.wdl" as common
task Flash {
input {
- String? preCommand
FastqPair inputFastq
String outdirPath
String outPrefix = "flash"
+ Boolean compress = true
+
+ String? preCommand
Int? minOverlap
Int? maxOverlap
- Boolean compress = true
Int threads = 2
- String memory = "2G"
+ String memory = "2GiB"
}
command {
@@ -55,8 +56,8 @@ task Flash {
File notCombined1 = outdirPath + "/" + outPrefix + ".notCombined_1.fastq.gz"
File notCombined2 = outdirPath + "/" + outPrefix + ".notCombined_2.fastq.gz"
FastqPair notCombined = object {
- R1: notCombined1,
- R2: notCombined2
+ R1: notCombined1,
+ R2: notCombined2
}
File hist = outdirPath + "/" + outPrefix + ".hist"
File histogram = outdirPath + "/" + outPrefix + ".histogram"
@@ -66,5 +67,4 @@ task Flash {
cpu: threads
memory: memory
}
-
-}
\ No newline at end of file
+}
diff --git a/gatk.wdl b/gatk.wdl
index e0209a0c..655a0b66 100644
--- a/gatk.wdl
+++ b/gatk.wdl
@@ -28,12 +28,13 @@ task AnnotateIntervals {
String annotatedIntervalsPath = "intervals.annotated.tsv"
File intervals
String intervalMergingRule = "OVERLAPPING_ONLY"
+ Int featureQueryLookahead = 1000000
+
File? mappabilityTrack
File? segmentalDuplicationTrack
- Int featureQueryLookahead = 1000000
- String memory = "3G"
String javaXmx = "2G"
+ String memory = "3GiB"
Int timeMinutes = 5
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -57,12 +58,13 @@ task AnnotateIntervals {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
referenceFasta: {description: "The reference fasta file.", category: "required"}
referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
@@ -71,17 +73,18 @@ task AnnotateIntervals {
intervalMergingRule: {description: "Equivalent to gatk AnnotateIntervals' `--interval-merging-rule` option.", category: "advanced"}
mappabilityTrack: {description: "Equivalent to gatk AnnotateIntervals' `--mappability-track` option.", category: "common"}
 segmentalDuplicationTrack: {description: "Equivalent to gatk AnnotateIntervals' `--segmental-duplication-track` option.", category: "common"}
- featureQueryLookahead: {description: "Equivalent to gatk AnnotateIntervals' `--feature-query-lookahead` option", category: "advanced"}
+ featureQueryLookahead: {description: "Equivalent to gatk AnnotateIntervals' `--feature-query-lookahead` option.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ annotatedIntervals: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a sequence dictionary, a row specifying the column headers for the contained annotations, and the corresponding entry rows."}
}
}
-# Apply Base Quality Score Recalibration (BQSR) model
+# Apply Base Quality Score Recalibration (BQSR) model.
task ApplyBQSR {
input {
File inputBam
@@ -93,9 +96,11 @@ task ApplyBQSR {
File referenceFastaDict
File referenceFastaFai
- Int memoryMb = javaXmxMb + 512
Int javaXmxMb = 2048
- Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used.
+ Int memoryMb = javaXmxMb + 512
+ # This will likely be used with intervals, as such size based
+ # estimation can't be used.
+ Int timeMinutes = 120
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -124,33 +129,34 @@ task ApplyBQSR {
}
runtime {
- docker: dockerImage
+ memory: "~{memoryMb}MiB"
time_minutes: timeMinutes
- memory: "~{memoryMb}M"
+ docker: dockerImage
}
parameter_meta {
+ # inputs
inputBam: {description: "The BAM file which should be recalibrated.", category: "required"}
inputBamIndex: {description: "The input BAM file's index.", category: "required"}
outputBamPath: {description: "The location the resulting BAM file should be written.", category: "required"}
 recalibrationReport: {description: "The BQSR report to be used for recalibration.", category: "required"}
sequenceGroupInterval: {description: "Bed files describing the regions to operate on.", category: "advanced"}
- referenceFasta: {description: "The reference fasta file which was also used for mapping.",
- category: "required"}
- referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
- category: "required"}
+ referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
+ referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
-
+ javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"}
memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"}
- javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ recalibratedBam: {description: "A BAM file containing the recalibrated read data."}
+ recalibratedBamIndex: {description: "Index of recalibrated BAM file."}
+ recalibratedBamMd5: {description: "MD5 of recalibrated BAM file."}
}
}
-# Generate Base Quality Score Recalibration (BQSR) model
+# Generate Base Quality Score Recalibration (BQSR) model.
task BaseRecalibrator {
input {
File inputBam
@@ -159,14 +165,15 @@ task BaseRecalibrator {
Array[File] sequenceGroupInterval = []
Array[File] knownIndelsSitesVCFs = []
Array[File] knownIndelsSitesVCFIndexes = []
- File? dbsnpVCF
- File? dbsnpVCFIndex
File referenceFasta
File referenceFastaDict
File referenceFastaFai
- Int memoryMb = javaXmxMb + 512
+ File? dbsnpVCF
+ File? dbsnpVCFIndex
+
Int javaXmxMb = 1024
+ Int memoryMb = javaXmxMb + 512
Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used.
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -190,42 +197,42 @@ task BaseRecalibrator {
}
runtime {
- docker: dockerImage
+ memory: "~{memoryMb}MiB"
time_minutes: timeMinutes
- memory: "~{memoryMb}M"
+ docker: dockerImage
}
parameter_meta {
+ # inputs
inputBam: {description: "The BAM file to generate a BQSR report for.", category: "required"}
inputBamIndex: {description: "The index of the input BAM file.", category: "required"}
recalibrationReportPath: {description: "The location to write the BQSR report to.", category: "required"}
sequenceGroupInterval: {description: "Bed files describing the regions to operate on.", category: "advanced"}
knownIndelsSitesVCFs: {description: "VCF files with known indels.", category: "advanced"}
 knownIndelsSitesVCFIndexes: {description: "The indexes for the known variant VCFs.", category: "advanced"}
+ referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
+ referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
+ referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
dbsnpVCF: {description: "A dbSNP VCF.", category: "common"}
dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"}
- referenceFasta: {description: "The reference fasta file which was also used for mapping.",
- category: "required"}
- referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
- category: "required"}
- referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
-
+ javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"}
memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"}
- javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ recalibrationReport: {description: "A GATK Report file with many tables."}
}
}
task CalculateContamination {
input {
File tumorPileups
+
File? normalPileups
- String memory = "13G"
String javaXmx = "12G"
+ String memory = "13GiB"
Int timeMinutes = 180
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -246,20 +253,23 @@ task CalculateContamination {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
tumorPileups: {description: "The pileup summary of a tumor/case sample.", category: "required"}
normalPileups: {description: "The pileup summary of the normal/control sample.", category: "common"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ contaminationTable: {description: "Table with fractions of reads from cross-sample contamination."}
+ mafTumorSegments: {description: "Tumor segments table."}
}
}
@@ -268,8 +278,8 @@ task CallCopyRatioSegments {
String outputPrefix
File copyRatioSegments
- String memory = "3G"
String javaXmx = "2G"
+ String memory = "3GiB"
Int timeMinutes = 2
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -289,20 +299,23 @@ task CallCopyRatioSegments {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
outputPrefix: {description: "The prefix for the output files.", category: "required"}
copyRatioSegments: {description: "The copy ratios file generated by gatk ModelSegments.", category: "required"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ calledSegments: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a read group sample name, a sequence dictionary, a row specifying the column headers contained in CalledCopyRatioSegmentCollection.CalledCopyRatioSegmentTableColumn, and the corresponding entry rows."}
+ calledSegmentsIgv: {description: "This is a tab-separated values (TSV) file with CBS-format column headers and the corresponding entry rows that can be plotted using IGV."}
}
}
@@ -310,15 +323,16 @@ task CollectAllelicCounts {
input {
String allelicCountsPath = "allelic_counts.tsv"
File commonVariantSites
- File? commonVariantSitesIndex
File inputBam
File inputBamIndex
File referenceFasta
File referenceFastaDict
File referenceFastaFai
- String memory = "11G"
+ File? commonVariantSitesIndex
+
String javaXmx = "10G"
+ String memory = "11GiB"
Int timeMinutes = 120
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -339,26 +353,28 @@ task CollectAllelicCounts {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
allelicCountsPath: {description: "The path the output should be written to.", category: "advanced"}
commonVariantSites: {description: "Interval list or vcf of common variant sites (to retrieve the allelic counts for).", category: "required"}
- commonVariantSitesIndex: {description: "The index for commonVariantSites.", category: "common"}
inputBam: {description: "The BAM file to generate counts for.", category: "required"}
inputBamIndex: {description: "The index of the input BAM file.", category: "required"}
referenceFasta: {description: "The reference fasta file.", category: "required"}
referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
+ commonVariantSitesIndex: {description: "The index for commonVariantSites.", category: "common"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ allelicCounts: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a read group sample name, a sequence dictionary, a row specifying the column headers contained in AllelicCountCollection.AllelicCountTableColumn, and the corresponding entry rows."}
}
}
@@ -373,8 +389,8 @@ task CollectReadCounts {
File referenceFastaFai
String intervalMergingRule = "OVERLAPPING_ONLY"
- String memory = "8G"
String javaXmx = "7G"
+ String memory = "8GiB"
Int timeMinutes = 1 + ceil(size(inputBam, "G") * 5)
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -397,12 +413,13 @@ task CollectReadCounts {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
countsPath: {description: "The location the output should be written to.", category: "advanced"}
intervals: {description: "The intervals to collect counts for.", category: "required"}
inputBam: {description: "The BAM file to determine the coverage for.", category: "required"}
@@ -411,12 +428,13 @@ task CollectReadCounts {
referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
intervalMergingRule: {description: "Equivalent to gatk CollectReadCounts' `--interval-merging-rule` option.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ counts: {description: "Read counts at specified intervals."}
}
}
@@ -430,8 +448,8 @@ task CombineGVCFs {
File referenceFastaDict
File referenceFastaFai
- String memory = "5G"
String javaXmx = "4G"
+ String memory = "5GiB"
Int timeMinutes = 1 + ceil(size(gvcfFiles, "G") * 8)
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -453,28 +471,28 @@ task CombineGVCFs {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
gvcfFiles: {description: "The GVCF files to be combined.", category: "required"}
 gvcfFilesIndex: {description: "The indexes for the GVCF files.", category: "required"}
intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"}
outputPath: {description: "The location the combined GVCF should be written to.", category: "required"}
- referenceFasta: {description: "The reference fasta file which was also used for mapping.",
- category: "required"}
- referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
- category: "required"}
+ referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
+ referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
-
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ outputVcf: {description: "A combined multi-sample gVCF."}
+ outputVcfIndex: {description: "Index of the output file."}
}
}
@@ -486,12 +504,12 @@ task CombineVariants {
String genotypeMergeOption = "UNIQUIFY"
String filteredRecordsMergeType = "KEEP_IF_ANY_UNFILTERED"
Array[String]+ identifiers
- Array[File]+ variantVcfs # follow "identifiers" array order
+ Array[File]+ variantVcfs # Follow "identifiers" array order.
Array[File]+ variantIndexes
String outputPath
- String memory = "13G"
String javaXmx = "12G"
+ String memory = "13GiB"
Int timeMinutes = 180
String dockerImage = "broadinstitute/gatk3:3.8-1"
}
@@ -499,17 +517,17 @@ task CombineVariants {
command <<<
set -e
mkdir -p "$(dirname ~{outputPath})"
-
- # build "-V: " arguments according to IDs and VCFs to merge
- # Make sure commands are run in bash
+ # Build "-V: " arguments according to IDs
+ # and VCFs to merge.
+ # Make sure commands are run in bash.
V_args=$(bash -c '
set -eu
ids=(~{sep=" " identifiers})
vars=(~{sep=" " variantVcfs})
for (( i = 0; i < ${#ids[@]}; ++i ))
- do
+ do
printf -- "-V:%s %s " "${ids[i]}" "${vars[i]}"
- done
+ done
')
java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 -jar /usr/GenomeAnalysisTK.jar \
-T CombineVariants \
@@ -526,12 +544,13 @@ task CombineVariants {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
@@ -540,14 +559,15 @@ task CombineVariants {
identifiers: {description: "The sample identifiers in the same order as variantVcfs.", category: "required"}
variantVcfs: {description: "The input VCF files in the same order as identifiers.", category: "required"}
variantIndexes: {description: "The indexes of the input VCF files.", category: "required"}
- outputPath: {description: "The location the output should be written to", category: "required"}
-
+ outputPath: {description: "The location the output should be written to.", category: "required"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ combinedVcf: {description: "Combined VCF file."}
+ combinedVcfIndex: {description: "Index of combined VCF file."}
}
}
@@ -555,12 +575,14 @@ task CreateReadCountPanelOfNormals {
input {
String PONpath = "PON.hdf5"
Array[File]+ readCountsFiles
+
File? annotatedIntervals
- String memory = "8G"
String javaXmx = "7G"
+ String memory = "8GiB"
Int timeMinutes = 5
- String dockerImage = "broadinstitute/gatk:4.1.8.0" # The biocontainer causes a spark related error for some reason...
+ # The biocontainer causes a spark related error for some reason.
+ String dockerImage = "broadinstitute/gatk:4.1.8.0"
}
command {
@@ -578,34 +600,36 @@ task CreateReadCountPanelOfNormals {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
PONpath: {description: "The location the PON should be written to.", category: "common"}
readCountsFiles: {description: "The read counts files as generated by CollectReadCounts.", category: "required"}
- annotatedIntervals: {description: "An annotation set of intervals as generated by AnnotateIntervals. If provided, explicit GC correction will be performed.",
- category: "advanced"}
+ annotatedIntervals: {description: "An annotation set of intervals as generated by AnnotateIntervals. If provided, explicit GC correction will be performed.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ PON: {description: "Panel-of-normals file."}
}
}
task DenoiseReadCounts {
input {
- File? PON
- File? annotatedIntervals
File readCounts
String outputPrefix
- String memory = "5G"
+ File? PON
+ File? annotatedIntervals
+
String javaXmx = "4G"
+ String memory = "5GiB"
Int timeMinutes = 5
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -628,23 +652,25 @@ task DenoiseReadCounts {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
- PON: {description: "A panel of normals as generated by CreateReadCountPanelOfNormals.", category: "advanced"}
- annotatedIntervals: {description: "An annotated set of intervals as generated by AnnotateIntervals. Will be ignored if PON is provided.",
- category: "advanced"}
+ # inputs
readCounts: {description: "The read counts file as generated by CollectReadCounts.", category: "required"}
outputPrefix: {description: "The prefix for the output files.", category: "required"}
+ PON: {description: "A panel of normals as generated by CreateReadCountPanelOfNormals.", category: "advanced"}
+ annotatedIntervals: {description: "An annotated set of intervals as generated by AnnotateIntervals. Will be ignored if PON is provided.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ standardizedCopyRatios: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a read group sample name, a sequence dictionary, a row specifying the column headers contained in CopyRatioCollection.CopyRatioTableColumn, and the corresponding entry rows."}
+ denoisedCopyRatios: {description: "This is a tab-separated values (TSV) file with a SAM-style header containing a read group sample name, a sequence dictionary, a row specifying the column headers contained in CopyRatioCollection.CopyRatioTableColumn, and the corresponding entry rows."}
}
}
@@ -656,14 +682,15 @@ task FilterMutectCalls {
File unfilteredVcf
File unfilteredVcfIndex
String outputVcf
+ Int uniqueAltReadCount = 4
+ File mutect2Stats
+
File? contaminationTable
File? mafTumorSegments
File? artifactPriors
- Int uniqueAltReadCount = 4
- File mutect2Stats
- String memory = "13G"
String javaXmx = "12G"
+ String memory = "13GiB"
Int timeMinutes = 60
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -692,41 +719,44 @@ task FilterMutectCalls {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
unfilteredVcf: {description: "An unfiltered VCF file as produced by Mutect2.", category: "required"}
unfilteredVcfIndex: {description: "The index of the unfiltered VCF file.", category: "required"}
outputVcf: {description: "The location the filtered VCF file should be written.", category: "required"}
+ uniqueAltReadCount: {description: "Equivalent to FilterMutectCalls' `--unique-alt-read-count` option.", category: "advanced"}
+ mutect2Stats: {description: "Equivalent to FilterMutectCalls' `-stats` option.", category: "advanced"}
contaminationTable: {description: "Equivalent to FilterMutectCalls' `--contamination-table` option.", category: "advanced"}
mafTumorSegments: {description: "Equivalent to FilterMutectCalls' `--tumor-segmentation` option.", category: "advanced"}
artifactPriors: {description: "Equivalent to FilterMutectCalls' `--ob-priors` option.", category: "advanced"}
- uniqueAltReadCount: {description: "Equivalent to FilterMutectCalls' `--unique-alt-read-count` option.", category: "advanced"}
- mutect2Stats: {description: "Equivalent to FilterMutectCalls' `-stats` option.", category: "advanced"}
-
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ filteredVcf: {description: "VCF file with filtered variants from a Mutect2 VCF callset."}
+ filteredVcfIndex: {description: "Index of output VCF file."}
+ filteringStats: {description: "The output filtering stats file."}
}
}
-# Combine multiple recalibration tables from scattered BaseRecalibrator runs
+# Combine multiple recalibration tables from scattered BaseRecalibrator runs.
task GatherBqsrReports {
input {
Array[File] inputBQSRreports
String outputReportPath
- Int memoryMb = 256 + javaXmxMb
Int javaXmxMb = 256
+ Int memoryMb = 256 + javaXmxMb
Int timeMinutes = 1
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -745,21 +775,22 @@ task GatherBqsrReports {
}
runtime {
- docker: dockerImage
+ memory: "~{memoryMb}MiB"
time_minutes: timeMinutes
- memory: "~{memoryMb}M"
+ docker: dockerImage
}
parameter_meta {
+ # inputs
inputBQSRreports: {description: "The BQSR reports to be merged.", category: "required"}
outputReportPath: {description: "The location of the combined BQSR report.", category: "required"}
-
+ javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"}
memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"}
- javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ outputBQSRreport: {description: "Single file with scattered BQSR recalibration reports gathered into one."}
}
}
@@ -770,9 +801,11 @@ task GenomicsDBImport {
Array[File]+ intervals
String genomicsDBWorkspacePath = "genomics_db"
String genomicsDBTarFile = "genomics_db.tar.gz"
+
String? tmpDir
- String memory = "5G"
+
String javaXmx = "4G"
+ String memory = "5GiB"
Int timeMinutes = 180
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -794,25 +827,26 @@ task GenomicsDBImport {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
gvcfFiles: {description: "The gvcfFiles to be merged.", category: "required"}
gvcfFilesIndex: {description: "Indexes for the gvcfFiles.", category: "required"}
intervals: {description: "intervals over which to operate.", category: "required"}
- genomicsDBWorkspacePath: {description: "Where the genomicsDB files should be stored", category: "advanced"}
- genomicsDBTarFile: {description: "Where the .tar file containing the genomicsDB should be stored", category: "advanced"}
- tmpDir: {description: "Alternate temporary directory in case there is not enough space. Must be mounted when using containers",
- category: "advanced"}
+ genomicsDBWorkspacePath: {description: "Where the genomicsDB files should be stored.", category: "advanced"}
+ genomicsDBTarFile: {description: "Where the .tar file containing the genomicsDB should be stored.", category: "advanced"}
+ tmpDir: {description: "Alternate temporary directory in case there is not enough space. Must be mounted when using containers.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ genomicsDbTarArchive: {description: "Imported VCFs to GenomicsDB file."}
}
}
@@ -820,18 +854,19 @@ task GenotypeGVCFs {
input {
File gvcfFile
File gvcfFileIndex
- Array[File]+ intervals
String outputPath
File referenceFasta
File referenceFastaDict
File referenceFastaFai
Array[String] annotationGroups = ["StandardAnnotation"]
+
+ Array[File]? intervals
File? dbsnpVCF
File? dbsnpVCFIndex
File? pedigree
- String memory = "7G"
String javaXmx = "6G"
+ String memory = "7GiB"
Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used.
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -846,43 +881,43 @@ task GenotypeGVCFs {
~{"-D " + dbsnpVCF} \
~{"--pedigree " + pedigree} \
~{true="-G" false="" length(annotationGroups) > 0} ~{sep=" -G " annotationGroups} \
- --only-output-calls-starting-in-intervals \
-V ~{gvcfFile} \
- -L ~{sep=' -L ' intervals}
+ ~{true="--only-output-calls-starting-in-intervals" false="" defined(intervals)} \
+ ~{true="-L" false="" defined(intervals)} ~{sep=' -L ' intervals}
}
output {
File outputVCF = outputPath
File outputVCFIndex = outputPath + ".tbi"
-
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
gvcfFile: {description: "The GVCF file to be genotyped.", category: "required"}
gvcfFileIndex: {description: "The index of the input GVCF file.", category: "required"}
- intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "required"}
outputPath: {description: "The location to write the output VCF file to.", category: "required"}
- referenceFasta: {description: "The reference fasta file which was also used for mapping.",
- category: "required"}
- referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
- category: "required"}
+ referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
+ referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
- annotationGroups: {description: "Which annotation groups will be used for the annotation", category: "advanced"}
+ annotationGroups: {description: "Which annotation groups will be used for the annotation.", category: "advanced"}
+ intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "optional"}
dbsnpVCF: {description: "A dbSNP VCF.", category: "common"}
dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"}
- pedigree: {description: "Pedigree file for determining the population \"founders\"", category: "common"}
+ pedigree: {description: "Pedigree file for determining the population \"founders\".", category: "common"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ outputVCF: {description: "A final VCF in which all samples have been jointly genotyped."}
+ outputVCFIndex: {description: "Index of final VCF file."}
}
}
@@ -896,8 +931,8 @@ task GetPileupSummaries {
File sitesForContaminationIndex
String outputPrefix
- String memory = "13G"
String javaXmx = "12G"
+ String memory = "13GiB"
Int timeMinutes = 120
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -917,12 +952,13 @@ task GetPileupSummaries {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
sampleBam: {description: "A BAM file for which a pileup should be created.", category: "required"}
sampleBamIndex: {description: "The index of the input BAM file.", category: "required"}
variantsForContamination: {description: "A VCF file with common variants.", category: "required"}
@@ -930,13 +966,13 @@ task GetPileupSummaries {
sitesForContamination: {description: "A bed file describing regions to operate on.", category: "required"}
sitesForContaminationIndex: {description: "The index for the bed file.", category: "required"}
outputPrefix: {description: "The prefix for the ouput.", category: "required"}
-
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ pileups: {description: "Pileup metrics for inferring contamination."}
}
}
@@ -945,26 +981,27 @@ task HaplotypeCaller {
input {
Array[File]+ inputBams
Array[File]+ inputBamsIndex
- Array[File]+? intervalList
- Array[File]+? excludeIntervalList
String outputPath
File referenceFasta
File referenceFastaIndex
File referenceFastaDict
+ Boolean gvcf = false
+ String emitRefConfidence = if gvcf then "GVCF" else "NONE"
+ Boolean dontUseSoftClippedBases = false
+
+ Array[File]+? intervalList
+ Array[File]+? excludeIntervalList
Float? contamination
File? dbsnpVCF
File? dbsnpVCFIndex
File? pedigree
Int? ploidy
String? outputMode
- Boolean gvcf = false
- String emitRefConfidence = if gvcf then "GVCF" else "NONE"
- Boolean dontUseSoftClippedBases = false
Float? standardMinConfidenceThresholdForCalling
- Int memoryMb = javaXmxMb + 512
- # Memory increases with time used. 4G should cover most use cases.
Int javaXmxMb = 4096
+ # Memory increases with time used. 4G should cover most use cases.
+ Int memoryMb = javaXmxMb + 512
Int timeMinutes = 400 # This will likely be used with intervals, as such size based estimation can't be used.
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -995,50 +1032,48 @@ task HaplotypeCaller {
}
runtime {
- docker: dockerImage
+ memory: "~{memoryMb}MiB"
time_minutes: timeMinutes
- memory: "~{memoryMb}M"
+ docker: dockerImage
}
parameter_meta {
+ # inputs
inputBams: {description: "The BAM files on which to perform variant calling.", category: "required"}
inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"}
- intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"}
- excludeIntervalList: {description: "Bed files or interval lists describing the regions to NOT operate on.", category: "common"}
outputPath: {description: "The location to write the output to.", category: "required"}
- ploidy: {description: "The ploidy with which the variants should be called.", category: "common"}
- gvcf: {description: "Whether the output should be a gvcf", category: "common"}
- referenceFasta: {description: "The reference fasta file which was also used for mapping.",
- category: "required"}
- referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
- category: "required"}
+ referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
+ referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
referenceFastaIndex: {description: "The index for the reference fasta file.", category: "required"}
- contamination: {description: "Equivalent to HaplotypeCaller's `-contamination` option.", category: "advanced"}
- outputMode: {description: "Specifies which type of calls we should output. Same as HaplotypeCaller's `--output-mode` option.",
- category: "advanced"}
- emitRefConfidence: {description: "Whether to include reference calls. Three modes: 'NONE', 'BP_RESOLUTION' and 'GVCF'",
- category: "advanced"}
+ gvcf: {description: "Whether the output should be a gvcf.", category: "common"}
+ emitRefConfidence: {description: "Whether to include reference calls. Three modes: 'NONE', 'BP_RESOLUTION' and 'GVCF'.", category: "advanced"}
dontUseSoftClippedBases: {description: "Do not use soft-clipped bases. Should be 'true' for RNA variant calling.", category: "common"}
- standardMinConfidenceThresholdForCalling: {description: "Confidence threshold used for calling variants.", category: "advanced"}
+ intervalList: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"}
+ excludeIntervalList: {description: "Bed files or interval lists describing the regions to NOT operate on.", category: "common"}
+ contamination: {description: "Equivalent to HaplotypeCaller's `-contamination` option.", category: "advanced"}
dbsnpVCF: {description: "A dbSNP VCF.", category: "common"}
dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"}
- pedigree: {description: "Pedigree file for determining the population \"founders\"", category: "common"}
+ pedigree: {description: "Pedigree file for determining the population \"founders\".", category: "common"}
+ ploidy: {description: "The ploidy with which the variants should be called.", category: "common"}
+ outputMode: {description: "Specifies which type of calls we should output. Same as HaplotypeCaller's `--output-mode` option.", category: "advanced"}
+ standardMinConfidenceThresholdForCalling: {description: "Confidence threshold used for calling variants.", category: "advanced"}
+ javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"}
memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"}
- javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ outputVCF: {description: "Raw, unfiltered SNP and indel calls."}
+ outputVCFIndex: {description: "Index of output VCF."}
}
}
-
task LearnReadOrientationModel {
input {
Array[File]+ f1r2TarGz
- String memory = "13G"
String javaXmx = "12G"
+ String memory = "13GiB"
Int timeMinutes = 120
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -1056,19 +1091,21 @@ task LearnReadOrientationModel {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
f1r2TarGz: {description: "A f1r2TarGz file outputed by mutect2.", category: "required"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ artifactPriorsTable: {description: "Maximum likelihood estimates of artifact prior probabilities in the orientation bias mixture model filter."}
}
}
@@ -1076,8 +1113,8 @@ task MergeStats {
input {
Array[File]+ stats
- String memory = "15G"
String javaXmx = "14G"
+ String memory = "15GiB"
Int timeMinutes = 30
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -1095,19 +1132,21 @@ task MergeStats {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
stats: {description: "Statistics files to be merged.", category: "required"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ mergedStats: {description: "Merged stats from scattered Mutect2 runs."}
}
}
@@ -1117,14 +1156,13 @@ task ModelSegments {
String outputPrefix
File denoisedCopyRatios
File allelicCounts
- File? normalAllelicCounts
- Int minimumTotalAlleleCountCase = if defined(normalAllelicCounts)
- then 0
- else 30
+ Int minimumTotalAlleleCountCase = if defined(normalAllelicCounts) then 0 else 30
Int maximumNumberOfSmoothingIterations = 10
- String memory = "11G"
+ File? normalAllelicCounts
+
String javaXmx = "10G"
+ String memory = "11GiB"
Int timeMinutes = 60
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -1145,7 +1183,6 @@ task ModelSegments {
output {
File hetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.tsv"
- File? normalHetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.normal.tsv"
File copyRatioSegments = outputDir + "/" + outputPrefix + ".cr.seg"
File copyRatioCBS = outputDir + "/" + outputPrefix + ".cr.igv.seg"
File alleleFractionCBS = outputDir + "/" + outputPrefix + ".af.igv.seg"
@@ -1155,29 +1192,41 @@ task ModelSegments {
File modeledSegments = outputDir + "/" + outputPrefix + ".modelFinal.seg"
File copyRatioParameters = outputDir + "/" + outputPrefix + ".modelFinal.cr.param"
File alleleFractionParameters = outputDir + "/" + outputPrefix + ".modelFinal.af.param"
+ File? normalHetrozygousAllelicCounts = outputDir + "/" + outputPrefix + ".hets.normal.tsv"
}
runtime {
- docker: dockerImage
- time_minute: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
outputDir: {description: "The directory to write the ouput to.", category: "common"}
outputPrefix: {description: "The prefix of the output files. Should not include directories.", category: "required"}
denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"}
allelicCounts: {description: "The allelicCounts as generate by CollectAllelicCounts.", category: "required" }
- normalAllelicCounts: {description: "The allelicCounts as generate by CollectAllelicCounts for a matched normal.", category: "common"}
minimumTotalAlleleCountCase: {description: "Equivalent to gatk ModelSeqments' `--minimum-total-allele-count-case` option.", category: "advanced"}
maximumNumberOfSmoothingIterations: {description: "Equivalent to gatk ModelSeqments' `--maximum-number-of-smoothing-iterations` option.", category: "advanced"}
-
+ normalAllelicCounts: {description: "The allelicCounts as generate by CollectAllelicCounts for a matched normal.", category: "common"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ hetrozygousAllelicCounts: {description: "Allelic-counts file containing the counts at sites genotyped as heterozygous in the case sample."}
+ copyRatioSegments: {description: "Contains the segments from the .modelFinal.seg file converted to a format suitable for input to CallCopyRatioSegments."}
+ copyRatioCBS: {description: "The posterior medians of the log2 copy ratio."}
+ alleleFractionCBS: {description: "Minor-allele fraction."}
+ unsmoothedModeledSegments: {description: "The initial modeled-segments result before segmentation smoothing."}
+ unsmoothedCopyRatioParameters: {description: "The initial copy-ratio-model global-parameter result before segmentation smoothing."}
+ unsmoothedAlleleFractionParameters: {description: "The initial allele-fraction-model global-parameter result before segmentation smoothing."}
+ modeledSegments: {description: "The final modeled-segments result after segmentation smoothing."}
+ copyRatioParameters: {description: "The final copy-ratio-model global-parameter result after segmentation smoothing."}
+ alleleFractionParameters: {description: "The final allele-fraction-model global-parameter result after segmentation smoothing."}
+ normalHetrozygousAllelicCounts: {description: "Allelic-counts file containing the counts at sites genotyped as heterozygous in the matched-normal sample."}
}
}
@@ -1190,17 +1239,18 @@ task MuTect2 {
File referenceFastaFai
String outputVcf
String tumorSample
+ String f1r2TarGz = "f1r2.tar.gz"
+ Array[File]+ intervals
+ String outputStats = outputVcf + ".stats"
+
String? normalSample
File? germlineResource
File? germlineResourceIndex
File? panelOfNormals
File? panelOfNormalsIndex
- String f1r2TarGz = "f1r2.tar.gz"
- Array[File]+ intervals
- String outputStats = outputVcf + ".stats"
- String memory = "5G"
String javaXmx = "4G"
+ String memory = "5GiB"
Int timeMinutes = 240
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -1229,12 +1279,13 @@ task MuTect2 {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
inputBams: {description: "The BAM files on which to perform variant calling.", category: "required"}
inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"}
referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
@@ -1242,20 +1293,24 @@ task MuTect2 {
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
outputVcf: {description: "The location to write the output VCF file to.", category: "required"}
tumorSample: {description: "The name of the tumor/case sample.", category: "required"}
+ f1r2TarGz: {description: "Equivalent to Mutect2's `--f1r2-tar-gz` option.", category: "advanced"}
+ intervals: {description: "Bed files describing the regions to operate on.", category: "required"}
+ outputStats: {description: "The location the output statistics should be written to.", category: "advanced"}
normalSample: {description: "The name of the normal/control sample.", category: "common"}
germlineResource: {description: "Equivalent to Mutect2's `--germline-resource` option.", category: "advanced"}
germlineResourceIndex: {description: "The index for the germline resource.", category: "advanced"}
panelOfNormals: {description: "Equivalent to Mutect2's `--panel-of-normals` option.", category: "advanced"}
panelOfNormalsIndex: {description: "The index for the panel of normals.", category: "advanced"}
- f1r2TarGz: {description: "Equivalent to Mutect2's `--f1r2-tar-gz` option.", category: "advanced"}
- intervals: {description: "Bed files describing the regiosn to operate on.", category: "required"}
- outputStats: {description: "The location the output statistics should be written to.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ vcfFile: {description: "Somatic SNVs and indels called via local assembly of haplotypes."}
+ vcfFileIndex: {description: "Index for Mutect2 VCF."}
+ f1r2File: {description: "Contains information that can then be passed to LearnReadOrientationModel, which generates an artifact prior table for each tumor sample for FilterMutectCalls to use."}
+ stats: {description: "Stats file."}
}
}
@@ -1266,10 +1321,11 @@ task PlotDenoisedCopyRatios {
String outputPrefix
File standardizedCopyRatios
File denoisedCopyRatios
+
Int? minimumContigLength
- String memory = "4G"
String javaXmx = "3G"
+ String memory = "4GiB"
Int timeMinutes = 2
String dockerImage = "broadinstitute/gatk:4.1.8.0"
}
@@ -1289,32 +1345,39 @@ task PlotDenoisedCopyRatios {
output {
File denoisedCopyRatiosPlot = outputDir + "/" + outputPrefix + ".denoised.png"
- File? denoisedCopyRatiosLimitedPlot = outputDir + "/" + outputPrefix + ".denoisedLimit4.png"
File standardizedMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".standardizedMAD.txt"
File denoisedMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".denoisedMAD.txt"
File deltaMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".deltaMAD.txt"
File deltaScaledMedianAbsoluteDeviation = outputDir + "/" + outputPrefix + ".scaledDeltaMAD.txt"
+ File? denoisedCopyRatiosLimitedPlot = outputDir + "/" + outputPrefix + ".denoisedLimit4.png"
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file used for the analyses.", category: "required"}
outputDir: {description: "The directory to write the ouput to.", category: "common"}
outputPrefix: {description: "The prefix of the output files. Should not include directories.", category: "required"}
- denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"}
standardizedCopyRatios: {description: "The standardized copy ratios as generated by DenoiseReadCounts.", category: "required"}
+ denoisedCopyRatios: {description: "The denoised copy ratios as generated by DenoiseReadCounts.", category: "required"}
minimumContigLength: {description: "The minimum length for a contig to be included in the plots.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ denoisedCopyRatiosPlot: {description: "Plot showing the entire range of standardized and denoised copy ratios."}
+ standardizedMedianAbsoluteDeviation: {description: "Standardized median absolute deviation copy ratios."}
+ denoisedMedianAbsoluteDeviation: {description: "Denoised median absolute deviation copy ratios."}
+ deltaMedianAbsoluteDeviation: {description: "The change between `standardizedMedianAbsoluteDeviation` & `denoisedMedianAbsoluteDeviation`."}
+ deltaScaledMedianAbsoluteDeviation: {description: "The change between `standardizedMedianAbsoluteDeviation` & `denoisedMedianAbsoluteDeviation` scaled by standardized MAD."}
+ denoisedCopyRatiosLimitedPlot: {description: "Plot showing the standardized and denoised copy ratios limited to ratios within [0, 4]."}
}
}
@@ -1326,10 +1389,11 @@ task PlotModeledSegments {
File denoisedCopyRatios
File segments
File allelicCounts
+
Int? minimumContigLength
- String memory = "4G"
String javaXmx = "3G"
+ String memory = "4GiB"
Int timeMinutes = 2
String dockerImage = "broadinstitute/gatk:4.1.8.0"
}
@@ -1353,12 +1417,13 @@ task PlotModeledSegments {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file used for the analyses.", category: "required"}
outputDir: {description: "The directory to write the ouput to.", category: "common"}
outputPrefix: {description: "The prefix of the output files. Should not include directories.", category: "required"}
@@ -1366,12 +1431,13 @@ task PlotModeledSegments {
segments: {description: "The modeled segments as generated by ModelSegments.", category: "required"}
allelicCounts: {description: "The hetrozygous allelic counts as generated by ModelSegments.", category: "required"}
minimumContigLength: {description: "The minimum length for a contig to be included in the plots.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ modeledSegmentsPlot: {description: "This plot shows the input denoised copy ratios and/or alternate-allele fractions as points, as well as box plots for the available posteriors in each segment."}
}
}
@@ -1380,14 +1446,15 @@ task PreprocessIntervals {
File referenceFasta
File referenceFastaDict
File referenceFastaFai
- File? intervals
String outputIntervalList = "bins.interval_list"
Int binLength = if defined(intervals) then 0 else 1000
Int padding = if defined(intervals) then 250 else 0
String intervalMergingRule = "OVERLAPPING_ONLY"
- String memory = "4G"
+ File? intervals
+
String javaXmx = "3G"
+ String memory = "4GiB"
Int timeMinutes = 1 + ceil(size(referenceFasta, "G") * 6)
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -1411,41 +1478,47 @@ task PreprocessIntervals {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
- referenceFasta: {description: "The reference fasta file..", category: "required"}
+ # inputs
+ referenceFasta: {description: "The reference fasta file.", category: "required"}
referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
- intervals: {description: "Bed files describing the regiosn to operate on.", category: "common"}
outputIntervalList: {description: "The location the output should be written to.", category: "advanced"}
binLength: {description: "The size of the bins to be created. Should be 0 for targeted/exome sequencing.", category: "advanced"}
padding: {description: "The padding to be added to the bins. Should be 0 if contiguos binning is used, eg with WGS.", category: "advanced"}
intervalMergingRule: {description: "Equivalent to gatk PreprocessIntervals' `--interval-merging-rule` option.", category: "advanced"}
+ intervals: {description: "Bed files describing the regions to operate on.", category: "common"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ intervalList: {description: "Preprocessed Picard interval-list file."}
}
}
task SelectVariants {
input {
+ File inputVcf
+ File inputVcfIndex
File referenceFasta
File referenceFastaDict
File referenceFastaFai
- File inputVcf
- File inputVcfIndex
String outputPath = "output.vcf.gz"
- String? selectTypeToInclude
Array[File] intervals = []
- String memory = "5G"
+
+ Boolean excludeFiltered = false
+ String? selectTypeToInclude
+ String? selectGenotype
+
String javaXmx = "4G"
+ String memory = "5GiB"
Int timeMinutes = 60
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -1458,6 +1531,8 @@ task SelectVariants {
-R ~{referenceFasta} \
-V ~{inputVcf} \
~{"--select-type-to-include " + selectTypeToInclude} \
+ ~{"-select-genotype \"" + selectGenotype}~{true="\"" false="" defined(selectGenotype)} \
+ ~{true="--exclude-filtered" false="" excludeFiltered} \
~{true="-L" false="" length(intervals) > 0} ~{sep=' -L ' intervals} \
-O ~{outputPath}
}
@@ -1468,29 +1543,31 @@ task SelectVariants {
}
runtime {
- docker: dockerImage
- time_minute: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
inputVcf: {description: "The VCF input file.", category: "required"}
inputVcfIndex: {description: "The input VCF file's index.", category: "required"}
- referenceFasta: {description: "The reference fasta file which was also used for mapping.",
- category: "required"}
- referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
- category: "required"}
+ referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
+ referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
- selectTypeToInclude: {description: "Select only a certain type of variants from the input file", category: "common"}
outputPath: {description: "The location the output VCF file should be written.", category: "advanced"}
intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "common"}
-
+ selectTypeToInclude: {description: "Select only a certain type of variants from the input file.", category: "common"}
+ excludeFiltered: {description: "Remove all variants that do not have a PASS filter.", category: "advanced"}
+ selectGenotype: {description: "The genotype to be selected.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ outputVcf: {description: "A new VCF file containing the selected subset of variants."}
+ outputVcfIndex: {description: "Index of the new output VCF file."}
}
}
@@ -1504,8 +1581,8 @@ task SplitNCigarReads {
String outputBam
Array[File] intervals = []
- String memory = "5G"
String javaXmx = "4G"
+ String memory = "5GiB"
Int timeMinutes = 120 # This will likely be used with intervals, as such size based estimation can't be used.
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -1527,28 +1604,28 @@ task SplitNCigarReads {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
inputBam: {description: "The BAM file for which spliced reads should be split.", category: "required"}
inputBamIndex: {description: "The input BAM file's index.", category: "required"}
- referenceFasta: {description: "The reference fasta file which was also used for mapping.",
- category: "required"}
- referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
- category: "required"}
+ referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
+ referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
outputBam: {description: "The location the output BAM file should be written.", category: "required"}
intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"}
-
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ bam: {description: "BAM file with reads split at N CIGAR elements and CIGAR strings updated."}
+ bamIndex: {description: "Index of output BAM file."}
}
}
@@ -1558,11 +1635,6 @@ task VariantEval {
Array[File] evalVcfsIndex
Array[File] comparisonVcfs = []
Array[File] comparisonVcfsIndex = []
- File? referenceFasta
- File? referenceFastaDict
- File? referenceFastaFai
- File? dbsnpVCF
- File? dbsnpVCFIndex
Array[File] intervals = []
String outputPath = "eval.table"
Boolean doNotUseAllStandardModules = false
@@ -1572,8 +1644,14 @@ task VariantEval {
Array[String] samples = []
Boolean mergeEvals = false
- String memory = "5G"
+ File? referenceFasta
+ File? referenceFastaDict
+ File? referenceFastaFai
+ File? dbsnpVCF
+ File? dbsnpVCFIndex
+
String javaXmx = "4G"
+ String memory = "5GiB"
# TODO: Refine estimate. For now 4 minutes per GB of input.
Int timeMinutes = ceil(size(flatten([evalVcfs, comparisonVcfs, select_all([referenceFasta, dbsnpVCF])]), "G") * 20)
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
@@ -1604,35 +1682,40 @@ task VariantEval {
runtime {
cpu: 1
- docker: dockerImage
memory: memory
time_minutes: timeMinutes
+ docker: dockerImage
}
+
parameter_meta {
+ # inputs
evalVcfs: {description: "Variant sets to evaluate.", category: "required"}
evalVcfsIndex: {description: "Indexes for the variant sets.", category: "required"}
comparisonVcfs: {description: "Compare set vcfs.", category: "advanced"}
comparisonVcfsIndex: {description: "Indexes for the compare sets.", category: "advanced"}
- evalModules: {description: "One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless doNotUseAllStandardModules=true)", category: "common"}
- stratificationModules: {description: "One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless doNotUseAllStandardStratifications=true)", category: "common"}
- samples: {description: "Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context." , category: "advanced"} # Advanced because this description is impossible to understand...
- mergeEvals: {description: "If provided, all evalVcf tracks will be merged into a single eval track", category: "common"}
+ intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"}
+ outputPath: {description: "The location the output table should be written.", category: "advanced"}
doNotUseAllStandardModules: {description: "Do not use the standard modules by default (instead, only those that are specified with the evalModules option).", category: "common"}
doNotUseAllStandardStratifications: {description: "Do not use the standard stratification modules by default (instead, only those that are specified with the stratificationModules option).", category: "common"}
+ evalModules: {description: "One or more specific eval modules to apply to the eval track(s) (in addition to the standard modules, unless doNotUseAllStandardModules=true).", category: "common"}
+ stratificationModules: {description: "One or more specific stratification modules to apply to the eval track(s) (in addition to the standard stratifications, unless doNotUseAllStandardStratifications=true).", category: "common"}
+ samples: {description: "Derive eval and comp contexts using only these sample genotypes, when genotypes are available in the original context." , category: "advanced"}
+ mergeEvals: {description: "If provided, all evalVcf tracks will be merged into a single eval track.", category: "common"}
referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "common"}
referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "common"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "common"}
dbsnpVCF: {description: "A dbSNP VCF.", category: "common"}
dbsnpVCFIndex: {description: "The index for the dbSNP VCF.", category: "common"}
- outputPath: {description: "The location the output table should be written.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ table: {description: "Evaluation tables detailing the results of the eval modules which were applied."}
}
}
+
task VariantFiltration {
input {
File inputVcf
@@ -1644,8 +1727,8 @@ task VariantFiltration {
Array[String]+ filterArguments
Array[File] intervals = []
- String memory = "5G"
String javaXmx = "4G"
+ String memory = "5GiB"
Int timeMinutes = 120
String dockerImage = "quay.io/biocontainers/gatk4:4.1.8.0--py38h37ae868_0"
}
@@ -1668,29 +1751,28 @@ task VariantFiltration {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
inputVcf: {description: "The VCF to be filtered.", category: "required"}
inputVcfIndex: {description: "The input VCF file's index.", category: "required"}
- referenceFasta: {description: "The reference fasta file which was also used for mapping.",
- category: "required"}
- referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
- category: "required"}
+ referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
+ referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
outputPath: {description: "The location the output VCF file should be written.", category: "common"}
+ filterArguments: {description: "Arguments that should be used for the filter. For example: ['--filter-name', 'my_filter', '--filter-expression', 'AB<0.2'].", category: "required"}
intervals: {description: "Bed files or interval lists describing the regions to operate on.", category: "advanced"}
- filterArguments: {description: "Arguments that should be used for the filter. For example: ['--filter-name', 'my_filter', '--filter-expression', 'AB<0.2']",
- category: "required"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ filteredVcf: {description: "A filtered VCF in which passing variants are annotated as PASS and failing variants are annotated with the name(s) of the filter(s) they failed."}
+ filteredVcfIndex: {description: "Index of filtered VCF."}
}
}
-
diff --git a/gffcompare.wdl b/gffcompare.wdl
index e5f62b5e..fe1db0a8 100644
--- a/gffcompare.wdl
+++ b/gffcompare.wdl
@@ -22,16 +22,10 @@ version 1.0
task GffCompare {
input {
- File? inputGtfList
Array[File] inputGtfFiles
- File referenceAnnotation
- String? outputDir
- String outPrefix = "gffcmp" # gffcmp is the default used by the program as well. This
- # needs to be defined in order for the output values to be consistent and correct.
- File? genomeSequences
- Int? maxDistanceFreeEndsTerminalExons
- Int? maxDistanceGroupingTranscriptStartSites
- String? namePrefix
+ # gffcmp is the default used by the program as well. This needs to be
+ # defined in order for the output values to be consistent and correct.
+ String outPrefix = "gffcmp"
Boolean C = false
Boolean A = false
Boolean X = false
@@ -44,15 +38,24 @@ task GffCompare {
Boolean verbose = false
Boolean debugMode = false
- Int timeMinutes = 1 + ceil(size(inputGtfFiles, "G") * 30)
+ File? inputGtfList
+ File? referenceAnnotation
+ String? outputDir
+ File? genomeSequences
+ Int? maxDistanceFreeEndsTerminalExons
+ Int? maxDistanceGroupingTranscriptStartSites
+ String? namePrefix
+
+ String memory = "4GiB"
+ Int timeMinutes = 1 + ceil(size(inputGtfFiles, "GiB") * 30)
String dockerImage = "quay.io/biocontainers/gffcompare:0.10.6--h2d50403_0"
# This workaround only works in the input section.
- # Issue addressed at https://github.com/openwdl/wdl/pull/263
+ # Issue addressed at https://github.com/openwdl/wdl/pull/263.
File? noneFile # This is a wdl workaround. Please do not assign!
}
- # This allows for the creation of output directories
+ # This allows for the creation of output directories.
String dirPrefix = if defined(outputDir)
then select_first([outputDir]) + "/"
else ""
@@ -62,7 +65,7 @@ task GffCompare {
set -e
~{"mkdir -p " + outputDir}
gffcompare \
- -r ~{referenceAnnotation} \
+ ~{"-r " + referenceAnnotation} \
~{"-o '" + totalPrefix + "'"} \
~{"-s " + genomeSequences} \
~{"-e " + maxDistanceFreeEndsTerminalExons} \
@@ -89,43 +92,39 @@ task GffCompare {
else 0
Int noInputFiles = length(inputGtfFiles)
Boolean oneFile = (noFilesGtfList + noInputFiles) == 1
- String annotatedName = if oneFile
+ String annotatedName = if oneFile && defined(referenceAnnotation)
then "annotated"
else "combined"
- # Check if a redundant .gtf will be created
+ # Check if a redundant .gtf will be created.
Boolean createRedundant = C || A || X
output {
+ # noneFile is not stable. Please replace this as soon as wdl spec allows.
File annotated = totalPrefix + "." + annotatedName + ".gtf"
File loci = totalPrefix + ".loci"
File stats = totalPrefix + ".stats"
File tracking = totalPrefix + ".tracking"
- # noneFile is not stable. Please replace this as soon as wdl spec allows
+ Array[File] allFiles = select_all([annotated, loci, stats, tracking, redundant, missedIntrons])
File? redundant = if createRedundant
then totalPrefix + ".redundant.gtf"
else noneFile
File? missedIntrons = if debugMode
then totalPrefix + ".missed_introns.gtf"
else noneFile
- Array[File] allFiles = select_all([annotated, loci, stats, tracking, redundant, missedIntrons])
}
runtime {
- time_minutes: timeMinutes
- docker: dockerImage
+ memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
- inputGtfList: {description: "Equivalent to gffcompare's `-i` option.", category: "advanced"}
+ # inputs
inputGtfFiles: {description: "The input GTF files.", category: "required"}
referenceAnnotation: {description: "The GTF file to compare with.", category: "required"}
- outputDir: {description: "The location the output should be written.", category: "common"}
outPrefix: {description: "The prefix for the output.", category: "advanced"}
- genomeSequences: {description: "Equivalent to gffcompare's `-s` option.", category: "advanced"}
- maxDistanceFreeEndsTerminalExons: {description: "Equivalent to gffcompare's `-e` option.", category: "advanced"}
- maxDistanceGroupingTranscriptStartSites: {description: "Equivalent to gffcompare's `-d` option.", category: "advanced"}
- namePrefix: {description: "Equivalent to gffcompare's `-p` option.", category: "advanced"}
C: {description: "Equivalent to gffcompare's `-C` flag.", category: "advanced"}
A: {description: "Equivalent to gffcompare's `-A` flag.", category: "advanced"}
X: {description: "Equivalent to gffcompare's `-X` flag.", category: "advanced"}
@@ -137,9 +136,24 @@ task GffCompare {
noTmap: {description: "Equivalent to gffcompare's `-T` flag.", category: "advanced"}
verbose: {description: "Equivalent to gffcompare's `-V` flag.", category: "advanced"}
debugMode: {description: "Equivalent to gffcompare's `-D` flag.", category: "advanced"}
+ inputGtfList: {description: "Equivalent to gffcompare's `-i` option.", category: "advanced"}
+ outputDir: {description: "The location the output should be written.", category: "common"}
+ genomeSequences: {description: "Equivalent to gffcompare's `-s` option.", category: "advanced"}
+ maxDistanceFreeEndsTerminalExons: {description: "Equivalent to gffcompare's `-e` option.", category: "advanced"}
+ maxDistanceGroupingTranscriptStartSites: {description: "Equivalent to gffcompare's `-d` option.", category: "advanced"}
+ namePrefix: {description: "Equivalent to gffcompare's `-p` option.", category: "advanced"}
+ memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ annotated: {description: "Annotated GTF file."}
+ loci: {description: "File describing the processed loci."}
+ stats: {description: "Various statistics related to the “accuracy” (or a measure of agreement) of the input transcripts when compared to reference annotation data."}
+ tracking: {description: "File matching up transcripts between samples."}
+ allFiles: {description: "A collection of all output files."}
+ redundant: {description: "File containing duplicate/redundant transcripts."}
+ missedIntrons: {description: "File denoting missed introns."}
}
meta {
@@ -147,4 +161,4 @@ task GffCompare {
exclude: ["noneFile"]
}
}
-}
\ No newline at end of file
+}
diff --git a/gffread.wdl b/gffread.wdl
index d83e4d76..26a2773c 100644
--- a/gffread.wdl
+++ b/gffread.wdl
@@ -24,19 +24,22 @@ task GffRead {
input {
File inputGff
File genomicSequence
+ Boolean outputGtfFormat = false
+
File? genomicIndex # Optional. GFFRead can create this by itself.
String? exonsFastaPath
String? CDSFastaPath
String? proteinFastaPath
String? filteredGffPath
- Boolean outputGtfFormat = false
- Int timeMinutes = 1 + ceil(size(inputGff) * 10)
+
+ String memory = "4GiB"
+ Int timeMinutes = 1 + ceil(size(inputGff, "GiB") * 10)
String dockerImage = "quay.io/biocontainers/gffread:0.9.12--0"
}
# The mkdirs below are hackish. It should be
- # ~{"mkir -p $(dirname " + somePath + ")"}
- # but this goes wrong. Cromwell will always use ')' even if somepath is not defined.
+ # ~{"mkdir -p $(dirname " + somePath + ")"} but this goes wrong.
+ # Cromwell will always use ')' even if somepath is not defined.
# Which leads to crashing.
command {
set -e
@@ -62,21 +65,29 @@ task GffRead {
}
runtime {
- docker: dockerImage
+ memory: memory
time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
inputGff: {description: "The input GFF file.", category: "required"}
genomicSequence: {description: "The genome.", category: "required"}
+ outputGtfFormat: {description: "Equivalent to gffread's `-T` flag.", category: "advanced"}
genomicIndex: {description: "The genome's index.", category: "advanced"}
exonsFastaPath: {description: "The location the exons fasta should be written to.", category: "advanced"}
CDSFastaPath: {description: "The location the CDS fasta should be written to.", category: "advanced"}
proteinFastaPath: {description: "The location the protein fasta should be written to.", category: "advanced"}
filteredGffPath: {description: "The location the filtered GFF should be written to.", category: "advanced"}
- outputGtfFormat: {description: "Equivalent to gffread's `-T` flag.", category: "advanced"}
+ memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ exonsFasta: {description: "Fasta file containing exons."}
+ CDSFasta: {description: "Fasta file containing CDS's."}
+ proteinFasta: {description: "Fasta file containing proteins."}
+ filteredGff: {description: "Filtered GFF file."}
}
-}
\ No newline at end of file
+}
diff --git a/gridss.wdl b/gridss.wdl
new file mode 100644
index 00000000..5aca3825
--- /dev/null
+++ b/gridss.wdl
@@ -0,0 +1,496 @@
+version 1.0
+
+# Copyright (c) 2020 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import "bwa.wdl" as bwa
+
+task AnnotateInsertedSequence {
+ input {
+ File inputVcf
+ String outputPath = "gridss.annotated.vcf.gz"
+ File viralReference
+ File viralReferenceFai
+ File viralReferenceDict
+ File viralReferenceImg
+
+ Int threads = 8
+ String javaXmx = "8G"
+ String memory = "9GiB"
+ String dockerImage = "quay.io/biowdl/gridss:2.12.2"
+ Int timeMinutes = 120
+ }
+
+ command {
+ set -e
+ _JAVA_OPTIONS="$_JAVA_OPTIONS -Xmx~{javaXmx}"
+ AnnotateInsertedSequence \
+ REFERENCE_SEQUENCE=~{viralReference} \
+ INPUT=~{inputVcf} \
+ OUTPUT=~{outputPath} \
+ ALIGNMENT=APPEND \
+ WORKING_DIR='.' \
+ WORKER_THREADS=~{threads}
+ }
+
+ output {
+ File outputVcf = outputPath
+ File outputVcfIndex = outputPath + ".tbi"
+ }
+
+ runtime {
+ cpu: threads
+ memory: memory
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ inputVcf: {description: "The input VCF file.", category: "required"}
+ outputPath: {description: "The path the output will be written to.", category: "common"}
+ viralReference: {description: "A fasta file with viral sequences.", category: "required"}
+ viralReferenceFai: {description: "The index for the viral reference fasta.", category: "required"}
+ viralReferenceDict: {description: "The dict file for the viral reference.", category: "required"}
+ viralReferenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the viral reference.", category: "required"}
+
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+ category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task AnnotateSvTypes {
+ input {
+ File gridssVcf
+ File gridssVcfIndex
+ String outputPath = "./gridss.svtyped.vcf.bgz"
+
+ String memory = "32GiB"
+ String dockerImage = "quay.io/biocontainers/bioconductor-structuralvariantannotation:1.10.0--r41hdfd78af_0"
+ Int timeMinutes = 240
+ }
+
+ String effectiveOutputPath = sub(outputPath, "\\.bgz", "")
+ String index = if effectiveOutputPath != outputPath then "T" else "F"
+
+
+ # Based on https://github.com/PapenfussLab/gridss/issues/74
+ command <<<
+ set -e
+ mkdir -p "$(dirname ~{outputPath})"
+ R --vanilla << "EOF"
+ library(VariantAnnotation)
+ library(StructuralVariantAnnotation)
+
+ vcf_path <- "~{gridssVcf}"
+ out_path <- "~{effectiveOutputPath}"
+
+ # Simple SV type classifier
+ simpleEventType <- function(gr) {
+ return(ifelse(seqnames(gr) != seqnames(partner(gr)), "BND", # inter-chromosomosal
+ ifelse(gr$insLen >= abs(gr$svLen) * 0.7, "INS",
+ ifelse(strand(gr) == strand(partner(gr)), "INV",
+ ifelse(xor(start(gr) < start(partner(gr)), strand(gr) == "-"), "DEL",
+ "DUP")))))
+ }
+
+ header <- scanVcfHeader(vcf_path)
+ vcf <- readVcf(vcf_path, seqinfo(header))
+ gr <- breakpointRanges(vcf)
+ svtype <- simpleEventType(gr)
+ info(vcf[gr$sourceId])$SVTYPE <- svtype
+ # GRIDSS doesn't supply a GT, simply set it to 0/1
+ geno(vcf)$GT <- as.matrix(sapply(row.names(vcf), function(x) {"0/1"}))
+ # Select only one breakend per event (also removes single breakends):
+ # sourceId ends with o or h for paired breakends, the first in the pair
+ # end with o the second with h. Single breakend end with b, these will
+ # also be removed since we can't determine the SVTYPE.
+ gr2 <- gr[grepl(".*o$", gr$sourceId)]
+ writeVcf(vcf[gr2$sourceId], out_path, index=~{index})
+ EOF
+ >>>
+
+ output {
+ File vcf = outputPath
+ File? vcfIndex = outputPath + ".tbi"
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ gridssVcf: {description: "The VCF produced by GRIDSS.", category: "required"}
+ gridssVcfIndex: {description: "The index for the VCF produced by GRIDSS.", category: "required"}
+ outputPath: {description: "The path the output should be written to.", category: "common"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task FilterPon {
+ input {
+ File ponBed
+ File ponBedpe
+ Int minimumScore = 3
+ String outputDir = "."
+
+ String memory = "1GiB"
+ String dockerImage = "quay.io/biowdl/gridss:2.12.2"
+ Int timeMinutes = 20
+ }
+
+ command <<<
+ set -e
+ mkdir -p ~{outputDir}
+
+ cat ~{ponBed} | awk '{if ($5 >= ~{minimumScore}) print $0}' > ~{outputDir}/gridss_pon_single_breakend.bed
+ cat ~{ponBedpe} | awk '{if ($8 >= ~{minimumScore}) print $0}' > ~{outputDir}/gridss_pon_breakpoint.bedpe
+ >>>
+
+ output {
+ File bedpe = "~{outputDir}/gridss_pon_breakpoint.bedpe"
+ File bed = "~{outputDir}/gridss_pon_single_breakend.bed"
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ ponBed: {description: "The PON BED file.", category: "required"}
+ ponBedpe: {description: "The PON BEDPE file.", category: "required"}
+ minimumScore: {description: "The minimum number of normal samples an SV must have been found in to be kept.", category: "advanced"}
+ outputDir: {description: "The directory the output will be written to.", category: "common"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task GeneratePonBedpe {
+ input {
+ Array[File]+ vcfFiles
+ Array[File]+ vcfIndexes
+ File referenceFasta
+ File referenceFastaFai
+ String outputDir = "."
+
+ Int threads = 8
+ String javaXmx = "8G"
+ String memory = "9GiB"
+ String dockerImage = "quay.io/biowdl/gridss:2.12.2"
+ Int timeMinutes = 120
+ }
+
+ command {
+ set -e
+ mkdir -p ~{outputDir}
+ java -Xmx~{javaXmx} \
+ -cp /usr/local/share/gridss-2.12.2-0/gridss.jar \
+ gridss.GeneratePonBedpe \
+ INPUT=~{sep=" INPUT=" vcfFiles} \
+ NO=0 \
+ O=~{outputDir}/gridss_pon_breakpoint.bedpe \
+ SBO=~{outputDir}/gridss_pon_single_breakend.bed \
+ REFERENCE_SEQUENCE=~{referenceFasta} \
+ THREADS=~{threads}
+ }
+
+ output {
+ File bedpe = "~{outputDir}/gridss_pon_breakpoint.bedpe"
+ File bed = "~{outputDir}/gridss_pon_single_breakend.bed"
+ }
+
+ runtime {
+ cpu: threads
+ memory: memory
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ vcfFiles: {description: "The vcf files with the normals as the first sample.", category: "required"}
+ referenceFasta: {description: "The fasta of the reference genome.", category: "required"}
+ referenceFastaFai: {description: "The index for the reference genome fasta.", category: "required"}
+ outputDir: {description: "The directory the output will be written to.", category: "common"}
+ threads: {description: "The number of threads to use.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+ category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task GRIDSS {
+ input {
+ Array[File]+ tumorBam
+ Array[File]+ tumorBai
+ Array[String]+ tumorLabel
+ BwaIndex reference
+ String outputPrefix = "gridss"
+
+ File? normalBam
+ File? normalBai
+ String? normalLabel
+ File? blacklistBed
+ File? gridssProperties
+
+ Int jvmHeapSizeGb = 64
+ Int nonJvmMemoryGb = 10
+ Int threads = 12
+ Int timeMinutes = ceil(7200 / threads) + 1800
+ String dockerImage = "quay.io/biowdl/gridss:2.12.2"
+ }
+
+ command {
+ set -e
+ mkdir -p "$(dirname ~{outputPrefix})"
+ gridss \
+ -w . \
+ --reference ~{reference.fastaFile} \
+ --output ~{outputPrefix}.vcf.gz \
+ --assembly ~{outputPrefix}_assembly.bam \
+ ~{"-c " + gridssProperties} \
+ ~{"-t " + threads} \
+ ~{"--jvmheap " + jvmHeapSizeGb + "G"} \
+ --labels ~{normalLabel}~{true="," false="" defined(normalLabel)}~{sep="," tumorLabel} \
+ ~{"--blacklist " + blacklistBed} \
+ ~{normalBam} \
+ ~{sep=" " tumorBam}
+ samtools index ~{outputPrefix}_assembly.bam ~{outputPrefix}_assembly.bai
+
+ # For some reason the VCF index is sometimes missing
+ if [ ! -e ~{outputPrefix}.vcf.gz.tbi ]
+ then
+ tabix ~{outputPrefix}.vcf.gz
+ fi
+ }
+
+ output {
+ File vcf = outputPrefix + ".vcf.gz"
+ File vcfIndex = outputPrefix + ".vcf.gz.tbi"
+ File assembly = outputPrefix + "_assembly.bam"
+ File assemblyIndex = outputPrefix + "_assembly.bai"
+ }
+
+ runtime {
+ cpu: threads
+ memory: "~{jvmHeapSizeGb + nonJvmMemoryGb}GiB"
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ # inputs
+ tumorBam: {description: "The input BAM file. This should be the tumor/case sample in case of a paired analysis.", category: "required"}
+ tumorBai: {description: "The index for tumorBam.", category: "required"}
+ tumorLabel: {description: "The name of the (tumor) sample.", category: "required"}
+ reference: {description: "A BWA index, this should also include the fasta index file (.fai).", category: "required"}
+ outputPrefix: {description: "The prefix for the output files. This may include parent directories.", category: "common"}
+ normalBam: {description: "The BAM file for the normal/control sample.", category: "advanced"}
+ normalBai: {description: "The index for normalBam.", category: "advanced"}
+ normalLabel: {description: "The name of the normal sample.", category: "advanced"}
+ blacklistBed: {description: "A bed file with blacklisted regions.", category: "advanced"}
+ gridssProperties: {description: "A properties file for gridss.", category: "advanced"}
+
+ threads: {description: "The number of threads to use.", category: "advanced"}
+ jvmHeapSizeGb: {description: "The size of JVM heap for assembly and variant calling", category: "advanced"}
+ nonJvmMemoryGb: {description: "The amount of memory in Gb to be requested besides JVM memory.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ vcf: {description: "VCF file including variant allele fractions."}
+ vcfIndex: {description: "Index of output VCF."}
+ assembly: {description: "The GRIDSS assembly BAM."}
+ assemblyIndex: {description: "Index of output BAM file."}
+ }
+}
+
+task GridssAnnotateVcfRepeatmasker {
+ input {
+ File gridssVcf
+ File gridssVcfIndex
+ String outputPath = "./gridss.repeatmasker_annotated.vcf.gz"
+
+ String memory = "25GiB"
+ Int threads = 8
+ String dockerImage = "quay.io/biowdl/gridss:2.12.2"
+ Int timeMinutes = 1440
+ }
+
+ command {
+ gridss_annotate_vcf_repeatmasker \
+ --output ~{outputPath} \
+ --jar /usr/local/share/gridss-2.12.2-0/gridss.jar \
+ -w . \
+ -t ~{threads} \
+ ~{gridssVcf}
+ }
+
+ output {
+ File annotatedVcf = outputPath
+ File annotatedVcfIndex = "~{outputPath}.tbi"
+ }
+
+ runtime {
+ cpu: threads
+ memory: memory
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ gridssVcf: {description: "The GRIDSS output.", category: "required"}
+ gridssVcfIndex: {description: "The index for the GRIDSS output.", category: "required"}
+ outputPath: {description: "The path the output should be written to.", category: "common"}
+ threads: {description: "The number of threads to use.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task SomaticFilter {
+ input {
+ File vcfFile
+ File vcfIndex
+ File ponBed
+ File ponBedpe
+ String outputPath = "./high_confidence_somatic.vcf"
+ String fullOutputPath = "./high_and_low_confidence_somatic.vcf"
+
+ String memory = "16GiB"
+ String dockerImage = "quay.io/biowdl/gridss:2.12.2"
+ Int timeMinutes = 60
+ }
+
+ String ponDir = sub(ponBed, basename(ponBed), "")
+
+ command {
+ set -e
+ mkdir -p $(dirname ~{outputPath})
+ mkdir -p $(dirname ~{fullOutputPath})
+
+ gridss_somatic_filter \
+ --pondir ~{ponDir} \
+ --input ~{vcfFile} \
+ --output ~{outputPath} \
+ --fulloutput ~{fullOutputPath}
+ }
+
+ output {
+ File fullVcf = "~{fullOutputPath}.bgz"
+ File fullVcfIndex = "~{fullOutputPath}.bgz.tbi"
+ File highConfidenceVcf = "~{outputPath}.bgz"
+ File highConfidenceVcfIndex = "~{outputPath}.bgz.tbi"
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ vcfFile: {description: "The GRIDSS VCF file.", category: "required"}
+ vcfIndex: {description: "The index for the GRIDSS VCF file.", category: "required"}
+ ponBed: {description: "The PON BED file.", category: "required"}
+ ponBedpe: {description: "The PON BEDPE file.", category: "required"}
+ outputPath: {description: "The path the high confidence output should be written to.", category: "common"}
+ fullOutputPath: {description: "The path the full output should be written to.", category: "common"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task Virusbreakend {
+ input {
+ File bam
+ File bamIndex
+ File referenceFasta
+ File referenceFastaFai
+ File referenceFastaDict
+ File referenceImg
+ File virusbreakendDB
+ String outputPath = "./virusbreakend.vcf"
+
+ String memory = "75GiB"
+ Int threads = 12
+ String dockerImage = "quay.io/biowdl/gridss:2.12.2"
+ Int timeMinutes = 320
+ }
+
+ command {
+ set -e
+ mkdir virusbreakenddb
+ tar -xzvf ~{virusbreakendDB} -C virusbreakenddb --strip-components 1
+ virusbreakend \
+ --output ~{outputPath} \
+ --workingdir . \
+ --reference ~{referenceFasta} \
+ --db virusbreakenddb \
+ --jar /usr/local/share/gridss-2.12.2-0/gridss.jar \
+ -t ~{threads} \
+ ~{bam}
+ }
+
+ output {
+ File vcf = outputPath
+ File summary = "~{outputPath}.summary.tsv"
+ }
+
+ runtime {
+ cpu: threads
+ memory: memory
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ bam: {description: "A BAM file.", category: "required"}
+ bamIndex: {description: "The index for the BAM file.", category: "required"}
+ referenceFasta: {description: "The fasta of the reference genome.", category: "required"}
+ referenceImg: {description: "The BWA index image (generated with GATK BwaMemIndexImageCreator) of the reference.", category: "required"}
+ virusbreakendDB: {description: "A .tar.gz containing the virusbreakend database.", category: "required"}
+ outputPath: {description: "The path the output should be written to.", category: "common"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ threads: {description: "The number of threads to use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
diff --git a/hisat2.wdl b/hisat2.wdl
index f9a4bc59..50fabc9d 100644
--- a/hisat2.wdl
+++ b/hisat2.wdl
@@ -22,9 +22,9 @@ version 1.0
task Hisat2 {
input {
- Array[File]+ indexFiles
File inputR1
File? inputR2
+ Array[File]+ indexFiles
String outputBam
String sample
String library
@@ -32,22 +32,22 @@ task Hisat2 {
String platform = "illumina"
Boolean downstreamTranscriptomeAssembly = true
String summaryFilePath = basename(outputBam, ".bam") + ".summary.txt"
-
- Int threads = 4
- Int? sortThreads
Int sortMemoryPerThreadGb = 2
Int compressionLevel = 1
+
+ Int? sortThreads
+
+ Int threads = 4
Int? memoryGb
Int timeMinutes = 1 + ceil(size([inputR1, inputR2], "G") * 180 / threads)
# quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1
- # is a combination of hisat2 and samtools
- # hisat2=2.2.0, samtools=1.10
+ # is a combination of hisat2 and samtools hisat2=2.2.0 & samtools=1.10.
String dockerImage = "quay.io/biocontainers/mulled-v2-a97e90b3b802d1da3d6958e0867610c718cb5eb1:2880dd9d8ad0a7b221d4eacda9a818e92983128d-0"
}
- # Samtools sort may block the pipe while it is writing data to disk.
+ # Samtools sort may block the pipe while it is writing data to disk.
# This can lead to cpu underutilization.
- # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads.
+ # 1 thread if threads is 1. For 2-4 threads 2 sort threads. 3 sort threads for 5-8 threads.
Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0)
Int totalSortThreads = select_first([sortThreads, estimatedSortThreads])
Int estimatedMemoryGb = 1 + ceil(size(indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads
@@ -81,16 +81,17 @@ task Hisat2 {
}
runtime {
- memory: "~{select_first([memoryGb, estimatedMemoryGb])}G"
cpu: threads
+ memory: "~{select_first([memoryGb, estimatedMemoryGb])}GiB"
time_minutes: timeMinutes
docker: dockerImage
}
parameter_meta {
- indexFiles: {description: "The hisat2 index files.", category: "required"}
+ # inputs
inputR1: {description: "The first-/single-end FastQ file.", category: "required"}
inputR2: {description: "The second-end FastQ file.", category: "common"}
+ indexFiles: {description: "The hisat2 index files.", category: "required"}
outputBam: {description: "The location the output BAM file should be written to.", category: "required"}
sample: {description: "The sample id.", category: "required"}
library: {description: "The library id.", category: "required"}
@@ -98,13 +99,16 @@ task Hisat2 {
platform: {description: "The platform used for sequencing.", category: "advanced"}
downstreamTranscriptomeAssembly: {description: "Equivalent to hisat2's `--dta` flag.", category: "advanced"}
summaryFilePath: {description: "Where the summary file should be written.", category: "advanced"}
+ sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"}
+ compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"}
+ sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"}
threads: {description: "The number of threads to use.", category: "advanced"}
memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
- sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"}
- sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"}
- compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ bamFile: {description: "Output BAM file."}
+ summaryFile: {description: "Alignment summary file."}
}
-}
\ No newline at end of file
+}
diff --git a/hmftools.wdl b/hmftools.wdl
new file mode 100644
index 00000000..c27630a1
--- /dev/null
+++ b/hmftools.wdl
@@ -0,0 +1,1368 @@
+version 1.0
+
+# Copyright (c) 2020 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task Amber {
+ input {
+ String referenceName
+ File referenceBam
+ File referenceBamIndex
+ String tumorName
+ File tumorBam
+ File tumorBamIndex
+ String outputDir = "./amber"
+ File loci
+ File referenceFasta
+ File referenceFastaFai
+ File referenceFastaDict
+
+ Int threads = 2
+ String memory = "85GiB"
+ String javaXmx = "80G"
+ Int timeMinutes = 480
+ String dockerImage = "quay.io/biocontainers/hmftools-amber:3.5--0"
+ }
+
+ command {
+ AMBER -Xmx~{javaXmx} \
+ -reference ~{referenceName} \
+ -reference_bam ~{referenceBam} \
+ -tumor ~{tumorName} \
+ -tumor_bam ~{tumorBam} \
+ -output_dir ~{outputDir} \
+ -threads ~{threads} \
+ -ref_genome ~{referenceFasta} \
+ -loci ~{loci}
+ }
+
+ output {
+ File version = "~{outputDir}/amber.version"
+ File tumorBafPcf = "~{outputDir}/~{tumorName}.amber.baf.pcf"
+ File tumorBafTsv = "~{outputDir}/~{tumorName}.amber.baf.tsv"
+ File tumorBafVcf = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz"
+ File tumorBafVcfIndex = "~{outputDir}/~{tumorName}.amber.baf.vcf.gz.tbi"
+ File tumorContaminationVcf = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz"
+ File tumorContaminationVcfIndex = "~{outputDir}/~{tumorName}.amber.contamination.vcf.gz.tbi"
+ File tumorContaminationTsv = "~{outputDir}/~{tumorName}.amber.contamination.tsv"
+ File tumorQc = "~{outputDir}/~{tumorName}.amber.qc"
+ File normalSnpVcf = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz"
+ File normalSnpVcfIndex = "~{outputDir}/~{referenceName}.amber.snp.vcf.gz.tbi"
+ Array[File] outputs = [version, tumorBafPcf, tumorBafTsv, tumorBafVcf, tumorBafVcfIndex,
+ tumorContaminationVcf, tumorContaminationVcfIndex, tumorContaminationTsv, tumorQc,
+ normalSnpVcf, normalSnpVcfIndex]
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ cpu: threads
+ }
+
+ parameter_meta {
+ referenceName: {description: "The name of the normal sample.", category: "required"}
+ referenceBam: {description: "The normal BAM file.", category: "required"}
+ referenceBamIndex: {description: "The index for the normal BAM file.", category: "required"}
+ tumorName: {description: "The name of the tumor sample.", category: "required"}
+ tumorBam: {description: "The tumor BAM file.", category: "required"}
+ tumorBamIndex: {description: "The index for the tumor BAM file.", category: "required"}
+ outputDir: {description: "The path to the output directory.", category: "common"}
+ loci: {description: "A VCF file containing likely heterozygous sites.", category: "required"}
+ referenceFasta: {description: "The reference fasta file.", category: "required"}
+ referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
+ category: "required"}
+ referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
+ threads: {description: "The number of threads the program will use.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+ category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task Cobalt {
+ input {
+ String referenceName
+ File referenceBam
+ File referenceBamIndex
+ String tumorName
+ File tumorBam
+ File tumorBamIndex
+ String outputDir = "./cobalt"
+ File gcProfile
+
+ Int threads = 1
+ String memory = "5GiB"
+ String javaXmx = "4G"
+ Int timeMinutes = 480
+ String dockerImage = "quay.io/biocontainers/hmftools-cobalt:1.11--0"
+ }
+
+ command {
+ COBALT -Xmx~{javaXmx} \
+ -reference ~{referenceName} \
+ -reference_bam ~{referenceBam} \
+ -tumor ~{tumorName} \
+ -tumor_bam ~{tumorBam} \
+ -output_dir ~{outputDir} \
+ -threads ~{threads} \
+ -gc_profile ~{gcProfile}
+ }
+
+ output {
+ File version = "~{outputDir}/cobalt.version"
+ File normalGcMedianTsv = "~{outputDir}/~{referenceName}.cobalt.gc.median.tsv"
+ File normalRationMedianTsv = "~{outputDir}/~{referenceName}.cobalt.ratio.median.tsv"
+ File normalRationPcf = "~{outputDir}/~{referenceName}.cobalt.ratio.pcf"
+ File tumorGcMedianTsv = "~{outputDir}/~{tumorName}.cobalt.gc.median.tsv"
+ File tumorRatioPcf = "~{outputDir}/~{tumorName}.cobalt.ratio.pcf"
+ File tumorRatioTsv = "~{outputDir}/~{tumorName}.cobalt.ratio.tsv"
+ File tumorChrLen = "~{outputDir}/~{tumorName}.chr.len"
+ Array[File] outputs = [version, normalGcMedianTsv, normalRationMedianTsv,
+ normalRationPcf, tumorGcMedianTsv, tumorRatioPcf, tumorRatioTsv, tumorChrLen]
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ cpu: threads
+ }
+
+ parameter_meta {
+ referenceName: {description: "The name of the normal sample.", category: "required"}
+ referenceBam: {description: "The normal BAM file.", category: "required"}
+ referenceBamIndex: {description: "The index for the normal BAM file.", category: "required"}
+ tumorName: {description: "The name of the tumor sample.", category: "required"}
+ tumorBam: {description: "The tumor BAM file.", category: "required"}
+ tumorBamIndex: {description: "The index for the tumor BAM file.", category: "required"}
+ outputDir: {description: "The path to the output directory.", category: "common"}
+ gcProfile: {description: "A file describing the GC profile of the reference genome.", category: "required"}
+ threads: {description: "The number of threads the program will use.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+ category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task CupGenerateReport {
+ input {
+ String sampleName
+ File cupData
+ String outputDir = "./cuppa"
+
+ String memory = "5GiB"
+ Int timeMinutes = 10
+ String dockerImage = "quay.io/biowdl/cuppa:1.6"
+ }
+
+ # This script writes to the directory that the input is located in.
+ # Giving the input directly will cause the script to write in the
+ # localized input dir, which may cause issues with write permissions
+ # in certain execution engines or backends. We, therefore, make links
+ # to a working directory, and give that directory as input instead.
+ # We can't just use the outputDir directly. This could be an
+ # absolute path in which case the linking might fail due to name
+ # collisions. Outputs are copied to the given output dir afterwards.
+ command {
+ set -e
+ mkdir -p ./workdir ~{outputDir}
+ ln -s -t workdir ~{cupData}
+ CupGenerateReport \
+ ~{sampleName} \
+ workdir/
+ mv -t ~{outputDir} \
+ ./workdir/~{sampleName}.cup.report.summary.png \
+ ./workdir/~{sampleName}_cup_report.pdf
+ if [ -f ./workdir/~{sampleName}.cup.report.features.png ]
+ then
+ mv -t ~{outputDir} \
+ ./workdir/~{sampleName}.cup.report.features.png
+ fi
+ }
+
+ output {
+ File summaryPng = "~{outputDir}/~{sampleName}.cup.report.summary.png"
+ File? featuresPng = "~{outputDir}/~{sampleName}.cup.report.features.png"
+ File reportPdf = "~{outputDir}/~{sampleName}_cup_report.pdf"
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ sampleName: {description: "The sample id.", category: "required"}
+ cupData: {description: "The output produced by cuppa.", category: "required"}
+ outputDir: {description: "The directory the output will be placed in.", category: "common"}
+
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task Cuppa {
+ input {
+ Array[File]+ linxOutput
+ Array[File]+ purpleOutput
+ String sampleName
+ Array[String]+ categories = ["DNA"]
+ Array[File]+ referenceData
+ File purpleSvVcf
+ File purpleSvVcfIndex
+ File purpleSomaticVcf
+ File purpleSomaticVcfIndex
+ String outputDir = "./cuppa"
+
+ String javaXmx = "4G"
+ String memory = "5GiB"
+ Int timeMinutes = 10
+ String dockerImage = "quay.io/biowdl/cuppa:1.6"
+ }
+
+ command {
+ set -e
+ mkdir -p sampleData ~{outputDir}
+ ln -s -t sampleData ~{sep=" " linxOutput} ~{sep=" " purpleOutput}
+ cuppa -Xmx~{javaXmx} \
+ -output_dir ~{outputDir} \
+ -output_id ~{sampleName} \
+ -categories '~{sep="," categories}' \
+ -ref_data_dir ~{sub(referenceData[0], basename(referenceData[0]), "")} \
+ -sample_data_dir sampleData \
+ -sample_data ~{sampleName} \
+ -sample_sv_file ~{purpleSvVcf} \
+ -sample_somatic_vcf ~{purpleSomaticVcf}
+ }
+
+ output {
+ File cupData = "~{outputDir}/~{sampleName}.cup.data.csv"
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ linxOutput: {description: "The files produced by linx.", category: "required"}
+ purpleOutput: {description: "The files produced by purple.", category: "required"}
+ sampleName: {description: "The name of the sample.", category: "required"}
+ categories: {description: "The classifiers to use.", category: "advanced"}
+ referenceData: {description: "The reference data.", category: "required"}
+ purpleSvVcf: {description: "The VCF file produced by purple which contains structural variants.", category: "required"}
+ purpleSvVcfIndex: {description: "The index of the structural variants VCF file produced by purple.", category: "required"}
+ purpleSomaticVcf: {description: "The VCF file produced by purple which contains somatic variants.", category: "required"}
+ purpleSomaticVcfIndex: {description: "The index of the somatic VCF file produced by purple.", category: "required"}
+ outputDir: {description: "The directory the output will be placed in.", category: "common"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+ category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task CuppaChart {
+ input {
+ String sampleName
+ File cupData
+ String outputDir = "./cuppa"
+
+ String memory = "4GiB"
+ Int timeMinutes = 5
+ String dockerImage = "quay.io/biowdl/cuppa:1.6"
+ }
+
+ command {
+ set -e
+ mkdir -p ~{outputDir}
+ cuppa-chart \
+ -sample ~{sampleName} \
+ -sample_data ~{cupData} \
+ -output_dir ~{outputDir}
+ }
+
+ output {
+ File cuppaChart = "~{outputDir}/~{sampleName}.cuppa.chart.png"
+ File cuppaConclusion = "~{outputDir}/~{sampleName}.cuppa.conclusion.txt"
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ sampleName: {description: "The name of the sample.", category: "common"}
+ cupData: {description: "The cuppa output.", category: "required"}
+ outputDir: {description: "The directory the output will be written to.", category: "common"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task Gripss {
+ input {
+ File referenceFasta
+ File referenceFastaFai
+ File referenceFastaDict
+ File knownFusionPairBedpe
+ File breakendPon
+ File breakpointPon
+ String referenceName
+ String tumorName
+ File vcf
+ File vcfIndex
+ String outputDir = "./"
+
+ String memory = "17GiB"
+ String javaXmx = "16G"
+ Int timeMinutes = 50
+ String dockerImage = "quay.io/biocontainers/hmftools-gripss:2.0--hdfd78af_0"
+ }
+
+ command {
+ set -e
+ mkdir -p ~{outputDir}
+ gripss -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+ -ref_genome ~{referenceFasta} \
+ -known_hotspot_file ~{knownFusionPairBedpe} \
+ -pon_sgl_file ~{breakendPon} \
+ -pon_sv_file ~{breakpointPon} \
+ -reference ~{referenceName} \
+ -sample ~{tumorName} \
+ -vcf ~{vcf} \
+ -output_dir ~{outputDir} \
+ -output_id somatic
+ }
+
+ output {
+ File fullVcf = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz"
+ File fullVcfIndex = "~{outputDir}/~{tumorName}.gripss.somatic.vcf.gz.tbi"
+ File filteredVcf = "~{outputDir}/~{tumorName}.gripss.filtered.somatic.vcf.gz"
+ File filteredVcfIndex = "~{outputDir}/~{tumorName}.gripss.filtered.somatic.vcf.gz.tbi"
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ referenceFasta: {description: "The reference fasta file.", category: "required"}
+ referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
+ category: "required"}
+ referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
+ knownFusionPairBedpe: {description: "Equivalent to the `-known_hotspot_file` option.", category: "required"}
+ breakendPon: {description: "Equivalent to the `-pon_sgl_file` option.", category: "required"}
+ breakpointPon: {description: "Equivalent to the `-pon_sv_file` option.", category: "required"}
+ tumorName: {description: "The name of the tumor sample.", category: "required"}
+ referenceName: {description: "The name of the normal sample.", category: "required"}
+ vcf: {description: "The input VCF.", category: "required"}
+ vcfIndex: {description: "The index for the input VCF.", category: "required"}
+ outputDir: {description: "The path the output will be written to.", category:"required"}
+
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+ category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task GripssApplicationKt {
+ # Obsolete
+ # Filters and annotates GRIDSS structural variant calls using GRIPSS 1.11.
+ # Superseded by the newer gripss 2.0 task above.
+ input {
+ File inputVcf
+ String outputPath = "gripss.vcf.gz"
+ String tumorName
+ String referenceName
+ File referenceFasta
+ File referenceFastaFai
+ File referenceFastaDict
+ File breakpointHotspot
+ File breakendPon
+ File breakpointPon
+
+ String memory = "32GiB"
+ String javaXmx = "31G"
+ Int timeMinutes = 45
+ String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0"
+ }
+
+ command {
+ java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+ -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \
+ com.hartwig.hmftools.gripss.GripssApplicationKt \
+ -tumor ~{tumorName} \
+ -reference ~{referenceName} \
+ -ref_genome ~{referenceFasta} \
+ -breakpoint_hotspot ~{breakpointHotspot} \
+ -breakend_pon ~{breakendPon} \
+ -breakpoint_pon ~{breakpointPon} \
+ -input_vcf ~{inputVcf} \
+ -output_vcf ~{outputPath} \
+ -paired_normal_tumor_ordinals
+ }
+
+ output {
+ File outputVcf = outputPath
+ # GRIPSS writes a tabix index alongside the output VCF.
+ File outputVcfIndex = outputPath + ".tbi"
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ inputVcf: {description: "The input VCF.", category: "required"}
+ outputPath: {description: "The path where the output VCF will be written.", category: "common"}
+ referenceName: {description: "The name of the normal sample.", category: "required"}
+ tumorName: {description: "The name of the tumor sample.", category: "required"}
+ referenceFasta: {description: "The reference fasta file.", category: "required"}
+ referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
+ category: "required"}
+ referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
+ breakpointHotspot: {description: "Equivalent to the `-breakpoint_hotspot` option.", category: "required"}
+ breakendPon: {description: "Equivalent to the `-breakend_pon` option.", category: "required"}
+ breakpointPon: {description: "Equivalent to the `-breakpoint_pon` option.", category: "required"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+ category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task GripssHardFilterApplicationKt {
+ # Obsolete
+ # Applies GRIPSS 1.11 hard filtering to a GRIPSS-annotated VCF.
+ # Superseded by the newer gripss 2.0 task above.
+ input {
+ File inputVcf
+ String outputPath = "gripss_hard_filter.vcf.gz"
+
+ String memory = "3GiB"
+ String javaXmx = "2G"
+ Int timeMinutes = 15
+ String dockerImage = "quay.io/biocontainers/hmftools-gripss:1.11--hdfd78af_0"
+ }
+
+ command {
+ java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+ -cp /usr/local/share/hmftools-gripss-1.11-0/gripss.jar \
+ com.hartwig.hmftools.gripss.GripssHardFilterApplicationKt \
+ -input_vcf ~{inputVcf} \
+ -output_vcf ~{outputPath}
+ }
+
+ output {
+ File outputVcf = outputPath
+ # GRIPSS writes a tabix index alongside the output VCF.
+ File outputVcfIndex = outputPath + ".tbi"
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ inputVcf: {description: "The input VCF.", category: "required"}
+ outputPath: {description: "The path where the output VCF will be written.", category: "common"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+ category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task HealthChecker {
+ # Runs the Hartwig health-checker on flagstats, WGS metrics and purple
+ # output, and exposes the pass/fail verdict as a Boolean output.
+ input {
+ String outputDir = "."
+ String referenceName
+ File referenceFlagstats
+ File referenceMetrics
+ String tumorName
+ File tumorFlagstats
+ File tumorMetrics
+ Array[File]+ purpleOutput
+
+ String javaXmx = "2G"
+ String memory = "3GiB"
+ Int timeMinutes = 1
+ String dockerImage = "quay.io/biowdl/health-checker:3.2"
+ }
+
+ command {
+ set -e
+ mkdir -p ~{outputDir}
+ health-checker -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+ -reference ~{referenceName} \
+ -ref_flagstat_file ~{referenceFlagstats} \
+ -ref_wgs_metrics_file ~{referenceMetrics} \
+ -tumor ~{tumorName} \
+ -tum_flagstat_file ~{tumorFlagstats} \
+ -tum_wgs_metrics_file ~{tumorMetrics} \
+ -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \
+ -output_dir ~{outputDir}
+ # The tool communicates its verdict through a sentinel file name;
+ # translate that into a 'succeeded' file that WDL can read as a Boolean.
+ if [ -e '~{outputDir}/~{tumorName}.HealthCheckSucceeded' ]
+ then
+ echo 'true' > '~{outputDir}/succeeded'
+ fi
+ if [ -e '~{outputDir}/~{tumorName}.HealthCheckFailed' ]
+ then
+ echo 'false' > '~{outputDir}/succeeded'
+ fi
+ }
+
+ output {
+ # NOTE(review): if the tool produces neither sentinel file, read_boolean
+ # will fail on the missing 'succeeded' file, failing the task.
+ Boolean succeeded = read_boolean("succeeded")
+ File outputFile = if succeeded
+ then "~{outputDir}/~{tumorName}.HealthCheckSucceeded"
+ else "~{outputDir}/~{tumorName}.HealthCheckFailed"
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ outputDir: {description: "The path the output will be written to.", category: "required"}
+ referenceName: {description: "The name of the normal sample.", category: "required"}
+ referenceFlagstats: {description: "The flagstats for the normal sample.", category: "required"}
+ referenceMetrics: {description: "The picard WGS metrics for the normal sample.", category: "required"}
+ tumorName: {description: "The name of the tumor sample.", category: "required"}
+ tumorFlagstats: {description: "The flagstats for the tumor sample.", category: "required"}
+ tumorMetrics: {description: "The picard WGS metrics for the tumor sample.", category: "required"}
+ purpleOutput: {description: "The files from purple's output directory.", category: "required"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+ category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task Linx {
+ # Runs LINX to annotate and interpret structural variants, detect fusions
+ # and drivers, and produce visualisation data files.
+ input {
+ String sampleName
+ File svVcf
+ File svVcfIndex
+ Array[File]+ purpleOutput
+ String refGenomeVersion
+ String outputDir = "./linx"
+ File fragileSiteCsv
+ File lineElementCsv
+ File knownFusionCsv
+ File driverGenePanel
+ Boolean writeAllVisFusions = false
+ #The following should be in the same directory.
+ File geneDataCsv
+ File proteinFeaturesCsv
+ File transExonDataCsv
+ File transSpliceDataCsv
+
+ String memory = "9GiB"
+ String javaXmx = "8G"
+ Int timeMinutes = 10
+ String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0"
+ }
+
+ command {
+ set -e
+ mkdir -p ~{outputDir}
+ linx -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+ -sample ~{sampleName} \
+ -sv_vcf ~{svVcf} \
+ -purple_dir ~{sub(purpleOutput[0], basename(purpleOutput[0]), "")} \
+ -ref_genome_version ~{refGenomeVersion} \
+ -output_dir ~{outputDir} \
+ -fragile_site_file ~{fragileSiteCsv} \
+ -line_element_file ~{lineElementCsv} \
+ -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \
+ -check_fusions \
+ -known_fusion_file ~{knownFusionCsv} \
+ -check_drivers \
+ -driver_gene_panel ~{driverGenePanel} \
+ -chaining_sv_limit 0 \
+ -write_vis_data \
+ ~{if writeAllVisFusions then "-write_all_vis_fusions" else ""}
+ }
+
+ output {
+ File driverCatalog = "~{outputDir}/~{sampleName}.linx.driver.catalog.tsv"
+ File linxBreakend = "~{outputDir}/~{sampleName}.linx.breakend.tsv"
+ File linxClusters = "~{outputDir}/~{sampleName}.linx.clusters.tsv"
+ File linxDrivers = "~{outputDir}/~{sampleName}.linx.drivers.tsv"
+ File linxFusion = "~{outputDir}/~{sampleName}.linx.fusion.tsv"
+ File linxLinks = "~{outputDir}/~{sampleName}.linx.links.tsv"
+ File linxSvs = "~{outputDir}/~{sampleName}.linx.svs.tsv"
+ File linxVisCopyNumber = "~{outputDir}/~{sampleName}.linx.vis_copy_number.tsv"
+ File linxVisFusion = "~{outputDir}/~{sampleName}.linx.vis_fusion.tsv"
+ File linxVisGeneExon = "~{outputDir}/~{sampleName}.linx.vis_gene_exon.tsv"
+ File linxVisProteinDomain = "~{outputDir}/~{sampleName}.linx.vis_protein_domain.tsv"
+ File linxVisSegments = "~{outputDir}/~{sampleName}.linx.vis_segments.tsv"
+ File linxVisSvData = "~{outputDir}/~{sampleName}.linx.vis_sv_data.tsv"
+ File linxVersion = "~{outputDir}/linx.version"
+ # Convenience collection of all outputs, eg. for passing to LinxVisualisations.
+ Array[File] outputs = [driverCatalog, linxBreakend, linxClusters, linxDrivers, linxFusion,
+ linxLinks, linxSvs, linxVisCopyNumber, linxVisFusion,
+ linxVisGeneExon, linxVisProteinDomain, linxVisSegments, linxVisSvData,
+ linxVersion]
+ }
+
+ runtime {
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ memory: memory
+ }
+
+ parameter_meta {
+ sampleName: {description: "The name of the sample.", category: "required"}
+ svVcf: {description: "A VCF file containing structural variants, produced using GRIDSS, annotated for viral insertions and postprocessed with GRIPSS.", category: "required"}
+ svVcfIndex: {description: "Index for the structural variants VCF file.", category: "required"}
+ purpleOutput: {description: "The files produced by PURPLE.", category: "required"}
+ refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"}
+ outputDir: {description: "The directory the outputs will be written to.", category: "required"}
+ fragileSiteCsv: {description: "A list of known fragile sites.", category: "required"}
+ lineElementCsv: {description: "A list of known LINE source regions.", category: "required"}
+ knownFusionCsv: {description: "A CSV file describing known fusions.", category: "required"}
+ driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"}
+ writeAllVisFusions: {description: "Equivalent to the -write_all_vis_fusions flag.", category: "advanced"}
+ geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
+ proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
+ transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"}
+ transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"}
+
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+ category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task LinxVisualisations {
+ # Generates circos data and plot images from LINX vis_* output files
+ # using the SvVisualiser bundled in the LINX jar.
+ input {
+ String outputDir = "./linx_visualisation"
+ String sample
+ String refGenomeVersion
+ Array[File]+ linxOutput
+ Boolean plotReportable = true
+
+ String memory = "9GiB"
+ String javaXmx = "8G"
+ Int timeMinutes = 1440
+ String dockerImage = "quay.io/biocontainers/hmftools-linx:1.18--hdfd78af_0"
+ }
+
+ command {
+ set -e
+ mkdir -p ~{outputDir}
+ java -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+ -cp /usr/local/share/hmftools-linx-1.18-0/sv-linx.jar \
+ com.hartwig.hmftools.linx.visualiser.SvVisualiser \
+ -sample ~{sample} \
+ -ref_genome_version ~{refGenomeVersion} \
+ -circos /usr/local/bin/circos \
+ -vis_file_dir ~{sub(linxOutput[0], basename(linxOutput[0]), "")} \
+ -data_out ~{outputDir}/circos \
+ -plot_out ~{outputDir}/plots \
+ ~{if plotReportable then "-plot_reportable" else ""}
+ }
+
+ output {
+ # The exact file names produced are sample/cluster dependent, so the
+ # directory contents are globbed rather than listed explicitly.
+ Array[File] circos = glob("~{outputDir}/circos/*")
+ Array[File] plots = glob("~{outputDir}/plots/*")
+ }
+
+ runtime {
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ memory: memory
+ }
+
+ parameter_meta {
+ outputDir: {description: "The directory the outputs will be written to.", category: "required"}
+ sample: {description: "The sample's name.", category: "required"}
+ refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"}
+ linxOutput: {description: "The directory containing the linx output.", category: "required"}
+ plotReportable: {description: "Equivalent to the -plot_reportable flag.", category: "advanced"}
+
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+ category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task Orange {
+ # Runs ORANGE to aggregate the outputs of the HMF tool chain (sage, purple,
+ # linx, cuppa, chord, peach, protect, virus-interpreter) into a single
+ # JSON and PDF report.
+ input {
+ String outputDir = "./orange"
+ File doidJson
+ Array[String] sampleDoids
+ String tumorName
+ String referenceName
+ File referenceMetrics
+ File tumorMetrics
+ File referenceFlagstats
+ File tumorFlagstats
+ File sageGermlineGeneCoverageTsv
+ File sageSomaticRefSampleBqrPlot
+ File sageSomaticTumorSampleBqrPlot
+ File purpleGeneCopyNumberTsv
+ File purpleGermlineDriverCatalogTsv
+ File purpleGermlineVariantVcf
+ File purpleGermlineVariantVcfIndex
+ Array[File]+ purplePlots
+ File purplePurityTsv
+ File purpleQcFile
+ File purpleSomaticDriverCatalogTsv
+ File purpleSomaticVariantVcf
+ File purpleSomaticVariantVcfIndex
+ File linxFusionTsv
+ File linxBreakendTsv
+ File linxDriverCatalogTsv
+ File linxDriverTsv
+ Array[File]+ linxPlots
+ File cuppaResultCsv
+ File cuppaSummaryPlot
+ File? cuppaFeaturePlot
+ File chordPredictionTxt
+ File peachGenotypeTsv
+ File protectEvidenceTsv
+ File annotatedVirusTsv
+ #File pipelineVersionFile
+ File cohortMappingTsv
+ File cohortPercentilesTsv
+
+ String memory = "17GiB"
+ String javaXmx = "16G"
+ Int timeMinutes = 10
+ String dockerImage = "quay.io/biowdl/orange:v1.6"
+ }
+
+ command {
+ set -e
+ mkdir -p ~{outputDir}
+ orange -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+ -output_dir ~{outputDir} \
+ -doid_json ~{doidJson} \
+ -primary_tumor_doids '~{sep=";" sampleDoids}' \
+ -max_evidence_level C \
+ -tumor_sample_id ~{tumorName} \
+ -reference_sample_id ~{referenceName} \
+ -ref_sample_wgs_metrics_file ~{referenceMetrics} \
+ -tumor_sample_wgs_metrics_file ~{tumorMetrics} \
+ -ref_sample_flagstat_file ~{referenceFlagstats} \
+ -tumor_sample_flagstat_file ~{tumorFlagstats} \
+ -sage_germline_gene_coverage_tsv ~{sageGermlineGeneCoverageTsv} \
+ -sage_somatic_ref_sample_bqr_plot ~{sageSomaticRefSampleBqrPlot} \
+ -sage_somatic_tumor_sample_bqr_plot ~{sageSomaticTumorSampleBqrPlot} \
+ -purple_gene_copy_number_tsv ~{purpleGeneCopyNumberTsv} \
+ -purple_germline_driver_catalog_tsv ~{purpleGermlineDriverCatalogTsv} \
+ -purple_germline_variant_vcf ~{purpleGermlineVariantVcf} \
+ -purple_plot_directory ~{sub(purplePlots[0], basename(purplePlots[0]), "")} \
+ -purple_purity_tsv ~{purplePurityTsv} \
+ -purple_qc_file ~{purpleQcFile} \
+ -purple_somatic_driver_catalog_tsv ~{purpleSomaticDriverCatalogTsv} \
+ -purple_somatic_variant_vcf ~{purpleSomaticVariantVcf} \
+ -linx_fusion_tsv ~{linxFusionTsv} \
+ -linx_breakend_tsv ~{linxBreakendTsv} \
+ -linx_driver_catalog_tsv ~{linxDriverCatalogTsv} \
+ -linx_driver_tsv ~{linxDriverTsv} \
+ -linx_plot_directory ~{sub(linxPlots[0], basename(linxPlots[0]), "")} \
+ -cuppa_result_csv ~{cuppaResultCsv} \
+ -cuppa_summary_plot ~{cuppaSummaryPlot} \
+ ~{"-cuppa_feature_plot " + cuppaFeaturePlot} \
+ -chord_prediction_txt ~{chordPredictionTxt} \
+ -peach_genotype_tsv ~{peachGenotypeTsv} \
+ -protect_evidence_tsv ~{protectEvidenceTsv} \
+ -annotated_virus_tsv ~{annotatedVirusTsv} \
+ -cohort_mapping_tsv ~{cohortMappingTsv} \
+ -cohort_percentiles_tsv ~{cohortPercentilesTsv}
+ }
+ #TODO may need to be added: -pipeline_version_file ~{pipelineVersionFile}
+
+ output {
+ File orangeJson = "~{outputDir}/~{tumorName}.orange.json"
+ File orangePdf = "~{outputDir}/~{tumorName}.orange.pdf"
+ }
+
+ runtime {
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ memory: memory
+ }
+
+ parameter_meta {
+ outputDir: {description: "The directory the outputs will be written to.", category: "common"}
+ doidJson: {description: "A json with the DOID (Human Disease Ontology) tree.", category: "required"}
+ sampleDoids: {description: "The DOIDs (Human Disease Ontology) for the primary tumor.", category: "required"}
+ tumorName: {description: "The name of the tumor sample.", category: "required"}
+ referenceName: {description: "The name of the normal sample.", category: "required"}
+ referenceMetrics: {description: "The picard WGS metrics for the normal sample.", category: "required"}
+ tumorMetrics: {description: "The picard WGS metrics for the tumor sample.", category: "required"}
+ referenceFlagstats: {description: "The flagstats for the normal sample.", category: "required"}
+ tumorFlagstats: {description: "The flagstats for the tumor sample.", category: "required"}
+ sageGermlineGeneCoverageTsv: {description: "Gene coverage file produced by the germline sage run.", category: "required"}
+ sageSomaticRefSampleBqrPlot: {description: "The reference bqr plot produced by the somatic sage run.", category: "required"}
+ sageSomaticTumorSampleBqrPlot: {description: "The tumor bqr plot produced by the somatic sage run.", category: "required"}
+ purpleGeneCopyNumberTsv: {description: "Copy number tsv produced by purple.", category: "required"}
+ purpleGermlineDriverCatalogTsv: {description: "Germline driver catalog produced by purple.", category: "required"}
+ purpleGermlineVariantVcf: {description: "Germline variant vcf produced by purple.", category: "required"}
+ purpleGermlineVariantVcfIndex: {description: "The index for the germline variant vcf produced by purple.", category: "required"}
+ purplePlots: {description: "The plots generated by purple.", category: "required"}
+ purplePurityTsv: {description: "The purity file produced by purple.", category: "required"}
+ purpleQcFile: {description: "The qc file produced by purple.", category: "required"}
+ purpleSomaticDriverCatalogTsv: {description: "Somatic driver catalog produced by purple.", category: "required"}
+ purpleSomaticVariantVcf: {description: "Somatic variant vcf produced by purple.", category: "required"}
+ purpleSomaticVariantVcfIndex: {description: "The index for the somatic variant vcf produced by purple.", category: "required"}
+ linxFusionTsv: {description: "The fusions tsv produced by linx.", category: "required"}
+ linxBreakendTsv: {description: "The breakend tsv produced by linx.", category: "required"}
+ linxDriverCatalogTsv: {description: "The driver catalog produced by linx.", category: "required"}
+ linxDriverTsv: {description: "The driver tsv produced by linx.", category: "required"}
+ linxPlots: {description: "The plots generated by linx.", category: "required"}
+ cuppaResultCsv: {description: "The cuppa results csv.", category: "required"}
+ cuppaSummaryPlot: {description: "The cuppa summary plot.", category: "required"}
+ cuppaFeaturePlot: {description: "The cuppa feature plot.", category: "common"}
+ chordPredictionTxt: {description: "Chord prediction results.", category: "required"}
+ peachGenotypeTsv: {description: "Genotype tsv produced by peach.", category: "required"}
+ protectEvidenceTsv: {description: "Evidence tsv produced by protect.", category: "required"}
+ annotatedVirusTsv: {description: "Annotated virus tsv produced by virus-interpreter.", category: "required"}
+ #pipelineVersionFile: {description: "", category: "required"}
+ cohortMappingTsv: {description: "Cohort mapping file from the HMFTools resources.", category: "required"}
+ cohortPercentilesTsv: {description: "Cohort percentile file from the HMFTools resources.", category: "required"}
+
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+ category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task Pave {
+ # Runs PAVE to annotate somatic variant VCFs with transcript/coding
+ # effects based on the ensembl data cache and a driver gene panel.
+ input {
+ String outputDir = "./"
+ String sampleName
+ File vcfFile
+ File vcfFileIndex
+ File referenceFasta
+ File referenceFastaFai
+ File referenceFastaDict
+ String refGenomeVersion
+ File driverGenePanel
+ #The following should be in the same directory.
+ File geneDataCsv
+ File proteinFeaturesCsv
+ File transExonDataCsv
+ File transSpliceDataCsv
+
+ Int timeMinutes = 50
+ String javaXmx = "8G"
+ String memory = "9GiB"
+ String dockerImage = "quay.io/biowdl/pave:v1.0"
+ }
+
+ command {
+ set -e
+ mkdir -p ~{outputDir}
+ pave -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+ -sample ~{sampleName} \
+ -vcf_file ~{vcfFile} \
+ -output_dir ~{outputDir} \
+ -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \
+ -ref_genome ~{referenceFasta} \
+ -ref_genome_version ~{refGenomeVersion} \
+ -driver_gene_panel ~{driverGenePanel}
+ }
+
+ output {
+ # The output name is the input basename with its trailing "vcf.gz"
+ # replaced by "pave.vcf.gz" (note the unescaped '.' in the pattern
+ # matches any character, which is harmless here).
+ File outputVcf = "~{outputDir}/~{sub(basename(vcfFile), 'vcf.gz$', 'pave.vcf.gz')}"
+ File outputVcfIndex = "~{outputDir}/~{sub(basename(vcfFile), 'vcf.gz$', 'pave.vcf.gz.tbi')}"
+ }
+
+ runtime {
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ memory: memory
+ }
+
+ parameter_meta {
+ outputDir: {description: "The directory the outputs will be written to.", category: "required"}
+ sampleName: {description: "The name of the sample.", category: "required"}
+ vcfFile: {description: "The input VCF file.", category: "required"}
+ vcfFileIndex: {description: "The index for the input vcf file.", category: "required"}
+ referenceFasta: {description: "The reference fasta file.", category: "required"}
+ referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
+ category: "required"}
+ referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
+ refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"HG19\" or \"HG38\".", category: "required"}
+ driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"}
+ geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
+ proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
+ transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"}
+ transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"}
+
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+ category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task Protect {
+ # Runs PROTECT to match purple/linx/chord/virus-interpreter findings
+ # against the SERVE actionability database, producing a clinical
+ # evidence tsv.
+ input {
+ String refGenomeVersion
+ String tumorName
+ String referenceName
+ Array[String]+ sampleDoids
+ String outputDir = "."
+ Array[File]+ serveActionability
+ File doidJson
+ File purplePurity
+ File purpleQc
+ File purpleDriverCatalogSomatic
+ File purpleDriverCatalogGermline
+ File purpleSomaticVariants
+ File purpleSomaticVariantsIndex
+ File purpleGermlineVariants
+ File purpleGermlineVariantsIndex
+ File purpleGeneCopyNumber
+ File linxFusion
+ File linxBreakend
+ File linxDriversCatalog
+ File chordPrediction
+ File annotatedVirus
+
+ String memory = "9GiB"
+ String javaXmx = "8G"
+ Int timeMinutes = 60
+ String dockerImage = "quay.io/biowdl/protect:v2.0"
+ }
+
+ command {
+ protect -Xmx~{javaXmx} \
+ -ref_genome_version ~{refGenomeVersion} \
+ -tumor_sample_id ~{tumorName} \
+ -reference_sample_id ~{referenceName} \
+ -primary_tumor_doids '~{sep=";" sampleDoids}' \
+ -output_dir ~{outputDir} \
+ -serve_actionability_dir ~{sub(serveActionability[0], basename(serveActionability[0]), "")} \
+ -doid_json ~{doidJson} \
+ -purple_purity_tsv ~{purplePurity} \
+ -purple_qc_file ~{purpleQc} \
+ -purple_somatic_driver_catalog_tsv ~{purpleDriverCatalogSomatic} \
+ -purple_germline_driver_catalog_tsv ~{purpleDriverCatalogGermline} \
+ -purple_somatic_variant_vcf ~{purpleSomaticVariants} \
+ -purple_germline_variant_vcf ~{purpleGermlineVariants} \
+ -purple_gene_copy_number_tsv ~{purpleGeneCopyNumber} \
+ -linx_fusion_tsv ~{linxFusion} \
+ -linx_breakend_tsv ~{linxBreakend} \
+ -linx_driver_catalog_tsv ~{linxDriversCatalog} \
+ -chord_prediction_txt ~{chordPrediction} \
+ -annotated_virus_tsv ~{annotatedVirus}
+ }
+
+ output {
+ File protectTsv = "~{outputDir}/~{tumorName}.protect.tsv"
+ }
+
+ runtime {
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ memory: memory
+ }
+
+ parameter_meta {
+ refGenomeVersion: {description: "The version of the genome assembly used for alignment. Either \"37\" or \"38\".", category: "required"}
+ tumorName: {description: "The name of the tumor sample.", category: "required"}
+ referenceName: {description: "The name of the normal sample.", category: "required"}
+ sampleDoids: {description: "The DOIDs (Human Disease Ontology) for the primary tumor.", category: "required"}
+ outputDir: {description: "The directory the outputs will be written to.", category: "required"}
+ serveActionability: {description: "The actionability files generated by hmftools' serve.", category: "required"}
+ doidJson: {description: "A json with the DOID (Human Disease Ontology) tree.", category: "required"}
+ purplePurity: {description: "The purity file generated by purple.", category: "required"}
+ purpleQc: {description: "The QC file generated by purple.", category: "required"}
+ purpleDriverCatalogSomatic: {description: "The somatic driver catalog generated by purple.", category: "required"}
+ purpleDriverCatalogGermline: {description: "The germline driver catalog generated by purple.", category: "required"}
+ purpleSomaticVariants: {description: "The somatic VCF generated by purple.", category: "required"}
+ purpleSomaticVariantsIndex: {description: "The index for the somatic VCF generated by purple.", category: "required"}
+ purpleGermlineVariants: {description: "The germline VCF generated by purple.", category: "required"}
+ purpleGermlineVariantsIndex: {description: "The index of the germline VCF generated by purple.", category: "required"}
+ purpleGeneCopyNumber: {description: "The gene copy number file generated by purple.", category: "required"}
+ linxFusion: {description: "The fusion file generated by linx.", category: "required"}
+ linxBreakend: {description: "The breakend file generated by linx.", category: "required"}
+ linxDriversCatalog: {description: "The driver catalog generated by linx.", category: "required"}
+ chordPrediction: {description: "The chord prediction file.", category: "required"}
+ annotatedVirus: {description: "The virus-interpreter output.", category: "required"}
+
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+ category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task Purple {
+ input {
+ String referenceName
+ String tumorName
+ String outputDir = "./purple"
+ Array[File]+ amberOutput
+ Array[File]+ cobaltOutput
+ File gcProfile
+ File somaticVcf
+ File germlineVcf
+ File filteredSvVcf
+ File filteredSvVcfIndex
+ File fullSvVcf
+ File fullSvVcfIndex
+ File referenceFasta
+ File referenceFastaFai
+ File referenceFastaDict
+ File driverGenePanel
+ File somaticHotspots
+ File germlineHotspots
+ Float? highlyDiploidPercentage
+ Float? somaticMinPuritySpread
+ #The following should be in the same directory.
+ File geneDataCsv
+ File proteinFeaturesCsv
+ File transExonDataCsv
+ File transSpliceDataCsv
+
+ Int threads = 1
+ Int timeMinutes = 30
+ String memory = "9GiB"
+ String javaXmx = "8G"
+ # clone of quay.io/biocontainers/hmftools-purple:3.2--hdfd78af_0 with 'ln -s /usr/local/lib/libwebp.so.7 /usr/local/lib/libwebp.so.6'
+ String dockerImage = "quay.io/biowdl/hmftools-purple:3.2"
+ }
+
+ command {
+ PURPLE -Xmx~{javaXmx} \
+ -reference ~{referenceName} \
+ -germline_vcf ~{germlineVcf} \
+ -germline_hotspots ~{germlineHotspots} \
+ -tumor ~{tumorName} \
+ -output_dir ~{outputDir} \
+ -amber ~{sub(amberOutput[0], basename(amberOutput[0]), "")} \
+ -cobalt ~{sub(cobaltOutput[0], basename(cobaltOutput[0]), "")} \
+ -gc_profile ~{gcProfile} \
+ -somatic_vcf ~{somaticVcf} \
+ -structural_vcf ~{filteredSvVcf} \
+ -sv_recovery_vcf ~{fullSvVcf} \
+ -circos /usr/local/bin/circos \
+ -ref_genome ~{referenceFasta} \
+ -ensembl_data_dir ~{sub(geneDataCsv, basename(geneDataCsv), "")} \
+ -run_drivers \
+ -somatic_hotspots ~{somaticHotspots} \
+ -driver_gene_panel ~{driverGenePanel} \
+ ~{"-highly_diploid_percentage " + highlyDiploidPercentage} \
+ ~{"-somatic_min_purity_spread " + somaticMinPuritySpread} \
+ -threads ~{threads}
+ }
+
+ output {
+ File driverCatalogGermlineTsv = "~{outputDir}/~{tumorName}.driver.catalog.germline.tsv"
+ File driverCatalogSomaticTsv = "~{outputDir}/~{tumorName}.driver.catalog.somatic.tsv"
+ File purpleCnvGeneTsv = "~{outputDir}/~{tumorName}.purple.cnv.gene.tsv"
+ File purpleCnvSomaticTsv = "~{outputDir}/~{tumorName}.purple.cnv.somatic.tsv"
+ File purpleGermlineDeletionTsv = "~{outputDir}/~{tumorName}.purple.germline.deletion.tsv"
+ File purpleGermlineVcf = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz"
+ File purpleGermlineVcfIndex = "~{outputDir}/~{tumorName}.purple.germline.vcf.gz.tbi"
+ File purplePurityRangeTsv = "~{outputDir}/~{tumorName}.purple.purity.range.tsv"
+ File purplePurityTsv = "~{outputDir}/~{tumorName}.purple.purity.tsv"
+ File purpleQc = "~{outputDir}/~{tumorName}.purple.qc"
+ File purpleSegmentTsv = "~{outputDir}/~{tumorName}.purple.segment.tsv"
+ File purpleSomaticClonalityTsv = "~{outputDir}/~{tumorName}.purple.somatic.clonality.tsv"
+ File purpleSomaticHistTsv = "~{outputDir}/~{tumorName}.purple.somatic.hist.tsv"
+ File purpleSomaticVcf = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz"
+ File purpleSomaticVcfIndex = "~{outputDir}/~{tumorName}.purple.somatic.vcf.gz.tbi"
+ File purpleSvVcf = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz"
+ File purpleSvVcfIndex = "~{outputDir}/~{tumorName}.purple.sv.vcf.gz.tbi"
+ File purpleVersion = "~{outputDir}/purple.version"
+ File circosPlot = "~{outputDir}/plot/~{tumorName}.circos.png"
+ File copynumberPlot = "~{outputDir}/plot/~{tumorName}.copynumber.png"
+ File inputPlot = "~{outputDir}/plot/~{tumorName}.input.png"
+ File mapPlot = "~{outputDir}/plot/~{tumorName}.map.png"
+ File purityRangePlot = "~{outputDir}/plot/~{tumorName}.purity.range.png"
+ File segmentPlot = "~{outputDir}/plot/~{tumorName}.segment.png"
+ File somaticClonalityPlot = "~{outputDir}/plot/~{tumorName}.somatic.clonality.png"
+ File somaticPlot = "~{outputDir}/plot/~{tumorName}.somatic.png"
+ File? somaticRainfallPlot = "~{outputDir}/plot/~{tumorName}.somatic.rainfall.png"
+ File circosNormalRatio = "~{outputDir}/circos/~{referenceName}.ratio.circos"
+ File circosBaf = "~{outputDir}/circos/~{tumorName}.baf.circos"
+ File circosConf = "~{outputDir}/circos/~{tumorName}.circos.conf"
+ File circosCnv = "~{outputDir}/circos/~{tumorName}.cnv.circos"
+ File circosIndel = "~{outputDir}/circos/~{tumorName}.indel.circos"
+ File circosInputConf = "~{outputDir}/circos/~{tumorName}.input.conf"
+ File circosLink = "~{outputDir}/circos/~{tumorName}.link.circos"
+ File circosMap = "~{outputDir}/circos/~{tumorName}.map.circos"
+ File circosTumorRatio = "~{outputDir}/circos/~{tumorName}.ratio.circos"
+ File circosSnp = "~{outputDir}/circos/~{tumorName}.snp.circos"
+ File circosGaps = "~{outputDir}/circos/gaps.txt"
+ Array[File] outputs = [driverCatalogSomaticTsv, purpleCnvGeneTsv,
+ purpleCnvSomaticTsv, purplePurityRangeTsv, purplePurityTsv, purpleQc,
+ purpleSegmentTsv, purpleSomaticClonalityTsv, purpleSomaticHistTsv,
+ purpleSomaticVcf, purpleSomaticVcfIndex, purpleSvVcf, purpleSvVcfIndex,
+ purpleVersion, purpleGermlineVcf, purpleGermlineVcfIndex, driverCatalogGermlineTsv]
+ Array[File] plots = select_all([circosPlot, copynumberPlot, inputPlot, mapPlot, purityRangePlot,
+ segmentPlot, somaticClonalityPlot, somaticPlot, somaticRainfallPlot])
+ Array[File] circos = [circosNormalRatio, circosConf, circosIndel, circosLink,
+ circosTumorRatio, circosGaps, circosBaf, circosCnv, circosInputConf, circosMap,
+ circosSnp]
+ }
+
+ runtime {
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ cpu: threads
+ docker: dockerImage
+ memory: memory
+ }
+
+ parameter_meta {
+ referenceName: {description: "the name of the normal sample.", category: "required"}
+ tumorName: {description: "The name of the tumor sample.", category: "required"}
+ outputDir: {description: "The path to the output directory.", category: "common"}
+ amberOutput: {description: "The output files of hmftools amber.", category: "required"}
+ cobaltOutput: {description: "The output files of hmftools cobalt", category: "required"}
+ gcProfile: {description: "A file describing the GC profile of the reference genome.", category: "required"}
+ somaticVcf: {description: "The somatic variant calling results.", category: "required"}
+ germlineVcf: {description: "The germline variant calling results.", category: "required"}
+ filteredSvVcf: {description: "The filtered structural variant calling results.", category: "required"}
+ fullSvVcf: {description: "The unfiltered structural variant calling results.", category: "required"}
+ referenceFasta: {description: "The reference fasta file.", category: "required"}
+ referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
+ category: "required"}
+ referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
+ driverGenePanel: {description: "A TSV file describing the driver gene panel.", category: "required"}
+ somaticHotspots: {description: "A vcf file with hotspot somatic variant sites.", category: "required"}
+ germlineHotspots: {description: "A vcf file with hotspot germline variant sites.", category: "required"}
+ highlyDiploidPercentage: {description: "Equivalent to PURPLE's `-highly_diploid_percentage` option.", category: "advanced"}
+ somaticMinPuritySpread: {description: "Equivalent to PURPLE's `-somatic_min_purity_spread` option.", category: "advanced"}
+ geneDataCsv: {description: "A CSV file containing gene information, must be in the same directory as `proteinFeaturesCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
+ proteinFeaturesCsv: {description: "A CSV file containing protein feature information, must be in the same directory as `geneDataCsv`, `transExonDataCsv` and `transSpliceDataCsv`.", category: "required"}
+ transExonDataCsv: {description: "A CSV file containing transcript exon information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transSpliceDataCsv`.", category: "required"}
+ transSpliceDataCsv: {description: "A CSV file containing transcript splicing information, must be in the same directory as `geneDataCsv`, `proteinFeaturesCsv` and `transExonDataCsv`.", category: "required"}
+
+
+ threads: {description: "The number of threads the program will use.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+ category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+ }
+}
+
+task Sage {
+    input {
+        String tumorName
+        File tumorBam
+        File tumorBamIndex
+        File referenceFasta
+        File referenceFastaDict
+        File referenceFastaFai
+        File hotspots
+        File panelBed
+        File highConfidenceBed
+        Boolean hg38 = false
+        Boolean panelOnly = false
+        String outputPath = "./sage.vcf.gz"
+
+        String? referenceName
+        File? referenceBam
+        File? referenceBamIndex
+        Int? hotspotMinTumorQual
+        Int? panelMinTumorQual
+        Int? hotspotMaxGermlineVaf
+        Int? hotspotMaxGermlineRelRawBaseQual
+        Int? panelMaxGermlineVaf
+        Int? panelMaxGermlineRelRawBaseQual
+        String? mnvFilterEnabled
+        File? coverageBed
+
+        Int threads = 32
+        String javaXmx = "16G"
+        String memory = "20GiB"
+        Int timeMinutes = 720
+        String dockerImage = "quay.io/biocontainers/hmftools-sage:2.8--hdfd78af_1"
+    }
+
+    command {
+        SAGE -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+        -tumor ~{tumorName} \
+        -tumor_bam ~{tumorBam} \
+        ~{"-reference " + referenceName} \
+        ~{"-reference_bam " + referenceBam} \
+        -ref_genome ~{referenceFasta} \
+        -hotspots ~{hotspots} \
+        -panel_bed ~{panelBed} \
+        -high_confidence_bed ~{highConfidenceBed} \
+        -assembly ~{true="hg38" false="hg19" hg38} \
+        ~{"-hotspot_min_tumor_qual " + hotspotMinTumorQual} \
+        ~{"-panel_min_tumor_qual " + panelMinTumorQual} \
+        ~{"-hotspot_max_germline_vaf " + hotspotMaxGermlineVaf} \
+        ~{"-hotspot_max_germline_rel_raw_base_qual " + hotspotMaxGermlineRelRawBaseQual} \
+        ~{"-panel_max_germline_vaf " + panelMaxGermlineVaf} \
+        ~{"-panel_max_germline_rel_raw_base_qual " + panelMaxGermlineRelRawBaseQual} \
+        ~{"-mnv_filter_enabled " + mnvFilterEnabled} \
+        ~{"-coverage_bed " + coverageBed} \
+        ~{true="-panel_only" false="" panelOnly} \
+        -threads ~{threads} \
+        -out ~{outputPath}
+    }
+
+    output {
+        File outputVcf = outputPath
+        File outputVcfIndex = outputPath + ".tbi"
+        File? referenceSageBqrPng = "~{referenceName}.sage.bqr.png"
+        File? referenceSageBqrTsv = "~{referenceName}.sage.bqr.tsv"
+        File tumorSageBqrPng = "~{tumorName}.sage.bqr.png"
+        File tumorSageBqrTsv = "~{tumorName}.sage.bqr.tsv"
+        File sageGeneCoverageTsv = "~{tumorName}.sage.gene.coverage.tsv"
+    }
+
+    runtime {
+        time_minutes: timeMinutes # !UnknownRuntimeKey
+        cpu: threads
+        docker: dockerImage
+        memory: memory
+    }
+
+    parameter_meta {
+        tumorName: {description: "The name of the tumor sample.", category: "required"}
+        tumorBam: {description: "The BAM file for the tumor sample.", category: "required"}
+        tumorBamIndex: {description: "The index of the BAM file for the tumor sample.", category: "required"}
+        referenceName: {description: "The name of the normal/reference sample.", category: "common"}
+        referenceBam: {description: "The BAM file for the normal sample.", category: "common"}
+        referenceBamIndex: {description: "The index of the BAM file for the normal sample.", category: "common"}
+        referenceFasta: {description: "The reference fasta file.", category: "required"}
+        referenceFastaDict: {description: "The sequence dictionary associated with the reference fasta file.",
+                             category: "required"}
+        referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
+        hotspots: {description: "A vcf file with hotspot variant sites.", category: "required"}
+        panelBed: {description: "A bed file describing coding regions to search for in frame indels.", category: "required"}
+        highConfidenceBed: {description: "A bed files describing high confidence mapping regions.", category: "required"}
+        hg38: {description: "Whether the reference genome is hg38, determines the value passed to sage's `-assembly` option.", category: "common"}
+        panelOnly: {description: "Equivalent to sage's `-panel_only` flag.", category: "advanced"}
+        outputPath: {description: "The path the output VCF will be written to.", category: "common"}
+        hotspotMinTumorQual: {description: "Equivalent to sage's `hotspot_min_tumor_qual` option.", category: "advanced"}
+        panelMinTumorQual: {description: "Equivalent to sage's `panel_min_tumor_qual` option.", category: "advanced"}
+        hotspotMaxGermlineVaf: {description: "Equivalent to sage's `hotspot_max_germline_vaf` option.", category: "advanced"}
+        hotspotMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `hotspot_max_germline_rel_raw_base_qual` option.", category: "advanced"}
+        panelMaxGermlineVaf: {description: "Equivalent to sage's `panel_max_germline_vaf` option.", category: "advanced"}
+        panelMaxGermlineRelRawBaseQual: {description: "Equivalent to sage's `panel_max_germline_rel_raw_base_qual` option.", category: "advanced"}
+        mnvFilterEnabled: {description: "Equivalent to sage's `mnv_filter_enabled` option.", category: "advanced"}
+        coverageBed: {description: "Equivalent to sage's `coverage_bed` option.", category: "advanced"}
+
+        threads: {description: "The number of threads the program will use.", category: "advanced"}
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+                  category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+    }
+}
+
+task VirusInterpreter {
+    input {
+        String sampleId
+        File purplePurityTsv
+        # NOTE(review): "pruple" is a typo for "purple". The name is kept as-is
+        # because renaming a task input would break existing callers.
+        File prupleQcFile
+        File tumorSampleWgsMetricsFile
+        File virusBreakendTsv
+        File taxonomyDbTsv
+        File virusReportingDbTsv
+        String outputDir = "."
+
+        String memory = "3GiB"
+        String javaXmx = "2G"
+        Int timeMinutes = 15
+        String dockerImage = "quay.io/biowdl/virus-interpreter:1.2"
+    }
+
+    command {
+        virus-interpreter -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+        -sample_id ~{sampleId} \
+        -purple_purity_tsv ~{purplePurityTsv} \
+        -purple_qc_file ~{prupleQcFile} \
+        -tumor_sample_wgs_metrics_file ~{tumorSampleWgsMetricsFile} \
+        -virus_breakend_tsv ~{virusBreakendTsv} \
+        -taxonomy_db_tsv ~{taxonomyDbTsv} \
+        -virus_reporting_db_tsv ~{virusReportingDbTsv} \
+        -output_dir ~{outputDir}
+    }
+
+    output {
+        File virusAnnotatedTsv = "~{outputDir}/~{sampleId}.virus.annotated.tsv"
+    }
+
+    runtime {
+        time_minutes: timeMinutes # !UnknownRuntimeKey
+        docker: dockerImage
+        memory: memory
+    }
+
+    parameter_meta {
+        sampleId: {description: "The name of the sample.", category: "required"}
+        purplePurityTsv: {description: "The purity file produced by purple.", category: "required"}
+        prupleQcFile: {description: "The QC file produced by purple.", category: "required"}
+        tumorSampleWgsMetricsFile: {description: "The picard WGS metrics file for this sample.", category: "required"}
+        virusBreakendTsv: {description: "The TSV output from virusbreakend.", category: "required"}
+        taxonomyDbTsv: {description: "A taxonomy database tsv.", category: "required"}
+        virusReportingDbTsv: {description: "A virus reporting tsv.", category: "required"}
+        outputDir: {description: "The directory the output will be written to.", category: "common"}
+        memory: {description: "The amount of memory this job will use.", category: "advanced"}
+        javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+                  category: "advanced"}
+        timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+        dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+                      category: "advanced"}
+    }
+}
diff --git a/htseq.wdl b/htseq.wdl
index cbd8e2ac..92bc4423 100644
--- a/htseq.wdl
+++ b/htseq.wdl
@@ -27,13 +27,14 @@ task HTSeqCount {
String outputTable = "output.tsv"
String order = "pos"
String stranded = "no"
+ Array[String] additionalAttributes = []
+
String? featureType
String? idattr
- Array[String] additionalAttributes = []
Int nprocesses = 1
- String memory = "8G"
- Int timeMinutes = 10 + ceil(size(inputBams, "G") * 60)
+ String memory = "8GiB"
+ Int timeMinutes = 1440 #10 + ceil(size(inputBams, "GiB") * 60) FIXME
String dockerImage = "quay.io/biocontainers/htseq:0.12.4--py37hb3f55d8_0"
}
@@ -58,24 +59,27 @@ task HTSeqCount {
runtime {
cpu: nprocesses
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
docker: dockerImage
}
parameter_meta {
+ # inputs
inputBams: {description: "The input BAM files.", category: "required"}
gtfFile: {description: "A GTF/GFF file containing the features of interest.", category: "required"}
outputTable: {description: "The path to which the output table should be written.", category: "common"}
- nprocesses: {description: "Number of processes to run htseq with.", category: "advanced"}
order: {description: "Equivalent to the -r option of htseq-count.", category: "advanced"}
stranded: {description: "Equivalent to the -s option of htseq-count.", category: "common"}
+ additionalAttributes: {description: "Equivalent to the --additional-attr option of htseq-count.", category: "advanced"}
featureType: {description: "Equivalent to the --type option of htseq-count.", category: "advanced"}
idattr: {description: "Equivalent to the --idattr option of htseq-count.", category: "advanced"}
- additionalAttributes: {description: "Equivalent to the --additional-attr option of htseq-count.", category: "advanced"}
+ nprocesses: {description: "Number of processes to run htseq with.", category: "advanced"}
memory: {description: "The amount of memory the job requires in GB.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ counts: {description: "Count table based on input BAM file."}
}
}
diff --git a/isoseq3.wdl b/isoseq3.wdl
index 604a71d5..77f19f80 100644
--- a/isoseq3.wdl
+++ b/isoseq3.wdl
@@ -1,6 +1,6 @@
version 1.0
-# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center
+# Copyright (c) 2020 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,10 +8,10 @@ version 1.0
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -26,14 +26,15 @@ task Refine {
Boolean requirePolyA = false
String logLevel = "WARN"
File inputBamFile
+ File inputBamIndex
File primerFile
String outputDir
String outputNamePrefix
- Int cores = 2
- String memory = "2G"
+ Int threads = 2
+ String memory = "2GiB"
Int timeMinutes = 30
- String dockerImage = "quay.io/biocontainers/isoseq3:3.3.0--0"
+ String dockerImage = "quay.io/biocontainers/isoseq3:3.4.0--0"
}
command {
@@ -43,7 +44,7 @@ task Refine {
--min-polya-length ~{minPolyALength} \
~{true="--require-polya" false="" requirePolyA} \
--log-level ~{logLevel} \
- --num-threads ~{cores} \
+ --num-threads ~{threads} \
--log-file "~{outputDir}/~{outputNamePrefix}.stderr.log" \
~{inputBamFile} \
~{primerFile} \
@@ -60,7 +61,7 @@ task Refine {
}
runtime {
- cpu: cores
+ cpu: threads
memory: memory
time_minutes: timeMinutes
docker: dockerImage
@@ -72,10 +73,11 @@ task Refine {
requirePolyA: {description: "Require fl reads to have a poly(A) tail and remove it.", category: "common"}
logLevel: {description: "Set log level. Valid choices: (TRACE, DEBUG, INFO, WARN, FATAL).", category: "advanced"}
inputBamFile: {description: "Bam input file.", category: "required"}
+ inputBamIndex: {description: "Index for the Bam input file.", category: "required"}
primerFile: {description: "Barcode/primer fasta file.", category: "required"}
outputDir: {description: "Output directory path.", category: "required"}
outputNamePrefix: {description: "Basename of the output files.", category: "required"}
- cores: {description: "The number of cores to be used.", category: "advanced"}
+ threads: {description: "The number of threads to be used.", category: "advanced"}
memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
diff --git a/lima.wdl b/lima.wdl
index 2e8a7085..eece2b3f 100644
--- a/lima.wdl
+++ b/lima.wdl
@@ -1,6 +1,6 @@
version 1.0
-# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center
+# Copyright (c) 2020 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,10 +8,10 @@ version 1.0
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -48,15 +48,15 @@ task Lima {
File barcodeFile
String outputPrefix
- Int cores = 2
- String memory = "2G"
+ Int threads = 2
+ String memory = "2GiB"
Int timeMinutes = 30
- String dockerImage = "quay.io/biocontainers/lima:1.11.0--0"
+ String dockerImage = "quay.io/biocontainers/lima:2.2.0--h9ee0642_0"
}
Map[String, String] libraryDesignOptions = {"same": "--same", "different": "--different", "neighbors": "--neighbors"}
- command {
+ command <<<
set -e
mkdir -p "$(dirname ~{outputPrefix})"
lima \
@@ -82,34 +82,31 @@ task Lima {
--guess-min-count ~{guessMinCount} \
~{true="--peek-guess" false="" peekGuess} \
--log-level ~{logLevel} \
- --num-threads ~{cores} \
- ~{"--log-file " + outputPrefix + ".fl.stderr.log"} \
+ --num-threads ~{threads} \
+ ~{"--log-file " + outputPrefix + ".lima.stderr.log"} \
~{inputBamFile} \
~{barcodeFile} \
- ~{basename(outputPrefix) + ".fl.bam"}
+ ~{outputPrefix + ".bam"}
- # copy commands below are needed because glob command does not find
- # multiple bam/bam.pbi/subreadset.xml files when not located in working
- # directory.
- cp "~{basename(outputPrefix)}.fl.json" "~{outputPrefix}.fl.json"
- cp "~{basename(outputPrefix)}.fl.lima.counts" "~{outputPrefix}.fl.lima.counts"
- cp "~{basename(outputPrefix)}.fl.lima.report" "~{outputPrefix}.fl.lima.report"
- cp "~{basename(outputPrefix)}.fl.lima.summary" "~{outputPrefix}.fl.lima.summary"
- }
+ dirName="$(dirname ~{outputPrefix})"
+ find "$(cd ${dirName}; pwd)" -name "*.bam" > bamFiles.txt
+ find "$(cd ${dirName}; pwd)" -name "*.bam.pbi" > bamIndexes.txt
+ find "$(cd ${dirName}; pwd)" -name "*.consensusreadset.xml" > consensusreadset.txt
+ >>>
output {
- Array[File] limaBam = glob("*.bam")
- Array[File] limaBamIndex = glob("*.bam.pbi")
- Array[File] limaXml = glob("*.subreadset.xml")
- File limaStderr = outputPrefix + ".fl.stderr.log"
- File limaJson = outputPrefix + ".fl.json"
- File limaCounts = outputPrefix + ".fl.lima.counts"
- File limaReport = outputPrefix + ".fl.lima.report"
- File limaSummary = outputPrefix + ".fl.lima.summary"
+ Array[File] limaBam = read_lines("bamFiles.txt")
+ Array[File] limaBamIndex = read_lines("bamIndexes.txt")
+ Array[File] limaXml = read_lines("consensusreadset.txt")
+ File limaStderr = outputPrefix + ".lima.stderr.log"
+ File limaJson = outputPrefix + ".json"
+ File limaCounts = outputPrefix + ".lima.counts"
+ File limaReport = outputPrefix + ".lima.report"
+ File limaSummary = outputPrefix + ".lima.summary"
}
runtime {
- cpu: cores
+ cpu: threads
memory: memory
time_minutes: timeMinutes
docker: dockerImage
@@ -142,7 +139,7 @@ task Lima {
inputBamFile: {description: "Bam input file.", category: "required"}
barcodeFile: {description: "Barcode/primer fasta file.", category: "required"}
outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
- cores: {description: "The number of cores to be used.", category: "advanced"}
+ threads: {description: "The number of threads to be used.", category: "advanced"}
memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
diff --git a/macs2.wdl b/macs2.wdl
index fad3cb00..5ccc5a5f 100644
--- a/macs2.wdl
+++ b/macs2.wdl
@@ -24,14 +24,21 @@ task PeakCalling {
input {
Array[File]+ inputBams
Array[File]+ inputBamsIndex
- Array[File]+? controlBams
- Array[File]+? controlBamsIndex
- String outDir
+ Array[File] controlBams
+ Array[File] controlBamsIndex
+ String outDir = "macs2"
String sampleName
+ String format = "AUTO"
Boolean nomodel = false
-
- Int threads = 1
- String memory = "8G"
+ String? gensz
+ Int? extsize
+ Int? shiftsize
+ Float? pval_thres
+ Boolean bdg = false
+ String? keepdup
+ Boolean callsummits = false
+ Int timeMinutes = 600 # Default to 10 hours
+ String memory = "8GiB"
String dockerImage = "quay.io/biocontainers/macs2:2.1.2--py27r351_0"
}
@@ -39,10 +46,18 @@ task PeakCalling {
set -e
macs2 callpeak \
--treatment ~{sep = ' ' inputBams} \
- ~{true="--control" false="" defined(controlBams)} ~{sep = ' ' controlBams} \
+ ~{true="--control" false="" length(controlBams) > 0} ~{sep = ' ' controlBams} \
--outdir ~{outDir} \
--name ~{sampleName} \
- ~{true='--nomodel' false='' nomodel}
+ ~{"-f " + format} \
+ ~{"-g " + gensz} \
+ ~{"-p " + pval_thres} \
+ ~{"--shift " + shiftsize} \
+ ~{"--extsize " + extsize} \
+ ~{true='--nomodel' false='' nomodel} \
+ ~{true='-B' false='' bdg} \
+ ~{"--keep-dup " + keepdup} \
+ ~{true='--call-summits' false='' callsummits}
}
output {
@@ -50,8 +65,29 @@ task PeakCalling {
}
runtime {
- cpu: threads
+ cpu: 1
memory: memory
docker: dockerImage
+ time_minutes: timeMinutes
+ }
+ parameter_meta {
+ inputBams: {description: "The BAM files on which to perform peak calling.", category: "required"}
+ inputBamsIndex: {description: "The indexes for the input BAM files.", category: "required"}
+ controlBams: {description: "Control BAM files for the input bam files.", category: "common"}
+ controlBamsIndex: {description: "The indexes for the control BAM files.", category: "common"}
+ sampleName: {description: "Name of the sample to be analysed", category: "required"}
+ outDir: {description: "All output files will be written in this directory.", category: "advanced"}
+ nomodel: {description: "Whether or not to build the shifting model.", category: "advanced"}
+ gensz: {description: "macs2 argument for setting the mappable genome size or effective genome size which is defined as the genome size which can be sequenced.", category: "advanced"}
+ pval_thres: {description: "macs2 argument for setting the p-value cutoff. If -p is specified, MACS2 will use p-value instead of q-value.", category: "advanced"}
+ shiftsize: {description: "macs2 argument to set an arbitrary shift in bp. Can be negative to indicate direction.", category: "advanced"}
+ extsize: {description: "macs2 argument to extend reads in 5'->3' direction to fix-sized fragments.", category: "advanced"}
+ bdg: {description: "macs2 argument that enables the storage of the fragment pileup, control lambda in bedGraph files.", category: "advanced"}
+ keepdup: {description: "macs2 argument that controls the behavior towards duplicate tags at the exact same location.", category: "advanced"}
+ callsummits: {description: "macs2 argument to reanalyze the shape of signal profile to deconvolve subpeaks within each peak called from the general procedure.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+ format: {description: "Which format to use. Use BAMPE for paired-end reads.", category: "common"}
}
-}
\ No newline at end of file
+}
diff --git a/manta.wdl b/manta.wdl
index 5382d2a5..fde8c208 100644
--- a/manta.wdl
+++ b/manta.wdl
@@ -27,13 +27,14 @@ task Germline {
File referenceFasta
File referenceFastaFai
String runDir = "./manta_run"
+ Boolean exome = false
+
File? callRegions
File? callRegionsIndex
- Boolean exome = false
Int cores = 1
Int memoryGb = 4
- Int timeMinutes = 60
+ Int timeMinutes = 2880
String dockerImage = "quay.io/biocontainers/manta:1.4.0--py27_1"
}
@@ -59,7 +60,7 @@ task Germline {
runtime {
cpu: cores
- memory: "~{memoryGb}G"
+ memory: "~{memoryGb}GiB"
docker: dockerImage
time_minutes: timeMinutes
}
@@ -71,13 +72,17 @@ task Germline {
referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"}
referenceFastaFai: {description: "Fasta index (.fai) file of the reference", category: "required" }
runDir: {description: "The directory to use as run/output directory.", category: "common"}
+ exome: {description: "Whether or not the data is from exome sequencing.", category: "common"}
callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"}
callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"}
- exome: {description: "Whether or not the data is from exome sequencing.", category: "common"}
cores: {description: "The the number of cores required to run a program", category: "required"}
memoryGb: {description: "The memory required to run the manta", category: "required"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ mantaVCF: {description: "SVs and indels scored and genotyped under a diploid model for the set of samples in a joint diploid sample analysis or for the normal sample in a tumor/normal subtraction analysis."}
+ mantaVCFindex: {description: "Index of output mantaVCF."}
}
}
@@ -85,18 +90,19 @@ task Somatic {
input {
File tumorBam
File tumorBamIndex
- File? normalBam
- File? normalBamIndex
File referenceFasta
File referenceFastaFai
String runDir = "./manta_run"
+ Boolean exome = false
+
+ File? normalBam
+ File? normalBamIndex
File? callRegions
File? callRegionsIndex
- Boolean exome = false
Int cores = 1
Int memoryGb = 4
- Int timeMinutes = 60
+ Int timeMinutes = 2880
String dockerImage = "quay.io/biocontainers/manta:1.4.0--py27_1"
}
@@ -132,25 +138,36 @@ task Somatic {
runtime {
cpu: cores
- memory: "~{memoryGb}G"
+ memory: "~{memoryGb}GiB"
docker: dockerImage
time_minutes: timeMinutes
}
parameter_meta {
+ # inputs
tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"}
tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"}
- normalBam: {description: "The normal/control sample's BAM file.", category: "common"}
- normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "common"}
referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
runDir: {description: "The directory to use as run/output directory.", category: "common"}
+ exome: {description: "Whether or not the data is from exome sequencing.", category: "common"}
+ normalBam: {description: "The normal/control sample's BAM file.", category: "common"}
+ normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "common"}
callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"}
callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"}
- exome: {description: "Whether or not the data is from exome sequencing.", category: "common"}
cores: {description: "The number of cores to use.", category: "advanced"}
memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ candidateSmallIndelsVcf: {description: "Subset of the candidateSV.vcf.gz file containing only simple insertion and deletion variants less than the minimum scored variant size."}
+ candidateSmallIndelsVcfIndex: {description: "Index of output VCF file candidateSmallIndelsVcf."}
+ candidateSVVcf: {description: "Unscored SV and indel candidates."}
+ candidatSVVcfIndex: {description: "Index of output VCF file candidateSVVcf."}
+ tumorSVVcf: {description: "Subset of the candidateSV.vcf.gz file after removing redundant candidates and small indels less than the minimum scored variant size."}
+ tumorSVVcfIndex: {description: "Index of output VCF file tumorSVVcf."}
+ diploidSV: {description: "SVs and indels scored and genotyped under a diploid model for the set of samples in a joint diploid sample analysis or for the normal sample in a tumor/normal subtraction analysis."}
+ diploidSVindex: {description: "Index of output VCF file diploidSV."}
}
}
diff --git a/minimap2.wdl b/minimap2.wdl
index fb31fb7f..a7584beb 100644
--- a/minimap2.wdl
+++ b/minimap2.wdl
@@ -1,6 +1,6 @@
version 1.0
-# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center
+# Copyright (c) 2019 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,10 +8,10 @@ version 1.0
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -31,9 +31,9 @@ task Indexing {
Int? splitIndex
Int cores = 1
- String memory = "4G"
+ String memory = "4GiB"
Int timeMinutes = 10
- String dockerImage = "quay.io/biocontainers/minimap2:2.17--h84994c4_0"
+ String dockerImage = "quay.io/biocontainers/minimap2:2.20--h5bf99c6_0"
}
command {
@@ -61,7 +61,7 @@ task Indexing {
}
parameter_meta {
- # input
+ # inputs
useHomopolymerCompressedKmer: {description: "Use homopolymer-compressed k-mer (preferrable for pacbio).", category: "advanced"}
kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"}
minimizerWindowSize: {description: "Minimizer window size.", category: "advanced"}
@@ -73,7 +73,7 @@ task Indexing {
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
- # output
+ # outputs
indexFile: {description: "Indexed reference file."}
}
}
@@ -81,52 +81,82 @@ task Indexing {
task Mapping {
input {
String presetOption
- Int kmerSize = 15
- Boolean skipSelfAndDualMappings = false
- Boolean outputSam = false
String outputPrefix
- Boolean addMDTagToSam = false
- Boolean secondaryAlignment = false
File referenceFile
File queryFile
+
+ Int compressionLevel = 1
+ Boolean nameSorted = false
+ # MM, ML, MN -> Methylation flags
+ # Also keep the following flags for Sequali to be able to run on the mapped bam file and get ONT information.
+ # ch -> channel
+ # st -> start time
+ # du -> duration
+ # dx -> Whether read was duplex
+ # pi -> Parent ID for split read
+
+ String tagsToKeep = "MM,ML,MN,ch,st,du,dx,pi"
+ Boolean skipSelfAndDualMappings = false
+ Boolean addMDTagToSam = false
+ Boolean secondaryAlignment = true
+
+ Int? kmerSize
Int? maxIntronLength
Int? maxFragmentLength
Int? retainMaxSecondaryAlignments
Int? matchingScore
Int? mismatchPenalty
String? howToFindGTAG
+ String? readgroup
- Int cores = 4
- String memory = "30G"
+ Int sortThreads = 2
+ Int sortMemoryGb = 1
+ Int cores = 8
+ String memory = "24GiB"
Int timeMinutes = 1 + ceil(size(queryFile, "G") * 200 / cores)
- String dockerImage = "quay.io/biocontainers/minimap2:2.17--h84994c4_0"
+ # Minimap 2.28 samtools 1.20
+ String dockerImage = "quay.io/biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0"
}
- command {
- set -e
+ # Always run data through samtools fastq. This supports both FASTQ and uBAM
+ # files. It does remove any existing FASTQ comments, but this should not be
+ # problematic for most files.
+
+ command <<<
+ set -e -o pipefail
mkdir -p "$(dirname ~{outputPrefix})"
+ samtools fastq -T "~{tagsToKeep}" ~{queryFile} | \
minimap2 \
+ -a \
-x ~{presetOption} \
- -k ~{kmerSize} \
~{true="-X" false="" skipSelfAndDualMappings} \
- ~{true="-a" false="" outputSam} \
- -o ~{outputPrefix} \
~{true="--MD" false="" addMDTagToSam} \
--secondary=~{true="yes" false="no" secondaryAlignment} \
+ -y \
-t ~{cores} \
+ ~{"-k " + kmerSize} \
~{"-G " + maxIntronLength} \
~{"-F " + maxFragmentLength} \
~{"-N " + retainMaxSecondaryAlignments} \
~{"-A " + matchingScore} \
~{"-B " + mismatchPenalty} \
~{"-u " + howToFindGTAG} \
+ ~{"-R '" + readgroup}~{false="" true="'" defined(readgroup)} \
~{referenceFile} \
- ~{queryFile}
- }
+ - \
+ | samtools sort \
+ ~{true="-N" false="" nameSorted} \
+ --threads ~{sortThreads - 1} \
+ -l ~{compressionLevel} \
+ -m ~{sortMemoryGb}G \
+ -o ~{outputPrefix}.bam
+ samtools index ~{outputPrefix}.bam
+ >>>
output {
- File alignmentFile = outputPrefix
+ File bam = "~{outputPrefix}.bam"
+ File bamIndex = "~{outputPrefix}.bam.bai"
}
runtime {
@@ -137,27 +167,34 @@ task Mapping {
}
parameter_meta {
+ # inputs
presetOption: {description: "This option applies multiple options at the same time.", category: "common"}
kmerSize: {description: "K-mer size (no larger than 28).", category: "advanced"}
- outputSam: {description: "Output in the sam format.", category: "common"}
+ skipSelfAndDualMappings: {description: "Skip self and dual mappings (for the all-vs-all mode).", category: "advanced"}
outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
+ addMDTagToSam: {description: "Adds a MD tag to the sam output file.", category: "common"}
+ secondaryAlignment: {description: "Whether to output secondary alignments.", category: "advanced"}
+ referenceFile: {description: "Reference fasta file.", category: "required"}
+ queryFile: {description: "Input fasta file.", category: "required"}
maxIntronLength: {description: "Max intron length (effective with -xsplice; changing -r).", category: "advanced"}
maxFragmentLength: {description: "Max fragment length (effective with -xsr or in the fragment mode).", category: "advanced"}
- skipSelfAndDualMappings: {description: "Skip self and dual mappings (for the all-vs-all mode).", category: "advanced"}
retainMaxSecondaryAlignments: {description: "Retain at most N secondary alignments.", category: "advanced"}
matchingScore: {description: "Matching score.", category: "advanced"}
mismatchPenalty: {description: "Mismatch penalty.", category: "advanced"}
+ tagsToKeep: {description: "Tags to keep from the input unaligned BAM file.", category: "advanced"}
howToFindGTAG: {description: "How to find GT-AG. f:transcript strand, b:both strands, n:don't match GT-AG.", category: "common"}
- addMDTagToSam: {description: "Adds a MD tag to the sam output file.", category: "common"}
- secondaryAlignment: {description: "Whether to output secondary alignments.", category: "advanced"}
- referenceFile: {description: "Reference fasta file.", category: "required"}
- queryFile: {description: "Input fasta file.", category: "required"}
+ compressionLevel: {description: "The compression level for the output BAM file.", category: "advanced"}
+ sortThreads: {description: "Extra sorting threads used for samtools sort.", category: "advanced"}
+ sortMemoryGb: {description: "The amount of memory (in gigabytes) set aside for sorting.", category: "advanced"}
+ nameSorted: {description: "Output a name sorted file instead.", category: "common"}
+
cores: {description: "The number of cores to be used.", category: "advanced"}
memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
- # output
- alignmentFile: {description: "Mapping and alignment between collections of dna sequences file."}
+ # outputs
+ bam: {description: "Mapping and alignment between collections of dna sequences file in BAM format."}
+ bamIndex: {description: "Accompanying index file for the BAM file."}
}
}
diff --git a/modkit.wdl b/modkit.wdl
new file mode 100644
index 00000000..ddf4dbf7
--- /dev/null
+++ b/modkit.wdl
@@ -0,0 +1,250 @@
+version 1.0
+
+# Copyright (c) 2025 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task Pileup {
+ input {
+ File bam
+ File bamIndex
+ String outputBed = "output.bedMethyl"
+ String outputBedGraph = "combined.bedgraph"
+ File referenceFasta
+ File referenceFastaFai
+
+ Int? intervalSize
+ File? includeBed
+ String? filterThreshold
+ String? filterPercentile
+
+ Boolean cpg = false
+ Boolean combineMods = false
+ Boolean combineStrands = false
+ String? ignore
+ String logFilePath = "modkit.log"
+
+ Int threads = 8
+ String memory = "4GiB"
+ Int timeMinutes = 2880 / threads # 2 Days / threads
+ String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0"
+ }
+
+ command <<<
+ set -e
+ mkdir -p $(dirname ~{outputBed})
+ mkdir -p $(dirname ~{logFilePath})
+ modkit pileup \
+ --threads ~{threads} \
+ ~{"--interval-size " + intervalSize} \
+ ~{"--include-bed " + includeBed} \
+ ~{"--ignore " + ignore} \
+ --ref ~{referenceFasta} \
+ ~{true="--cpg" false="" cpg} \
+ ~{true="--combine-mods" false="" combineMods} \
+ ~{true="--combine-strands" false="" combineStrands} \
+ ~{"--filter-percentile " + filterPercentile} \
+ ~{"--filter-threshold " + filterThreshold} \
+ --log-filepath ~{logFilePath} \
+ ~{bam} \
+ - | tee ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10 >> "~{outputBedGraph}_"$4"_"$6".bedGraph"}'
+ # Separately generate the combined file as well, so users can have a choice.
+ cat ~{outputBed} | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > ~{outputBedGraph}
+ >>>
+
+ # You can use modkit pileup ${bam_path} - | tee out.bedmethyl | awk -v OFS="\t" '{print $1, $2, $3, $11, $10}' > out.bg to get both outputs at once without running anything twice.
+ # https://github.com/nanoporetech/modkit/issues/210#issuecomment-2181706374
+
+ output {
+ File out = outputBed # Normal mode
+ File outGraph = outputBedGraph # Normal mode
+ Array[File] outFiles = glob(outputBedGraph + "*.bedGraph") # Bedgraph mode
+ File logFile = logFilePath
+ }
+
+ runtime {
+ docker: dockerImage
+ cpu: threads
+ memory: memory
+ time_minutes: timeMinutes
+ }
+
+ parameter_meta {
+ # inputs
+ bam: {description: "The input alignment file.", category: "required"}
+ bamIndex: {description: "The index for the input alignment file.", category: "required"}
+ referenceFasta: {description: "The reference fasta file.", category: "required"}
+ referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
+ outputBed: {description: "The output name where the bedMethyl file should be placed.", category: "common"}
+ outputBedGraph: {description: "The output name where the bedgraph file should be placed.", category: "common"}
+
+ intervalSize: {description: "Sets the interval size.", category: "advanced"}
+ includeBed: {description: "Bed file with regions to include.", category: "advanced"}
+ cpg: {description: "Whether to call only at cpg sites.", category: "advanced"}
+ combineMods: {description: "Whether to combine modifications in the output.", category: "advanced"}
+ combineStrands: {description: "Whether to combine strands in the output.", category: "advanced"}
+ ignore: {description: "Modification type to ignore. For example 'h'.", category: "advanced"}
+ logFilePath: {description: "Path where the log file should be written.", category: "advanced"}
+ filterThreshold: {description: "Global filter threshold can be specified by a decimal number (e.g. 0.75). Otherwise the automatic filter percentile will be used.", category: "advanced"}
+ filterPercentile: {description: "This defaults to 0.1, to remove the lowest 10% confidence modification calls, but can be manually adjusted.", category: "advanced"}
+
+ threads: {description: "The number of threads to use for variant calling.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ out: {description: "The output bedMethyl file."}
+ outGraph: {description: "The combined bedgraph file."}
+ outFiles: {description: "Per-modification/strand bedGraph files."}
+ logFile: {description: "The generated log file."}
+ }
+}
+
+task Summary {
+ input {
+ File bam
+ File bamIndex
+
+ String summary = "modkit.summary.txt"
+
+ Boolean sample = true
+ Int? numReads # = 10042
+ Float? samplingFrac # = 0.1
+ Int? seed
+
+ Int threads = 4
+ String memory = ceil(size(bam, "GiB") * 0.1) + 5 + "GiB" # Based on a linear model with some fudge (memory = 0.07540 * file_size - 0.6).
+ Int timeMinutes = 60 # originally this was set at "2 Days / threads" but with 4 threads and that much ram, it's pretty fast.
+ String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0"
+ }
+
+ command <<<
+ set -e
+ mkdir -p $(dirname ~{summary})
+
+ modkit summary \
+ --threads ~{threads} \
+ ~{true="" false="--no-sampling" sample} \
+ ~{"--num-reads " + numReads} \
+ ~{"--sampling-frac " + samplingFrac} \
+ ~{"--seed " + seed} \
+ ~{bam} > ~{summary}
+ >>>
+
+ output {
+ File summaryReport = summary # Normal mode
+ }
+
+ runtime {
+ docker: dockerImage
+ cpu: threads
+ memory: memory
+ time_minutes: timeMinutes
+ }
+
+ parameter_meta {
+ # inputs
+ bam: {description: "The input alignment file.", category: "required"}
+ bamIndex: {description: "The index for the input alignment file.", category: "required"}
+
+ sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"}
+ numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"}
+ samplingFrac: {description: "Use a fixed percentage of reads, rather than a fixed number of reads, for sampling.", category: "advanced"}
+ seed: {description: "A seed can be provided for reproducibility in the sampling fraction case.", category: "advanced"}
+
+ threads: {description: "The number of threads to use.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ summaryReport: {description: "The output modkit summary."}
+ }
+}
+
+task SampleProbs {
+ input {
+ File bam
+ File bamIndex
+
+ String summary = "modkit-sample-probs"
+
+ Boolean sample = true
+ Int? numReads # = 10042
+ Float? samplingFrac # = 0.1
+ Int? seed
+
+ Int threads = 4
+ String memory = "32GiB"
+ Int timeMinutes = 60
+ String dockerImage = "quay.io/biocontainers/ont-modkit:0.4.3--hcdda2d0_0"
+ }
+
+ command <<<
+ set -e
+ mkdir -p ~{summary}
+
+ modkit sample-probs \
+ --threads ~{threads} \
+ --out-dir ~{summary} \
+ ~{true="" false="--no-sampling" sample} \
+ ~{"--num-reads " + numReads} \
+ ~{"--sampling-frac " + samplingFrac} \
+ ~{"--seed " + seed} \
+ --hist \
+ ~{bam}
+ >>>
+
+ output {
+ File reportCounts = "~{summary}/counts.html"
+ File reportProportion = "~{summary}/proportion.html"
+ File reportProbabilitiesTsv = "~{summary}/probabilities.tsv"
+ File reportThresholdsTsv = "~{summary}/thresholds.tsv"
+ }
+
+ runtime {
+ docker: dockerImage
+ cpu: threads
+ memory: memory
+ time_minutes: timeMinutes
+ }
+
+ parameter_meta {
+ # inputs
+ bam: {description: "The input alignment file.", category: "required"}
+ bamIndex: {description: "The index for the input alignment file.", category: "required"}
+ summary: {description: "A folder for the outputs.", category: "common"}
+
+ sample: {description: "Allows you to disable sampling and report stats for the whole file.", category: "advanced"}
+ numReads: {description: "By default a fixed amount of reads are read, you can set this to change the number of reads to sample.", category: "advanced"}
+ samplingFrac: {description: "Use a fixed percentage of reads, rather than a fixed number of reads, for sampling.", category: "advanced"}
+ seed: {description: "A seed can be provided for reproducibility in the sampling fraction case.", category: "advanced"}
+
+ threads: {description: "The number of threads to use.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ reportCounts: {description: "The output html report of counts."}
+ reportProportion: {description: "The output html report of proportions."}
+ reportProbabilitiesTsv: {description: "The output TSV of probabilities."}
+ reportThresholdsTsv: {description: "The output TSV of thresholds."}
+ }
+}
diff --git a/mosdepth.wdl b/mosdepth.wdl
new file mode 100644
index 00000000..43e95614
--- /dev/null
+++ b/mosdepth.wdl
@@ -0,0 +1,106 @@
+version 1.0
+
+# Copyright (c) 2025 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task Mosdepth {
+ input {
+ File bam
+ File bamIndex
+ String prefix = "./out"
+
+ String? chrom
+ # --by flag takes a BED file or an integer. So there need to be two inputs in WDL's typed system.
+ File? byBed
+ Int? byWindow
+ File? fasta
+ Int? flag
+ Int? includeFlag
+
+ Boolean noPerBase = false
+ Boolean d4 = false
+ Boolean fastMode = false
+
+ Int threads = 1
+ String memory = "4GiB"
+ Int timeMinutes = 10 + ceil(size(bam, "G")) * 4
+ String dockerImage = "quay.io/biocontainers/mosdepth:0.3.10--h4e814b3_1"
+ }
+
+ command <<<
+ set -e
+ mkdir -p $(dirname ~{prefix})
+ mosdepth \
+ --threads ~{threads} \
+ ~{"--chrom " + chrom} \
+ ~{"--by " + byBed} \
+ ~{"--by " + byWindow} \
+ ~{"--fasta " + fasta} \
+ ~{true="--no-per-base" false="" noPerBase} \
+ ~{true="--d4" false="" d4} \
+ ~{"--flag " + flag} \
+ ~{"--include-flag " + includeFlag} \
+ ~{true="--fast-mode" false="" fastMode} \
+ ~{prefix} ~{bam}
+ >>>
+
+ output {
+ File globalDist = "~{prefix}.mosdepth.global.dist.txt"
+ File summary = "~{prefix}.mosdepth.summary.txt"
+ File? perBaseBed = "~{prefix}.per-base.bed.gz"
+ File? regionsBed = "~{prefix}.regions.bed.gz"
+ }
+
+ runtime {
+ cpu: threads
+ memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ # inputs
+ bam: {description: "Input BAM or CRAM file.", category: "required"}
+ bamIndex: {description: "Index for the input BAM or CRAM file.", category: "required"}
+ prefix: {description: "Output prefix.", category: "common"}
+
+ chrom: {description: "Chromosome to restrict depth calculation.", category: "advanced"}
+ byBed: {description: "Bed file with windows to include for the --by flag. Should not be used together with byWindow.", category: "common"}
+ byWindow: {description: "Integer window size for the --by flag. Should not be used together with byBed.", category: "advanced"}
+ fasta: {description: "FASTA file, only necessary when CRAM input is used.", category: "advanced"}
+ flag: {description: "Exclude reads with any of the bits in FLAG set.", category: "advanced"}
+ includeFlag: {description: "Only include reads with any of the bits in FLAG set.", category: "advanced"}
+
+ noPerBase: {description: "Don't output per-base depth. Skipping this output will speed execution.", category: "common"}
+ d4: {description: "Output per-base depth in d4 format.", category: "advanced"}
+ fastMode: {description: "Don't look at internal cigar operations or correct mate overlaps (recommended for most use-cases).", category: "common"}
+
+ threads: {description: "How many threads to use.", category: "common"}
+ memory: {description: "How much memory to allocate.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ globalDist: {description: "Global distribution table file."}
+ summary: {description: "Summary table file."}
+ perBaseBed: {description: "Per base coverage BED file."}
+ regionsBed: {description: "Per region BED file, if byBed or byWindow is used."}
+ }
+}
\ No newline at end of file
diff --git a/multiqc.wdl b/multiqc.wdl
index 7dcf333e..db47ac87 100644
--- a/multiqc.wdl
+++ b/multiqc.wdl
@@ -22,16 +22,29 @@ version 1.0
task MultiQC {
input {
- # Use a string here so cromwell does not relocate an entire analysis directory
+ # Use a string here so cromwell does not relocate an entire
+ # analysis directory.
Array[File] reports
Boolean force = false
Boolean dirs = false
- Int? dirsDepth
Boolean fullNames = false
+ String outDir = "."
+ Boolean dataDir = false
+ Boolean zipDataDir = true
+ Boolean export = false
+ Boolean flat = false
+ Boolean interactive = true
+ Boolean lint = false
+ Boolean pdf = false
+ # This must be actively enabled in my opinion.
+ # The tools default is to upload.
+ Boolean megaQCUpload = false
+ Boolean enableAi = false
+
+ Int? dirsDepth
String? title
String? comment
String? fileName
- String outDir = "."
String? template
String? tag
String? ignore
@@ -40,22 +53,17 @@ task MultiQC {
File? fileList
Array[String]+? exclude
Array[String]+? module
- Boolean dataDir = false
+ Array[File]+? additionalReports
String? dataFormat
- Boolean zipDataDir = true
- Boolean export = false
- Boolean flat = false
- Boolean interactive = true
- Boolean lint = false
- Boolean pdf = false
- Boolean megaQCUpload = false # This must be actively enabled in my opinion. The tools default is to upload.
File? config # A directory
String? clConfig
+
String? memory
- Int timeMinutes = 2 + ceil(size(reports, "G") * 8)
- String dockerImage = "quay.io/biocontainers/multiqc:1.7--py_1"
+ Int timeMinutes = 10 + ceil(size(reports, "GiB") * 8)
+ String dockerImage = "quay.io/biocontainers/multiqc:1.28--pyhdfd78af_0"
}
- Int memoryGb = 2 + ceil(size(reports, "G"))
+
+ Int memoryGb = 2 + ceil(size(reports, "GiB"))
# This is where the reports end up. It does not need to be changed by the
# user. It is full of symbolic links, so it is not of any use to the user
@@ -69,15 +77,18 @@ task MultiQC {
# By hashing the parent path we make sure there are no file colissions as
# files from the same directory end up in the same directory, while files
# from other directories get their own directory. Cromwell also uses this
- # strategy. Using python's builtin hash is unique enough for these purposes.
-
+ # strategy. Using python's builtin hash is unique enough
+ # for these purposes.
+
+ Array[File] allReports = flatten([reports, flatten(select_all([additionalReports]))])
+
command {
python3 < ~{ntFilePath}
@@ -132,15 +132,16 @@ task DownloadAccessionToTaxId {
command {
set -e -o pipefail
mkdir -p ~{downloadDir}
- rsync -av \
- --partial \
- rsync://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/nucl_*.accession2taxid.gz* \
- ~{downloadDir}
+ rsync \
+ -av \
+ --partial \
+ rsync://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/nucl_*.accession2taxid.gz* \
+ ~{downloadDir}
(cd ~{downloadDir} && md5sum -c *.md5)
for file in ~{downloadDir}/nucl_*.accession2taxid.gz
do
zcat $file | tail -n +2 | cut -f 2,3 ~{true="| gzip" false='' gzip} > \
- $file.seqtaxmap~{true='.gz' false='' gzip}
+ $file.seqtaxmap~{true='.gz' false='' gzip}
done
}
diff --git a/pacbio.wdl b/pacbio.wdl
new file mode 100644
index 00000000..dcf0f69e
--- /dev/null
+++ b/pacbio.wdl
@@ -0,0 +1,95 @@
+version 1.0
+
+# Copyright (c) 2020 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task mergePacBio {
+ input {
+ Array[File]+ reports
+ String outputPathMergedReport
+
+ String memory = "4GiB"
+ String dockerImage = "quay.io/redmar_van_den_berg/pacbio-merge:0.2"
+ }
+
+ command {
+ set -e
+ mkdir -p $(dirname ~{outputPathMergedReport})
+ pacbio_merge \
+ --reports ~{sep=" " reports} \
+ --json-output ~{outputPathMergedReport}
+ }
+
+ runtime {
+ memory: memory
+ docker: dockerImage
+ }
+
+ output {
+ File outputMergedReport = outputPathMergedReport
+ }
+
+ parameter_meta {
+ # inputs
+ reports: {description: "The PacBio report files to merge.", category: "required"}
+ outputPathMergedReport: {description: "The location the merged PacBio report file should be written to.", category: "common"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ outputMergedReport: {description: "The PacBio reports merged into one."}
+ }
+}
+
+task ccsChunks {
+ input {
+ Int chunkCount
+
+ String memory = "4GiB"
+ String dockerImage = "python:3.7-slim"
+ }
+
+ command {
+ set -e
+ python <7 days and failing.
+ String? read_name_regex
+
+ # In GATK Best practices pipeline MarkDuplicates is given a 7G VM.
# https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L1040
Int javaXmxMb = 6656 # 6.5G
String memoryMb = javaXmxMb + 512
- Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8)
- String dockerImage = "quay.io/biocontainers/picard:2.23.2--0"
-
- # The program default for READ_NAME_REGEX is appropriate in nearly every case.
- # Sometimes we wish to supply "null" in order to turn off optical duplicate detection
- # This can be desirable if you don't mind the estimated library size being wrong and
- # optical duplicate detection is taking >7 days and failing
- String? read_name_regex
+ Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 8)
+ String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0"
}
# Task is assuming query-sorted input so that the Secondary and Supplementary reads get
# marked correctly. This works because the output of BWA is query-grouped and therefore,
# so is the output of MergeBamAlignment. While query-grouped isn't actually query-sorted,
- # it's good enough for MarkDuplicates with ASSUME_SORT_ORDER="queryname"
-
+ # it's good enough for MarkDuplicates with ASSUME_SORT_ORDER="queryname".
command {
set -e
mkdir -p "$(dirname ~{outputBamPath})"
@@ -501,6 +769,8 @@ task MarkDuplicates {
OUTPUT=~{outputBamPath} \
METRICS_FILE=~{metricsPath} \
COMPRESSION_LEVEL=~{compressionLevel} \
+ USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \
+ USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \
VALIDATION_STRINGENCY=SILENT \
~{"READ_NAME_REGEX=" + read_name_regex} \
OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \
@@ -508,8 +778,6 @@ task MarkDuplicates {
CREATE_INDEX=true \
ADD_PG_TAG_TO_READS=false \
CREATE_MD5_FILE=~{true="true" false="false" createMd5File} \
- USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \
- USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater}
}
output {
@@ -520,9 +788,9 @@ task MarkDuplicates {
}
runtime {
- docker: dockerImage
+ memory: "~{memoryMb}MiB"
time_minutes: timeMinutes
- memory: "~{memoryMb}M"
+ docker: dockerImage
}
parameter_meta {
@@ -530,42 +798,49 @@ task MarkDuplicates {
inputBams: {description: "The BAM files for which the duplicate reads should be marked.", category: "required"}
outputBamPath: {description: "The location where the ouptut BAM file should be written.", category: "required"}
metricsPath: {description: "The location where the output metrics file should be written.", category: "required"}
- read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"}
- createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"}
+ compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"}
useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"}
useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"}
- compressionLevel: {description: "The compression level at which the BAM files are written", category: "advanced"}
+ createMd5File: {description: "Whether to create a md5 file for the created BAM file.", category: "advanced"}
+ read_name_regex: {description: "Equivalent to the `READ_NAME_REGEX` option of MarkDuplicates.", category: "advanced"}
+ javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.", category: "advanced"}
memoryMb: {description: "The amount of memory this job will use in megabytes.", category: "advanced"}
- javaXmxMb: {description: "The maximum memory available to the program in megabytes. Should be lower than `memoryMb` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ outputBam: {description: "BAM file with the duplicate reads marked."}
+ outputBamIndex: {description: "Index of `outputBam`."}
+ outputBamMd5: {description: "MD5 checksum of `outputBam`."}
+ metricsFile: {description: "Duplication metrics file produced by MarkDuplicates."}
}
}
-# Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs
+# Combine multiple VCFs or GVCFs from scattered HaplotypeCaller runs.
task MergeVCFs {
input {
Array[File]+ inputVCFs
Array[File]+ inputVCFsIndexes
String outputVcfPath
-
- String memory = "5G"
- String javaXmx = "4G"
- Int timeMinutes = 1 + ceil(size(inputVCFs, "G")) * 2
- String dockerImage = "quay.io/biocontainers/picard:2.23.2--0"
Int compressionLevel = 1
- Boolean useJdkInflater = true # Slightly faster than the intel one.
- # Better results for compression level 1 (much smaller). Higher compression levels similar to intel deflater.
+ Boolean useJdkInflater = false
+ # Better results for compression level 1 (much smaller).
+ # Higher compression levels similar to intel deflater.
# NOTE: this might change in the future when the intel deflater is updated!
- Boolean useJdkDeflater = true
+ # Second NOTE: No it did not change. Only the fastest algorithm with
+ # worse compression is wrapped in the intel GKL. Instead of using
+ # one of the slightly slower but better compressing alternatives from ISA-L.
+ # (Which are also faster than zlib.)
+ Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater
+ String javaXmx = "4G"
+ String memory = "5GiB"
+ Int timeMinutes = 1 + ceil(size(inputVCFs, "GiB")) * 2
+ String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0"
}
- # Using MergeVcfs instead of GatherVcfs so we can create indices
- # See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket
-
+ # Using MergeVcfs instead of GatherVcfs so we can create indices.
+ # See https://github.com/broadinstitute/picard/issues/789 for relevant GatherVcfs ticket.
command {
set -e
mkdir -p "$(dirname ~{outputVcfPath})"
@@ -575,7 +850,7 @@ task MergeVCFs {
OUTPUT=~{outputVcfPath} \
COMPRESSION_LEVEL=~{compressionLevel} \
USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \
- USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater}
+ USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater}
}
output {
@@ -584,9 +859,9 @@ task MergeVCFs {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
@@ -594,16 +869,17 @@ task MergeVCFs {
inputVCFs: {description: "The VCF files to be merged.", category: "required"}
inputVCFsIndexes: {description: "The indexes of the VCF files.", category: "required"}
outputVcfPath: {description: "The location the output VCF file should be written to.", category: "required"}
-
- memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
- timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"}
useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"}
useJdkDeflater: {description: "True, uses the java deflator to compress the BAM files. False uses the optimized intel deflater.", category: "advanced"}
- compressionLevel: {description: "The compression level at which the BAM files are written", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ outputVcf: {description: "Multiple variant files combined into a single variant file."}
+ outputVcfIndex: {description: "Index of `outputVcf`."}
}
}
@@ -613,10 +889,12 @@ task SamToFastq {
File inputBamIndex
Boolean paired = true
- String memory = "17G"
String javaXmx = "16G" # High memory default to avoid crashes.
- String dockerImage = "quay.io/biocontainers/picard:2.23.2--0"
- File? NONE
+ String memory = "17GiB"
+ Int timeMinutes = 30
+ String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0"
+
+ File? noneFile
}
String outputRead1 = basename(inputBam, "\.[bs]am") + "_R1.fastq.gz"
@@ -635,13 +913,36 @@ task SamToFastq {
output {
File read1 = outputRead1
- File? read2 = if paired then outputRead2 else NONE
- File? unpairedRead = if paired then outputUnpaired else NONE
+ File? read2 = if paired then outputRead2 else noneFile
+ File? unpairedRead = if paired then outputUnpaired else noneFile
}
runtime {
- docker: dockerImage
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ # inputs
+ inputBam: {description: "Input BAM file to extract reads from.", category: "required"}
+ inputBamIndex: {description: "Input BAM index file.", category: "required"}
+ paired: {description: "Set to false when input data is single-end.", category: "common"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ read1: {description: "Fastq file containing reads from the first pair."}
+ read2: {description: "Fastq file containing reads from the second pair."}
+ unpairedRead: {description: "Fastq file containing unpaired reads."}
+ }
+
+ meta {
+ WDL_AID: {
+ exclude: ["noneFile"]
+ }
}
}
@@ -650,9 +951,9 @@ task ScatterIntervalList {
File interval_list
Int scatter_count
- String memory = "4G"
String javaXmx = "3G"
- String dockerImage = "quay.io/biocontainers/picard:2.23.2--0"
+ String memory = "4GiB"
+ String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0"
}
command {
@@ -674,8 +975,8 @@ task ScatterIntervalList {
}
runtime {
- docker: dockerImage
memory: memory
+ docker: dockerImage
}
}
@@ -687,13 +988,15 @@ task SortSam {
Boolean createMd5File = false
Int maxRecordsInRam = 500000
Int compressionLevel = 1
+ Boolean useJdkInflater = false
+ Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater
- # Default ram of 4 GB. Using 125001.0 to prevent an answer of
+ # Default ram of 4 GB. Using 125001.0 to prevent an answer of
# 4.000000001 which gets rounded to 5.
# GATK Best practices uses 75000 here: https://github.com/gatk-workflows/broad-prod-wgs-germline-snps-indels/blob/d2934ed656ade44801f9cfe1c0e78d4f80684b7b/PairedEndSingleSampleWf-fc-hg38.wdl#L778
Int XmxGb = ceil(maxRecordsInRam / 125001.0)
- Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3)
- String dockerImage = "quay.io/biocontainers/picard:2.23.2--0"
+ Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3)
+ String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0"
}
command {
@@ -706,6 +1009,8 @@ task SortSam {
SORT_ORDER=~{true="queryname" false="coordinate" sortByName} \
CREATE_INDEX=true \
COMPRESSION_LEVEL=~{compressionLevel} \
+ USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \
+ USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \
VALIDATION_STRINGENCY=SILENT \
CREATE_MD5_FILE=~{true="true" false="false" createMd5File}
@@ -718,19 +1023,28 @@ task SortSam {
runtime {
cpu: 1
- memory: "~{1 + XmxGb}G"
+ memory: "~{1 + XmxGb}GiB"
time_minutes: timeMinutes
docker: dockerImage
}
parameter_meta {
- inputBam: {description: "The unsorted input BAM file", category: "required"}
+ # inputs
+ inputBam: {description: "The unsorted input BAM file.", category: "required"}
outputPath: {description: "The location the output BAM file should be written to.", category: "required"}
- XmxGb: {description: "The maximum memory available to picard SortSam. Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.",
- category: "advanced"}
+ sortByName: {description: "Sort the output file by name, default is position.", category: "advanced"}
+ createMd5File: {description: "Whether to create an MD5 digest for any BAM or FASTQ files created.", category: "advanced"}
+ maxRecordsInRam: {description: "This will specify the number of records stored in RAM before spilling to disk.", category: "advanced"}
+ compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"}
+ useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"}
+ useJdkDeflater: {description: "True, uses the java deflater to compress the BAM files. False, uses the optimized intel deflater.", category: "advanced"}
+ XmxGb: {description: "The maximum memory available to picard SortSam. Should be lower than `memory` to accommodate JVM overhead and BWA mem's memory usage.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ outputBam: {description: "Sorted BAM file."}
+ outputBamIndex: {description: "Index of sorted BAM file."}
}
}
@@ -738,12 +1052,13 @@ task SortVcf {
input {
Array[File]+ vcfFiles
String outputVcfPath
+
File? dict
- String memory = "9G"
String javaXmx = "8G"
- Int timeMinutes = 1 + ceil(size(vcfFiles, "G") * 5)
- String dockerImage = "quay.io/biocontainers/picard:2.20.5--0"
+ String memory = "9GiB"
+ Int timeMinutes = 1 + ceil(size(vcfFiles, "GiB") * 5)
+ String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0"
}
@@ -763,9 +1078,9 @@ task SortVcf {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
@@ -773,13 +1088,14 @@ task SortVcf {
vcfFiles: {description: "The VCF files to merge and sort.", category: "required"}
outputVcfPath: {description: "The location the sorted VCF files should be written to.", category: "required"}
dict: {description: "A sequence dictionary matching the VCF files.", category: "advanced"}
-
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ outputVcf: {description: "Sorted VCF file(s)."}
+ outputVcfIndex: {description: "Index(es) of sort(ed) VCF file(s)."}
}
}
@@ -788,10 +1104,11 @@ task RenameSample {
File inputVcf
String outputPath = "./picard/renamed.vcf"
String newSampleName
- String memory = "9G"
+
String javaXmx = "8G"
- Int timeMinutes = 1 + ceil(size(inputVcf, "G") * 2)
- String dockerImage = "quay.io/biocontainers/picard:2.19.0--0"
+ String memory = "9GiB"
+ Int timeMinutes = 1 + ceil(size(inputVcf, "GiB") * 2)
+ String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0"
}
command {
@@ -809,9 +1126,9 @@ task RenameSample {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
@@ -819,9 +1136,87 @@ task RenameSample {
inputVcf: {description: "The VCF file to process.", category: "required"}
outputPath: {description: "The location the output VCF file should be written.", category: "common"}
newSampleName: {description: "A string to replace the old sample name.", category: "required"}
- memory: {description: "The memory required to run the programs", category: "advanced"}
- javaXmx: {description: "The max. memory allocated for JAVA", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
+ memory: {description: "The memory required to run the programs.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ renamedVcf: {description: "New VCF with renamed sample."}
}
}
+
+task UmiAwareMarkDuplicatesWithMateCigar {
+ input {
+ Array[File] inputBams
+ String outputPath
+ String outputPathMetrics = outputPath + ".metrics"
+ String outputPathUmiMetrics = outputPath + ".umi-metrics"
+ Int maxRecordsInRam = 1500000 # Default is 500_000 but that will lead to very small files on disk.
+ String? assumeSortOrder
+ String tempdir = "temp"
+ Boolean removeDuplicates = true
+ String umiTagName = "RX"
+ Int compressionLevel = 1
+ Boolean useJdkInflater = false
+ Boolean useJdkDeflater = true # Achieves much better compression rates than the intel deflater
+ String javaXmx = "8G"
+ String memory = "9GiB"
+ Int timeMinutes = 360
+ String dockerImage = "quay.io/biocontainers/picard:3.3.0--hdfd78af_0"
+ }
+
+ command {
+ set -e
+ mkdir -p "$(dirname ~{outputPath})" ~{tempdir}
+ picard -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+ UmiAwareMarkDuplicatesWithMateCigar \
+ INPUT=~{sep=' INPUT=' inputBams} \
+ O=~{outputPath} \
+ M=~{outputPathMetrics} \
+ UMI_TAG_NAME=~{umiTagName} \
+ UMI_METRICS_FILE=~{outputPathUmiMetrics} \
+ TMP_DIR=~{tempdir} \
+ REMOVE_DUPLICATES=~{removeDuplicates} \
+ MAX_RECORDS_IN_RAM=~{maxRecordsInRam} \
+ CREATE_INDEX=true \
+ COMPRESSION_LEVEL=~{compressionLevel} \
+ USE_JDK_INFLATER=~{true="true" false="false" useJdkInflater} \
+ USE_JDK_DEFLATER=~{true="true" false="false" useJdkDeflater} \
+ ~{"ASSUME_SORT_ORDER=" + assumeSortOrder}
+ }
+
+ output {
+ File outputBam = outputPath
+ File outputBamIndex = sub(outputPath, "\.bam$", ".bai")
+ File outputMetrics = outputPathMetrics
+ File outputUmiMetrics = outputPathUmiMetrics
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ # inputs
+ inputBams: {description: "The BAM files for which the duplicate reads should be marked.", category: "required"}
+ outputPath: {description: "The location the output BAM file should be written to.", category: "required"}
+ outputPathMetrics: {description: "The location the output metrics file should be written to.", category: "required"}
+ outputPathUmiMetrics: {description: "The location the output UMI metrics file should be written to.", category: "required"}
+ removeDuplicates: {description: "Whether the duplicate reads should be removed instead of marked.", category: "common"}
+ umiTagName: {description: "Which tag in the BAM file holds the UMI.", category: "common"}
+ assumeSortOrder: {description: "Assume a certain sort order even though the header might say otherwise.", category: "common"}
+ tempdir: {description: "Temporary directory.", category: "advanced"}
+ compressionLevel: {description: "The compression level at which the BAM files are written.", category: "advanced"}
+ maxRecordsInRam: {description: "This will specify the number of records stored in RAM before spilling to disk.", category: "advanced"}
+ useJdkInflater: {description: "True, uses the java inflater. False, uses the optimized intel inflater.", category: "advanced"}
+ useJdkDeflater: {description: "True, uses the java deflater to compress the BAM files. False, uses the optimized intel deflater.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ }
+}
\ No newline at end of file
diff --git a/prepareShiny.wdl b/prepareShiny.wdl
new file mode 100644
index 00000000..28910743
--- /dev/null
+++ b/prepareShiny.wdl
@@ -0,0 +1,106 @@
+version 1.0
+
+# Copyright (c) 2017 Sequencing Analysis Support Core - Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task CreateDesignMatrix {
+ input {
+ File countTable
+ String shinyDir = "."
+
+ String memory = "5GiB"
+ Int timeMinutes = 30
+ String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0"
+ }
+
+ command {
+ set -e
+ mkdir -p ~{shinyDir}
+ predex design \
+ --input ~{countTable} \
+ --output ~{shinyDir}
+ }
+
+ output {
+ File dgeDesign = shinyDir + "/design_matrix.tsv"
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ # inputs
+ countTable: {description: "The created count table from HTseq.", category: "required"}
+ shinyDir: {description: "The directory to write the output to.", category: "required"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ dgeDesign: {description: "Design matrix template to add sample information for DGE analysis."}
+ }
+}
+
+task CreateAnnotation {
+ input {
+ File referenceFasta
+ File referenceGtfFile
+ String shinyDir = "."
+
+ String memory = "5GiB"
+ Int timeMinutes = 30
+ String dockerImage = "quay.io/biocontainers/predex:0.9.2--pyh3252c3a_0"
+ }
+
+ command {
+ set -e
+ mkdir -p ~{shinyDir}
+ predex annotation \
+ --fasta ~{referenceFasta} \
+ --gtf ~{referenceGtfFile} \
+ --output ~{shinyDir}
+ }
+
+ output {
+ File dgeAnnotation = shinyDir + "/annotation.tsv"
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ # inputs
+ referenceFasta: {description: "The reference Fasta file.", category: "required"}
+ referenceGtfFile: {description: "The reference GTF file.", category: "required"}
+ shinyDir: {description: "The directory to write the output to.", category: "required"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ dgeAnnotation: {description: "Annotation file for DGE analysis."}
+ }
+}
diff --git a/requirements-test.txt b/requirements-test.txt
deleted file mode 100644
index f074413b..00000000
--- a/requirements-test.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-cromwell
-miniwdl
\ No newline at end of file
diff --git a/rtg.wdl b/rtg.wdl
index 104a5ef9..62e1e77f 100644
--- a/rtg.wdl
+++ b/rtg.wdl
@@ -22,13 +22,13 @@ version 1.0
task Format {
input {
- String format = "fasta"
- String outputPath = "seq_data.sdf"
Array[File]+ inputFiles
- String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0"
+ String format = "fasta"
+ String outputPath = "reference_data"
String rtgMem = "8G"
- String memory = "9G"
- Int timeMinutes = 1 + ceil(size(inputFiles) * 2)
+ String memory = "9GiB"
+ Int timeMinutes = 1 + ceil(size(inputFiles, "GiB") * 2)
+ String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0"
}
command {
@@ -40,25 +40,27 @@ task Format {
}
output {
- File sdf = outputPath
+ Array[File] referenceFiles = glob("~{outputPath}/*")
}
runtime {
- docker: dockerImage
memory: memory
time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
- format: {description: "Format of input. Allowed values are [fasta, fastq, fastq-interleaved, sam-se, sam-pe].",
- category: "advanced"}
+ # inputs
+ inputFiles: {description: "Input sequence files. May be specified 1 or more times.", category: "required"}
+ format: {description: "Format of input. Allowed values are [fasta, fastq, fastq-interleaved, sam-se, sam-pe].", category: "advanced"}
outputPath: {description: "Where the output should be placed.", category: "advanced"}
- inputFiles: {description: "input sequence files. May be specified 1 or more times.", category: "required"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
- timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ rtgMem: {description: "The amount of memory rtg will allocate to the JVM.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- rtgMem: {description: "The amount of memory rtg will allocate to the JVM", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ referenceFiles: {description: "An array with all the generated reference files."}
}
}
@@ -68,20 +70,22 @@ task VcfEval {
File baselineIndex
File calls
File callsIndex
- File? evaluationRegions
- File? bedRegions
+ Boolean squashPloidy = false
+ String outputMode = "split"
String outputDir = "output/"
- File template
+ Array[File] referenceFiles
Boolean allRecords = false
Boolean decompose = false
Boolean refOverlap = false
+
+ File? evaluationRegions
+ File? bedRegions
String? sample
- Boolean squashPloidy = false
- String outputMode = "split"
- Int threads = 1 # tool default is number of cores in the system 😱
+
String rtgMem = "8G"
- String memory = "9G"
- Int timeMinutes = 1 + ceil(size([baseline, calls], "G") * 5)
+ Int threads = 1 # Tool default is number of cores in the system 😱.
+ String memory = "9GiB"
+ Int timeMinutes = 1 + ceil(size([baseline, calls], "GiB") * 5)
String dockerImage = "quay.io/biocontainers/rtg-tools:3.10.1--0"
}
@@ -94,7 +98,7 @@ task VcfEval {
~{"--evaluation-regions " + evaluationRegions} \
~{"--bed-regions " + bedRegions} \
--output ~{outputDir} \
- --template ~{template} \
+ --template $(dirname ~{referenceFiles[0]}) \
~{true="--all-records" false="" allRecords} \
~{true="--decompose" false="" decompose} \
~{true="--ref-overlap" false="" refOverlap} \
@@ -132,39 +136,47 @@ task VcfEval {
}
runtime {
- docker: dockerImage
cpu: threads
memory: memory
time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
- baseline: {description: "VCF file containing baseline variants", category: "required"}
- baselineIndex: {description: "The baseline's VCF index", category: "required"}
- calls: {description: "VCF file containing called variants", category: "required"}
- callsIndex: {description: "The call's VCF index", category: "required"}
- outputDir: {description: "Directory for output", category: "advanced"}
- bedRegions: {description: "if set, only read VCF records that overlap the ranges contained in the specified BED file", category: "advanced"}
- evaluationRegions: {description: "if set, evaluate within regions contained in the supplied BED file, allowing transborder matches. To be used for truth-set high-confidence regions or other regions of interest where region boundary effects should be minimized",
- category: "advanced"}
- template: {description: "SDF of the reference genome the variants are called against", category: "required"}
- allRecords: {description: "use all records regardless of FILTER status (Default is to only process records where FILTER is \".\" or \"PASS\")",
- category: "common"}
- decompose: {description: "decompose complex variants into smaller constituents to allow partial credit", category: "common"}
- refOverlap: {description: "allow alleles to overlap where bases of either allele are same-as-ref (Default is to only allow VCF anchor base overlap)",
- category: "common"}
- sample: {description: "the name of the sample to select. Use , to select different sample names for baseline and calls. (Required when using multi-sample VCF files)",
- category: "common"}
- squashPloidy: {description: "treat heterozygous genotypes as homozygous ALT in both baseline and calls, to allow matches that ignore zygosity differences",
- category: "common"}
- outputMode: {description: "output reporting mode. Allowed values are [split, annotate, combine, ga4gh, roc-only] (Default is split)",
- category: "advanced"}
- threads: {description: "Number of threads. Default is 1", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
- timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- rtgMem: {description: "The amount of memory rtg will allocate to the JVM", category: "advanced"}
+ # inputs
+ baseline: {description: "VCF file containing baseline variants.", category: "required"}
+ baselineIndex: {description: "The baseline's VCF index.", category: "required"}
+ calls: {description: "VCF file containing called variants.", category: "required"}
+ callsIndex: {description: "The call's VCF index.", category: "required"}
+ squashPloidy: {description: "treat heterozygous genotypes as homozygous ALT in both baseline and calls, to allow matches that ignore zygosity differences.", category: "common"}
+ outputMode: {description: "output reporting mode. Allowed values are [split, annotate, combine, ga4gh, roc-only] (Default is split).", category: "advanced"}
+ outputDir: {description: "Directory for output.", category: "advanced"}
+ referenceFiles: {description: "An array of reference Files generated by the Format task.", category: "required"}
+ allRecords: {description: "use all records regardless of FILTER status (Default is to only process records where FILTER is \".\" or \"PASS\").", category: "common"}
+ decompose: {description: "decompose complex variants into smaller constituents to allow partial credit.", category: "common"}
+ refOverlap: {description: "allow alleles to overlap where bases of either allele are same-as-ref (Default is to only allow VCF anchor base overlap).", category: "common"}
+ sample: {description: "the name of the sample to select. Use , to select different sample names for baseline and calls. (Required when using multi-sample VCF files).", category: "common"}
+ bedRegions: {description: "if set, only read VCF records that overlap the ranges contained in the specified BED file.", category: "advanced"}
+ evaluationRegions: {description: "if set, evaluate within regions contained in the supplied BED file, allowing transborder matches. To be used for truth-set high-confidence regions or other regions of interest where region boundary effects should be minimized.", category: "advanced"}
+ rtgMem: {description: "The amount of memory rtg will allocate to the JVM.", category: "advanced"}
+ threads: {description: "Number of threads. Default is 1.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ falseNegativesVcf: {description: "Variants from the baseline VCF which were not correctly called."}
+ falseNegativesVcfIndex: {description: "Index of the output VCF file `falseNegativesVcf`."}
+ falsePositivesVcf: {description: "Variants from the calls VCF which do not agree with baseline variants."}
+ falsePositivesVcfIndex: {description: "Index of the output VCF file `falsePositivesVcf`."}
+ summary: {description: "Summary statistic file."}
+ truePositivesBaselineVcf: {description: "Variants from the baseline VCF which agree with variants in the calls VCF."}
+ truePositivesBaselineVcfIndex: {description: "Index of the output VCF file `truePositivesBaselineVcf`."}
+ truePositivesVcf: {description: "Variants from the calls VCF which agree with variants in the baseline VCF."}
+ truePositivesVcfIndex: {description: "Index of the output VCF file `truePositivesVcf`."}
+ nonSnpRoc: {description: "ROC data derived from those variants which were not represented as SNPs."}
+ phasing: {description: "Phasing file."}
+ weightedRoc: {description: "ROC data derived from all analyzed call variants, regardless of their representation."}
+ allStats: {description: "All output files combined in an array."}
}
}
-
diff --git a/sambamba.wdl b/sambamba.wdl
index cd8da21e..be347f94 100644
--- a/sambamba.wdl
+++ b/sambamba.wdl
@@ -20,29 +20,74 @@ version 1.0
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
+task Flagstat {
+ input {
+ File inputBam
+ File inputBamIndex
+ String outputPath = "./flagstat.txt"
+
+ Int threads = 2
+ String memory = "8GiB"
+ Int timeMinutes = 320
+ String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2"
+ }
+
+ command {
+ sambamba flagstat \
+ -t ~{threads} \
+ ~{inputBam} \
+ > ~{outputPath}
+ }
+
+ output {
+ File stats = outputPath
+ }
+
+ runtime {
+ cpu: threads
+ memory: memory
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ inputBam: {description: "The input BAM file.", category: "required"}
+ inputBamIndex: {description: "The index for the BAM file.", category: "required"}
+ outputPath: {description: "The path to write the output to.", category: "required"}
+
+ threads: {description: "The number of threads that will be used for this task.", category: "advanced"}
+ memory: {description: "The amount of memory available to the job.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+ }
+}
+
task Markdup {
input {
Array[File] inputBams
String outputPath
- # Sambamba scales like this: 1 thread is fully utilized (1). 2 threads 1.8 utilized. 3 -> 2.4, 4-> 2.7.
- # 2 threads reduces wall clock time by more than 40%.
- Int threads = 2
Int compressionLevel = 1
+ # sortBufferSize and ioBufferSize taken from markdup defaults as of sambamba 0.7.1.
+ Int sortBufferSize = 4096
+ Int ioBufferSize = 128
+ Boolean removeDuplicates = false
+
Int? hashTableSize
Int? overFlowListSize
- # sortBufferSize and ioBufferSize taken from markdup defaults as of sambamba 0.7.1
- Int sortBufferSize = 2048
- Int ioBufferSize = 128
- Boolean removeDuplicates = false
+ # Sambamba scales like this: 1 thread is fully utilized (1).
+ # 2 threads 1.8 utilized. 3 -> 2.4, 4-> 2.7.
+ # 2 threads reduces wall clock time by more than 40%.
+ Int threads = 2
# According to the manual sambamba markdup uses the sortbufferSize + 2 times the ioBuffer size.
- # Added 1024 mb as a margin of safety. Real life use with this setting uses 2.7 GiB.
- Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize
- String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2"
+ # Added 8192 mb as a margin of safety. Real life use with this setting uses 2.7 GiB.
+ Int memoryMb = 8192 + sortBufferSize + 2 * ioBufferSize
# Time minute calculation does not work well for higher number of threads.
- Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) / threads
+ Int timeMinutes = 1 + ceil(size(inputBams, "GiB") * 25) / threads
+ String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2"
}
+
String bamIndexPath = sub(outputPath, "\.bam$", ".bai")
command {
@@ -57,7 +102,7 @@ task Markdup {
~{"--sort-buffer-size " + sortBufferSize} \
~{"--io-buffer-size " + ioBufferSize} \
~{sep=' ' inputBams} ~{outputPath}
- # sambamba creates an index for us
+ # sambamba creates an index for us.
mv ~{outputPath}.bai ~{bamIndexPath}
}
@@ -67,8 +112,8 @@ task Markdup {
}
runtime {
- memory: "~{memoryMb}M"
cpu: threads
+ memory: "~{memoryMb}MiB"
time_minutes: timeMinutes
docker: dockerImage
}
@@ -78,17 +123,19 @@ task Markdup {
inputBams: {description: "The input BAM files.", category: "required"}
outputPath: {description: "Output directory path + output file.", category: "required"}
compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"}
- memoryMb: {description: "The amount of memory available to the job in megabytes.", category: "advanced"}
- removeDuplicates: {description: "Whether to remove the duplicates (instead of only marking them).", category: "advanced"}
- hashTableSize: {description: "Sets sambamba's hash table size", category: "advanced"}
- overFlowListSize: {description: "Sets sambamba's overflow list size", category: "advanced"}
- sortBufferSize: {description: "The amount of mb allocated to the sort buffer", category: "advanced"}
+ sortBufferSize: {description: "The amount of mb allocated to the sort buffer.", category: "advanced"}
ioBufferSize: {description: "The amount of mb allocated to each IO buffer. Sambamba uses two IO buffers.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+ removeDuplicates: {description: "Whether to remove the duplicates (instead of only marking them).", category: "advanced"}
+ hashTableSize: {description: "Sets sambamba's hash table size.", category: "advanced"}
+ overFlowListSize: {description: "Sets sambamba's overflow list size.", category: "advanced"}
threads: {description: "The number of threads that will be used for this task.", category: "advanced"}
+ memoryMb: {description: "The amount of memory available to the job in megabytes.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
# outputs
outputBam: {description: "Sorted BAM file."}
+ outputBamIndex: {description: "Sorted BAM file index."}
}
}
@@ -98,14 +145,15 @@ task Sort {
String outputPath = basename(inputBam, "\.bam") + ".sorted.bam"
Boolean sortByName = false
Int compressionLevel = 1
- Int threads = 1
+
Int memoryPerThreadGb = 4
+ Int threads = 1
Int memoryGb = 1 + threads * memoryPerThreadGb
+ Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3)
String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2"
- Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3)
}
- # Select first needed as outputPath is optional input. (bug in cromwell)
+ # Select first needed as outputPath is optional input (bug in cromwell).
String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai")
command {
@@ -118,7 +166,7 @@ task Sort {
-m ~{memoryPerThreadGb}G \
-o ~{outputPath} \
~{inputBam}
- # sambamba creates an index for us
+ # sambamba creates an index for us.
mv ~{outputPath}.bai ~{bamIndexPath}
}
@@ -129,7 +177,7 @@ task Sort {
runtime {
cpu: threads
- memory: "~{memoryGb}G"
+ memory: "~{memoryGb}GiB"
docker: dockerImage
time_minutes: timeMinutes
}
@@ -140,12 +188,14 @@ task Sort {
outputPath: {description: "Output directory path + output file.", category: "required"}
sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"}
compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"}
- memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"}
- memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+ memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes.", category: "advanced"}
threads: {description: "The number of threads that will be used for this task.", category: "advanced"}
+ memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
# outputs
outputBam: {description: "Sorted BAM file."}
+ outputBamIndex: {description: "Sorted BAM file index."}
}
-}
\ No newline at end of file
+}
diff --git a/samtools.wdl b/samtools.wdl
index c155f026..711cb906 100644
--- a/samtools.wdl
+++ b/samtools.wdl
@@ -24,10 +24,13 @@ task BgzipAndIndex {
input {
File inputFile
String outputDir
- String type = "vcf"
+ String preset = "vcf"
- Int timeMinutes = 1 + ceil(size(inputFile, "G"))
- String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0"
+ Int compressLevel = 1
+ Int threads = 1
+ String memory = "2GiB"
+ Int timeMinutes = 1 + ceil(size(inputFile, "GiB"))
+ String dockerImage = "quay.io/biocontainers/htslib:1.21--h566b1c6_1"
}
String outputGz = outputDir + "/" + basename(inputFile) + ".gz"
@@ -35,8 +38,15 @@ task BgzipAndIndex {
command {
set -e
mkdir -p "$(dirname ~{outputGz})"
- bgzip -c ~{inputFile} > ~{outputGz}
- tabix ~{outputGz} -p ~{type}
+ bgzip \
+ --threads ~{threads} \
+ --compress-level ~{compressLevel} \
+ -c ~{inputFile} > ~{outputGz}
+
+ tabix \
+ --preset ~{preset} \
+ --threads ~{threads - 1} \
+ ~{outputGz}
}
output {
@@ -45,6 +55,8 @@ task BgzipAndIndex {
}
runtime {
+ cpu: threads
+ memory: memory
time_minutes: timeMinutes
docker: dockerImage
}
@@ -53,10 +65,61 @@ task BgzipAndIndex {
# inputs
inputFile: {description: "The file to be compressed and indexed.", category: "required"}
outputDir: {description: "The directory in which the output will be placed.", category: "required"}
- type: {description: "The type of file (eg. vcf or bed) to be compressed and indexed.", category: "common"}
+ preset: {description: "The preset for the file (eg. vcf or bed) to be compressed and indexed.", category: "common"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+ compressLevel: {description: "Set compression level.", category: "advanced"}
+ threads: {description: "The number of threads to use.", category: "advanced"}
+
+ # outputs
+ compressed: {description: "Compressed input file."}
+ index: {description: "Index of the compressed input file."}
+ }
+}
+
+task DictAndFaidx {
+ input {
+ File inputFile
+ String memory = "3GiB"
+ Int timeMinutes = 5 + ceil(size(inputFile, "GiB") * 5)
+ String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1"
+ }
+
+ String outputFile = basename(inputFile)
+ # Capture .fa, .fna and .fasta
+ String outputDict = sub(outputFile, "\.fn?as?t?a?$", "") + ".dict"
+ # This executes both dict and faidx, so indexes are co-located in the same folder.
+ command <<<
+ set -e
+ cp ~{inputFile} ~{outputFile}
+ samtools dict -o ~{outputDict} ~{outputFile}
+ samtools faidx ~{outputFile} --fai-idx ~{outputFile}.fai
+ >>>
+
+ output {
+ File outputFasta = outputFile
+ File outputFastaDict = outputDict
+ File outputFastaFai = outputFile + ".fai"
+ }
+
+ runtime {
+ memory: memory
+ docker: dockerImage
+ time_minutes: timeMinutes
+ cpu: 1
+ }
+
+ parameter_meta {
+ # inputs
+ inputFile: {description: "The input fasta file.", category: "required"}
+ memory: {description: "The amount of memory available to the job.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+ # outputs
+ outputFasta: {description: "Fasta file that is co-located with the indexes."}
+ outputFastaFai: {description: "Fasta index file for the outputFasta file."}
+ outputFastaDict: {description: "Sequence dictionary for the outputFasta file."}
}
}
@@ -65,8 +128,8 @@ task Faidx {
File inputFile
String outputDir
- String memory = "2G"
- String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2"
+ String memory = "2GiB"
+ String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1"
}
command {
@@ -104,38 +167,44 @@ task Fastq {
String outputRead1
String? outputRead2
String? outputRead0
+ String? outputReadS
+ Boolean appendReadNumber = false
+ Boolean outputQuality = false
+
Int? includeFilter
Int? excludeFilter
Int? excludeSpecificFilter
- Boolean appendReadNumber = false
- Boolean outputQuality = false
- Int? compressionLevel
+ Int compressionLevel = 1
Int threads = 1
- String memory = "1G"
- Int timeMinutes = 1 + ceil(size(inputBam) * 2)
- String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2"
+ String memory = "1GiB"
+ Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2)
+ String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1"
}
command {
+ set -e
+ mkdir -p "$(dirname ~{outputRead1})"
+ samtools collate -u -O ~{inputBam} | \
samtools fastq \
- ~{true="-1" false="-s" defined(outputRead2)} ~{outputRead1} \
+ ~{"-1 " + outputRead1} \
~{"-2 " + outputRead2} \
~{"-0 " + outputRead0} \
+ ~{"-s " + outputReadS} \
~{"-f " + includeFilter} \
~{"-F " + excludeFilter} \
~{"-G " + excludeSpecificFilter} \
~{true="-N" false="-n" appendReadNumber} \
~{true="-O" false="" outputQuality} \
- ~{"-c " + compressionLevel} \
- ~{"--threads " + threads} \
- ~{inputBam}
+ -c ~{compressionLevel} \
+ "--threads " ~{threads - 1}
}
output {
File read1 = outputRead1
File? read2 = outputRead2
File? read0 = outputRead0
+ File? readS = outputReadS
}
runtime {
@@ -151,16 +220,22 @@ task Fastq {
outputRead1: {description: "The location the reads (first reads for pairs, in case of paired-end sequencing) should be written to.", category: "required"}
outputRead2: {description: "The location the second reads from pairs should be written to.", category: "common"}
outputRead0: {description: "The location the unpaired reads should be written to (in case of paired-end sequenicng).", category: "advanced"}
- includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`", category: "advanced"}
- excludeFilter: {description: "Exclude reads with ONE OR MORE of these flags. Corresponds to `-F`", category: "advanced"}
- excludeSpecificFilter: {description: "Exclude reads with ALL of these flags. Corresponds to `-G`", category: "advanced"}
- appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. Corresponds to `-n/N`", category: "advanced"}
+ outputReadS: {description: "The location singleton reads should be written to.", category: "advanced"}
+ appendReadNumber: {description: "Append /1 and /2 to the read name, or don't. Corresponds to `-n/N`.", category: "advanced"}
outputQuality: {description: "Equivalent to samtools fastq's `-O` flag.", category: "advanced"}
+ includeFilter: {description: "Include reads with ALL of these flags. Corresponds to `-f`.", category: "advanced"}
+ excludeFilter: {description: "Exclude reads with ONE OR MORE of these flags. Corresponds to `-F`.", category: "advanced"}
+ excludeSpecificFilter: {description: "Exclude reads with ALL of these flags. Corresponds to `-G`.", category: "advanced"}
+ compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"}
threads: {description: "The number of threads to use.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ read1: {description: "Reads with the READ1 FLAG set."}
+ read2: {description: "Reads with the READ2 FLAG set."}
+ read0: {description: "Reads where the READ1 and READ2 FLAGs are both set or both unset (samtools fastq `-0`)."}
}
}
@@ -168,9 +243,10 @@ task FilterShortReadsBam {
input {
File bamFile
String outputPathBam
- String memory = "1G"
- Int timeMinutes = 1 + ceil(size(bamFile, "G") * 8)
- String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2"
+
+ String memory = "1GiB"
+ Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 8)
+ String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1"
}
String outputPathBamIndex = sub(outputPathBam, "\.bam$", ".bai")
@@ -196,11 +272,16 @@ task FilterShortReadsBam {
}
parameter_meta {
+ # inputs
bamFile: {description: "The bam file to process.", category: "required"}
outputPathBam: {description: "The filtered bam file.", category: "common"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ filteredBam: {description: "BAM file filtered for short reads."}
+ filteredBamIndex: {description: "Index of filtered BAM file."}
}
}
@@ -209,15 +290,20 @@ task Flagstat {
File inputBam
String outputPath
- String memory = "256M" # Only 40.5 MiB used for 150G bam file.
+ Int threads = 1
+
+ String memory = "256MiB" # Only 40.5 MiB used for 150G bam file.
Int timeMinutes = 1 + ceil(size(inputBam, "G"))
- String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2"
+ String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1"
}
command {
set -e
mkdir -p "$(dirname ~{outputPath})"
- samtools flagstat ~{inputBam} > ~{outputPath}
+
+ samtools flagstat \
+ --threads ~{threads - 1} \
+ ~{inputBam} > ~{outputPath}
}
output {
@@ -225,6 +311,7 @@ task Flagstat {
}
runtime {
+ cpu: threads
memory: memory
time_minutes: timeMinutes
docker: dockerImage
@@ -236,18 +323,25 @@ task Flagstat {
outputPath: {description: "The location the ouput should be written to.", category: "required"}
memory: {description: "The amount of memory needed for the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+ threads: {description: "The number of threads to use.", category: "advanced"}
+
+ # outputs
+ flagstat: {description: "The number of alignments for each FLAG type."}
}
}
task Index {
input {
File bamFile
+
String? outputBamPath
- String memory = "2G"
- Int timeMinutes = 1 + ceil(size(bamFile, "G") * 4)
- String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2"
+
+ Int threads = 1
+
+ String memory = "2GiB"
+ Int timeMinutes = 1 + ceil(size(bamFile, "GiB") * 4)
+ String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1"
}
# Select_first is needed, otherwise womtool validate fails.
@@ -261,9 +355,11 @@ task Index {
if [ ! -f ~{outputPath} ]
then
mkdir -p "$(dirname ~{outputPath})"
- ln ~{bamFile} ~{outputPath}
+ ln ~{bamFile} ~{outputPath} || cp ~{bamFile} ~{outputPath}
fi
- samtools index ~{outputPath} ~{bamIndexPath}
+ samtools index \
+ --threads ~{threads -1} \
+ ~{outputPath} ~{bamIndexPath}
'
}
@@ -273,6 +369,7 @@ task Index {
}
runtime {
+ cpu: threads
memory: memory
time_minutes: timeMinutes
docker: dockerImage
@@ -281,12 +378,15 @@ task Index {
parameter_meta {
# inputs
bamFile: {description: "The BAM file for which an index should be made.", category: "required"}
- outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.",
- category: "common"}
+ outputBamPath: {description: "The location where the BAM file should be written to. The index will appear alongside this link to the BAM file.", category: "common"}
memory: {description: "The amount of memory needed for the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+ threads: {description: "The number of threads to use.", category: "advanced"}
+
+ # outputs
+ indexedBam: {description: "BAM file that was indexed."}
+ index: {description: "Index of the input BAM file."}
}
}
@@ -294,15 +394,18 @@ task Markdup {
input {
File inputBam
String outputBamPath
+ Int threads = 1
- Int timeMinutes = 1 + ceil(size(inputBam, "G") * 2)
- String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2"
+ Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2)
+ String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1"
}
command {
set -e
mkdir -p "$(dirname ~{outputBamPath})"
- samtools markdup ~{inputBam} ~{outputBamPath}
+ samtools markdup \
+ --threads ~{threads - 1} \
+ ~{inputBam} ~{outputBamPath}
}
output {
@@ -310,6 +413,7 @@ task Markdup {
}
runtime {
+ cpu: threads
docker: dockerImage
time_minutes: timeMinutes
}
@@ -319,8 +423,11 @@ task Markdup {
inputBam: {description: "The BAM file to be processed.", category: "required"}
outputBamPath: {description: "The location of the output BAM file.", category: "required"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+ threads: {description: "The number of threads to use.", category: "advanced"}
+
+ # outputs
+ outputBam: {description: "BAM file with duplicate alignments marked."}
}
}
@@ -329,11 +436,18 @@ task Merge {
Array[File]+ bamFiles
String outputBamPath = "merged.bam"
Boolean force = true
- Int threads = 1
- Int timeMinutes = 1 + ceil(size(bamFiles, "G") * 2)
- String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2"
+ Boolean combineRGHeaders = false
+ Boolean combinePGHeaders = false
+
+ Int compressionLevel = 1
+ # Use one thread per input + one for the output + one for merging.
+ Int threads = length(bamFiles) + 2
+ String memory = "4GiB"
+ Int timeMinutes = 1 + ceil(size(bamFiles, "GiB") * 4)
+ String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1"
}
+
String indexPath = sub(outputBamPath, "\.bam$",".bai")
# Samtools uses additional threads for merge.
@@ -343,8 +457,11 @@ task Merge {
samtools merge \
--threads ~{threads - 1} \
~{true="-f" false="" force} \
+ -l ~{compressionLevel} \
+ ~{true="-c" false="" combineRGHeaders} \
+ ~{true="-p" false="" combinePGHeaders} \
~{outputBamPath} ~{sep=' ' bamFiles}
- samtools index ~{outputBamPath} ~{indexPath}
+ samtools index -@ ~{threads - 1} ~{outputBamPath} ~{indexPath}
}
output {
@@ -354,8 +471,9 @@ task Merge {
runtime {
cpu: threads
- docker: dockerImage
+ memory: memory
time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
@@ -363,9 +481,54 @@ task Merge {
bamFiles: {description: "The BAM files to merge.", category: "required"}
outputBamPath: {description: "The location the merged BAM file should be written to.", category: "common"}
force: {description: "Equivalent to samtools merge's `-f` flag.", category: "advanced"}
+
+ combineRGHeaders: {description: "Combine @RG headers with colliding IDs.", category: "advanced"}
+ combinePGHeaders: {description: "Combine @PG headers with colliding IDs.", category: "advanced"}
+
+ compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"}
+
+ threads: {description: "Number of threads to use.", category: "common"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ outputBam: {description: "Multiple BAM files merged into one."}
+ outputBamIndex: {description: "Index of the merged BAM file."}
+ }
+}
+
+task Quickcheck {
+ input {
+ File inputBam
+
+ String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1"
+ }
+
+ command {
+ set -e
+ samtools quickcheck ~{inputBam}
+ }
+
+ output {
+ File outputBam = inputBam
+ }
+
+ runtime {
+ cpu: 1
+ time_minutes: 5
+ memory: "1GiB"
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ # inputs
+ inputBam: {description: "The input BAM/SAM/CRAM file.", category: "required"}
+
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ outputBam: {description: "The exact same input file, but use this so it is recognised as a dependent task."}
}
}
@@ -375,14 +538,15 @@ task Sort {
String outputPath = basename(inputBam, "\.bam") + ".sorted.bam"
Boolean sortByName = false
Int compressionLevel = 1
- Int threads = 1
+
Int memoryPerThreadGb = 4
+ Int threads = 1
Int memoryGb = 1 + threads * memoryPerThreadGb
- String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2"
- Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3)
+ Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 3)
+ String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1"
}
- # Select first needed as outputPath is optional input. (bug in cromwell)
+ # Select first needed as outputPath is optional input (bug in cromwell).
String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai")
command {
@@ -396,7 +560,7 @@ task Sort {
-o ~{outputPath} \
~{inputBam}
samtools index \
- -@ ~{threads} \
+ --threads ~{threads - 1} \
~{outputPath} ~{bamIndexPath}
}
@@ -406,10 +570,10 @@ task Sort {
}
runtime {
- cpu: 1
- memory: "~{memoryGb}G"
- docker: dockerImage
+ cpu: threads
+ memory: "~{memoryGb}GiB"
time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
@@ -418,34 +582,97 @@ task Sort {
outputPath: {description: "Output directory path + output file.", category: "required"}
sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"}
compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"}
+ memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes.", category: "advanced"}
+ threads: {description: "The number of threads that will be used for this task.", category: "advanced"}
memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"}
- memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
- threads: {description: "The number of additional threads that will be used for this task.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
# outputs
outputBam: {description: "Sorted BAM file."}
+ outputBamIndex: {description: "Sorted BAM file index."}
+ }
+}
+
+task Split {
+ input {
+ File inputBam
+ String outputPath
+ String? unaccountedPath
+ String filenameFormat = "%!.%."
+
+ Int compressionLevel = 1
+
+ Int threads = 1
+ String memory = "1GiB"
+ Int timeMinutes = 1 + ceil(size(inputBam, "GiB") * 2)
+ String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1"
+ }
+
+ command {
+ set -e
+ mkdir -p "~{outputPath}/rg/"
+
+ samtools split \
+ --output-fmt bam \
+ --output-fmt-option level=~{compressionLevel} \
+ -f "~{outputPath}/rg/~{filenameFormat}" \
+ ~{"-u " + unaccountedPath} \
+ --threads ~{threads - 1} \
+ --write-index \
+ ~{inputBam}
+ }
+
+ output {
+ Array[File] splitBam = glob(outputPath + "/rg/*.bam")
+ Array[File] splitBamIndex = glob(outputPath + "/rg/*.bam.csi")
+ File? unaccounted = unaccountedPath
+ }
+
+ runtime {
+ cpu: threads
+ memory: memory
+ docker: dockerImage
+ time_minutes: timeMinutes
+ }
+
+ parameter_meta {
+ # inputs
+ inputBam: {description: "The bam file to split.", category: "required"}
+ outputPath: {description: "Directory to store the output BAM files in.", category: "required"}
+
+ # Optional parameters
+ unaccountedPath: {description: "The location to write reads to which are not detected as being part of an existing read group.", category: "common"}
+ filenameFormat: {description: "Format of the filename, the following tokens can be used: %% a literal % sign, %* basename, %# @RG index, %! @RG ID, %. filename extension for the output format.", category: "common"}
+ compressionLevel: {description: "Set compression level when writing gz or bgzf fastq files.", category: "advanced"}
+
+ # outputs
+ splitBam: {description: "BAM files split by read group."}
+ splitBamIndex: {description: "Indexes of the split BAM files."}
+ unaccounted: {description: "Reads with no RG tag or an unrecognised RG tag."}
}
}
task Tabix {
input {
File inputFile
- String outputFilePath = "indexed.vcf.gz"
- String type = "vcf"
- Int timeMinutes = 1 + ceil(size(inputFile, "G") * 2)
- String dockerImage = "quay.io/biocontainers/tabix:0.2.6--ha92aebf_0"
+ String outputFilePath = basename(inputFile)
+ String preset = "vcf"
+
+ Int timeMinutes = 1 + ceil(size(inputFile, "GiB") * 2)
+ String dockerImage = "quay.io/biocontainers/htslib:1.21--h566b1c6_1"
}
- # FIXME: It is better to do the indexing on VCF creation. Not in a separate task. With file localization this gets hairy fast.
+
+ # FIXME: It is better to do the indexing on VCF creation.
+ # Not in a separate task. With file localization this gets hairy fast.
command {
set -e
mkdir -p "$(dirname ~{outputFilePath})"
if [ ! -f ~{outputFilePath} ]
then
- ln ~{inputFile} ~{outputFilePath}
+ ln ~{inputFile} ~{outputFilePath} || cp ~{inputFile} ~{outputFilePath}
fi
- tabix ~{outputFilePath} -p ~{type}
+ tabix ~{outputFilePath} -p ~{preset}
}
output {
@@ -454,53 +681,64 @@ task Tabix {
}
runtime {
+ memory: "2GiB"
time_minutes: timeMinutes
- docker: dockerImage
+ docker: dockerImage
}
parameter_meta {
# inputs
inputFile: {description: "The file to be indexed.", category: "required"}
- outputFilePath: {description: "The location where the file should be written to. The index will appear alongside this link to the file.",
- category: "common"}
- type: {description: "The type of file (eg. vcf or bed) to be indexed.", category: "common"}
+ outputFilePath: {description: "The location where the file should be written to. The index will appear alongside this link to the file.", category: "common"}
+ preset: {description: "The preset for the file (eg. vcf or bed) to be indexed.", category: "common"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ indexedFile: {description: "Indexed input file."}
+ index: {description: "Index of the input file."}
}
}
task View {
input {
File inFile
- File? referenceFasta
String outputFileName = "view.bam"
Boolean uncompressedBamOutput = false
+
+ File? referenceFasta
Int? includeFilter
Int? excludeFilter
Int? excludeSpecificFilter
Int? MAPQthreshold
+ File? targetFile
+
+ Boolean fast = true # Sets compression level to 1.
Int threads = 1
- String memory = "1G"
- Int timeMinutes = 1 + ceil(size(inFile, "G") * 5)
- String dockerImage = "quay.io/biocontainers/samtools:1.10--h9402c20_2"
+ String memory = "1GiB"
+ Int timeMinutes = 1 + ceil(size(inFile, "GiB") * 5)
+ String dockerImage = "quay.io/biocontainers/samtools:1.21--h96c455f_1"
}
+
String outputIndexPath = basename(outputFileName) + ".bai"
- # Always output to bam and output header
+ # Always output to bam and output header.
+ # -u should be after --fast, and will override it in that case.
command {
set -e
mkdir -p "$(dirname ~{outputFileName})"
samtools view -b \
~{"-T " + referenceFasta} \
~{"-o " + outputFileName} \
+ ~{true="--fast" false="" fast} \
~{true="-u " false="" uncompressedBamOutput} \
~{"-f " + includeFilter} \
~{"-F " + excludeFilter} \
~{"-G " + excludeSpecificFilter} \
~{"-q " + MAPQthreshold} \
- ~{"--threads " + (threads - 1)} \
+ --threads ~{threads - 1} \
+ ~{"--target-file " + targetFile} \
~{inFile}
samtools index ~{outputFileName} ~{outputIndexPath}
}
@@ -520,17 +758,22 @@ task View {
parameter_meta {
# inputs
inFile: {description: "A BAM, SAM or CRAM file.", category: "required"}
- referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"}
outputFileName: {description: "The location the output BAM file should be written.", category: "common"}
+ fast: {description: "Sets compression level to 1. Set to true by default.", category: "common"}
uncompressedBamOutput: {description: "Equivalent to samtools view's `-u` flag.", category: "advanced"}
+ referenceFasta: {description: "The reference fasta file also used for mapping.", category: "advanced"}
includeFilter: {description: "Equivalent to samtools view's `-f` option.", category: "advanced"}
excludeFilter: {description: "Equivalent to samtools view's `-F` option.", category: "advanced"}
excludeSpecificFilter: {description: "Equivalent to samtools view's `-G` option.", category: "advanced"}
MAPQthreshold: {description: "Equivalent to samtools view's `-q` option.", category: "advanced"}
+        targetFile: {description: "A BED file with regions to include.", category: "advanced"}
threads: {description: "The number of threads to use.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ outputBam: {description: "Processed input file."}
+ outputBamIndex: {description: "Index of the processed input file."}
}
}
diff --git a/scripts b/scripts
index c0b48b0a..4142daab 160000
--- a/scripts
+++ b/scripts
@@ -1 +1 @@
-Subproject commit c0b48b0a916913d1e6751d7744d1cec37559a81f
+Subproject commit 4142daab81a7d9f28686b6a3299536757d381c81
diff --git a/seqtk.wdl b/seqtk.wdl
index 321ab132..f6fa422b 100644
--- a/seqtk.wdl
+++ b/seqtk.wdl
@@ -24,11 +24,12 @@ task Sample {
input {
File sequenceFile
String outFilePath = "subsampledReads.fq.gz"
- String? preCommand
- Int? seed
Boolean twoPassMode = false
- Float fractionOrNumber # when above 1.0 is the number of reads, otherwise it's a fraction
+ Float fractionOrNumber # When above 1.0 is the number of reads, otherwise it's a fraction.
Boolean zip = true
+
+ String? preCommand
+ Int? seed
}
command {
@@ -47,4 +48,4 @@ task Sample {
output {
File subsampledReads = outFilePath
}
-}
\ No newline at end of file
+}
diff --git a/sequali.wdl b/sequali.wdl
new file mode 100644
index 00000000..cbca3653
--- /dev/null
+++ b/sequali.wdl
@@ -0,0 +1,73 @@
+version 1.0
+
+# Copyright (c) 2024 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task Sequali {
+ input {
+ File reads
+ File? mate_reads
+ String outDir = "."
+
+ Int threads = 2
+ String memory = "4GiB"
+ String dockerImage = "quay.io/biocontainers/sequali:0.12.0--py312hf67a6ed_0"
+ Int timeMinutes = 10 + ceil(size(reads, "GiB") + size(mate_reads, "GiB")) * 4
+ }
+
+ command <<<
+ set -e
+        mkdir -p ~{outDir}
+ sequali \
+ --outdir ~{outDir} \
+ --threads ~{threads} \
+ ~{reads} \
+ ~{mate_reads}
+ >>>
+
+ output {
+ File html = outDir + "/" + basename(reads) + ".html"
+ File json = outDir + "/" + basename(reads) + ".json"
+ }
+
+ runtime {
+ cpu: threads
+ memory: memory
+ docker: dockerImage
+ time_minutes: timeMinutes
+ }
+
+ parameter_meta {
+ # inputs
+ reads: {description: "A FASTQ or BAM file.", category: "required"}
+        mate_reads: {description: "A FASTQ file with mate reads.", category: "common"}
+ threads: {description: "The number of cores to use.", category: "advanced"}
+
+        outDir: {description: "The path to write the output to.", category: "required"}
+
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ html: {description: "HTML report file."}
+ json: {description: "JSON report file for use with MultiQC."}
+ }
+}
\ No newline at end of file
diff --git a/smoove.wdl b/smoove.wdl
index e8846f72..7a1ac38b 100644
--- a/smoove.wdl
+++ b/smoove.wdl
@@ -1,7 +1,5 @@
version 1.0
-# MIT License
-#
# Copyright (c) 2020 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -31,9 +29,9 @@ task Call {
String sample
String outputDir = "./smoove"
- String memory = "15G"
- String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0"
+ String memory = "15GiB"
Int timeMinutes = 1440
+ String dockerImage = "quay.io/biocontainers/smoove:0.2.5--0"
}
command {
@@ -43,18 +41,19 @@ task Call {
--outdir ~{outputDir} \
--name ~{sample} \
--fasta ~{referenceFasta} \
+ --removepr \
+ --genotype \
~{bamFile}
}
output {
- File smooveVcf = outputDir + "/" + sample + "-smoove.vcf.gz"
+ File smooveVcf = outputDir + "/" + sample + "-smoove.genotyped.vcf.gz"
}
runtime {
memory: memory
- docker: dockerImage
time_minutes: timeMinutes
-
+ docker: dockerImage
}
parameter_meta {
@@ -63,10 +62,13 @@ task Call {
bamIndex: {description: "The index of the bam file.", category: "required"}
referenceFasta: {description: "The reference fasta file also used for mapping.", category: "required"}
referenceFastaFai: {description: "Fasta index (.fai) file of the reference.", category: "required" }
- outputDir: {description: "The location the output VCF file should be written.", category: "common"}
sample: {description: "The name of the sample.", category: "required"}
+ outputDir: {description: "The location the output VCF file should be written.", category: "common"}
memory: {description: "The memory required to run the programs.", category: "advanced"}
timeMinutes: {description: "The maximum duration (in minutes) the tool is allowed to run.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ smooveVcf: {description: "Calls of structural variants in VCF file."}
}
}
diff --git a/snpeff.wdl b/snpeff.wdl
new file mode 100644
index 00000000..b972ab30
--- /dev/null
+++ b/snpeff.wdl
@@ -0,0 +1,111 @@
+version 1.0
+
+# MIT License
+#
+# Copyright (c) 2020 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task SnpEff {
+ input {
+ File vcf
+ File vcfIndex
+ String genomeVersion
+ File datadirZip
+ String outputPath = "./snpeff.vcf"
+ Boolean hgvs = true
+ Boolean lof = true
+ Boolean noDownstream = false
+ Boolean noUpstream = false
+ Boolean noIntergenic = false
+ Boolean noShiftHgvs = false
+ Int? upDownStreamLen
+
+ String memory = "9GiB"
+ String javaXmx = "8G"
+ Int timeMinutes = 60
+ # Multicontainer with snpeff 5.2 and bgzip/tabix 1.19.1
+ String dockerImage = "quay.io/biocontainers/mulled-v2-2fe536b56916bd1d61a6a1889eb2987d9ea0cd2f:c51b2e46bf63786b2d9a7a7d23680791163ab39a-0"
+ }
+
+ Boolean compressed = basename(outputPath) != basename(outputPath, ".gz")
+
+ command {
+ set -e
+        ls ~{vcf} ~{vcfIndex} # dxCompiler localization workaround
+ mkdir -p "$(dirname ~{outputPath})"
+ unzip ~{datadirZip}
+ snpEff -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+ -v \
+ ~{genomeVersion} \
+ -noDownload \
+ -dataDir $PWD/data \
+ ~{vcf} \
+ ~{true="-hgvs" false="-noHgvs" hgvs} \
+ ~{true="-lof" false="-noLof" lof} \
+ ~{true="-no-downstream" false="" noDownstream} \
+ ~{true="-no-upstream" false="" noUpstream} \
+ ~{true="-no-intergenic" false="" noIntergenic} \
+ ~{true="-noShiftHgvs" false="" noShiftHgvs} \
+ ~{"-upDownStreamLen " + upDownStreamLen} \
+ ~{if compressed then "| bgzip " else ""} > ~{outputPath}
+
+ ~{if compressed then "tabix ~{outputPath}" else ""}
+ rm -r $PWD/data
+ }
+
+ output {
+ File outputVcf = outputPath
+ File? outputVcfIndex = outputPath + ".tbi"
+ }
+
+ runtime {
+ docker: dockerImage
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ memory: memory
+ }
+
+ parameter_meta {
+ # inputs
+ vcf: {description: "A VCF file to analyse.", category: "required"}
+ vcfIndex: {description: "The index for the VCF file.", category: "required"}
+ genomeVersion: {description: "The version of the genome to be used. The database for this genome must be present in the datadirZip.", category: "required"}
+ datadirZip: {description: "A zip file containing the directory of databases. This zip file must contain a directory called `data`, with the database mentioned in the genomeVersion input as subdirectory.",
+ category: "required"}
+ outputPath: {description: "The path to write the output to.", category: "common"}
+ hgvs: {description: "Equivalent to `-hgvs` if true or `-noHgvs` if false.", category: "advanced"}
+ lof: {description: "Equivalent to `-lof` if true or `-noLof` if false.", category: "advanced"}
+ noDownstream: {description: "Equivalent to the `-no-downstream` flag.", category: "advanced"}
+ noUpstream: {description: "Equivalent to the `-no-upstream` flag.", category: "advanced"}
+ noIntergenic: {description: "Equivalent to the `-no-intergenic` flag.", category: "advanced"}
+ noShiftHgvs: {description: "Equivalent to the `-noShiftHgvs` flag.", category: "advanced"}
+        upDownStreamLen: {description: "Equivalent to the `-upDownStreamLen` option.", category: "advanced"}
+
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+ category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+
+ # outputs
+ outputVcf: {description: "Annotated VCF file."}
+ outputVcfIndex: {description: "Index of annotated VCF file."}
+ }
+}
diff --git a/snpsift.wdl b/snpsift.wdl
new file mode 100644
index 00000000..a62f7295
--- /dev/null
+++ b/snpsift.wdl
@@ -0,0 +1,84 @@
+version 1.0
+
+# MIT License
+#
+# Copyright (c) 2025 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task SnpSiftFilter {
+ input {
+ File vcf
+ File? vcfIndex
+ String filterExpression
+ String outputPath = "./snpsift_filter.vcf"
+
+ String memory = "9GiB"
+ String javaXmx = "8G"
+ Int timeMinutes = 60
+ # Multicontainer with SnpSift 5.2 and bgzip/tabix 1.22
+ String dockerImage = "quay.io/biocontainers/mulled-v2-d4bc0c23eb1d95c7ecff7f0e8b3a4255503fd5d4:c51b2e46bf63786b2d9a7a7d23680791163ab39a-0"
+ }
+
+ Boolean compressed = basename(outputPath) != basename(outputPath, ".gz")
+
+ command {
+ set -e
+        ls ~{vcf} ~{vcfIndex} # dxCompiler localization workaround
+
+ mkdir -p "$(dirname ~{outputPath})"
+ SnpSift -Xmx~{javaXmx} -XX:ParallelGCThreads=1 \
+ filter \
+ "~{filterExpression}" \
+ ~{vcf} \
+ ~{if compressed then "| bgzip " else ""} > ~{outputPath}
+
+ ~{if compressed then "tabix ~{outputPath}" else ""}
+ }
+
+ output {
+ File outputVcf = outputPath
+ File? outputVcfIndex = outputPath + ".tbi"
+ }
+
+ runtime {
+ docker: dockerImage
+ time_minutes: timeMinutes # !UnknownRuntimeKey
+ memory: memory
+ }
+
+ parameter_meta {
+ # inputs
+ vcf: {description: "A VCF file to filter.", category: "required"}
+ vcfIndex: {description: "The index for the VCF file.", category: "common"}
+ filterExpression: {description: "The SnpSift filtering expression.", category: "required"}
+ outputPath: {description: "The path to write the output to.", category: "common"}
+
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
+ category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
+ category: "advanced"}
+
+ # outputs
+ outputVcf: {description: "Filtered VCF file."}
+ outputVcfIndex: {description: "Index of filtered VCF file."}
+ }
+}
diff --git a/somaticseq.wdl b/somaticseq.wdl
index 7b9a4403..7656d086 100644
--- a/somaticseq.wdl
+++ b/somaticseq.wdl
@@ -22,17 +22,18 @@ version 1.0
task ParallelPaired {
input {
- File? classifierSNV
- File? classifierIndel
String outputDir
File referenceFasta
File referenceFastaFai
- File? inclusionRegion
- File? exclusionRegion
File tumorBam
File tumorBamIndex
File normalBam
File normalBamIndex
+
+ File? classifierSNV
+ File? classifierIndel
+ File? inclusionRegion
+ File? exclusionRegion
File? mutect2VCF
File? varscanSNV
File? varscanIndel
@@ -46,6 +47,7 @@ task ParallelPaired {
File? strelkaSNV
File? strelkaIndel
+ String memory = "2GiB"
Int threads = 1
Int timeMinutes = 60
String dockerImage = "lethalfang/somaticseq:3.1.0"
@@ -90,22 +92,24 @@ task ParallelPaired {
runtime {
cpu: threads
+ memory: memory
time_minutes: timeMinutes
docker: dockerImage
}
parameter_meta {
- classifierSNV: {description: "A somaticseq SNV classifier.", category: "common"}
- classifierIndel: {description: "A somaticseq Indel classifier.", category: "common"}
+ # inputs
outputDir: {description: "The directory to write the output to.", category: "common"}
referenceFasta: {description: "The reference fasta file.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
- inclusionRegion: {description: "A bed file describing regions to include.", category: "common"}
- exclusionRegion: {description: "A bed file describing regions to exclude.", category: "common"}
- normalBam: {description: "The normal/control sample's BAM file.", category: "required"}
- normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "required"}
tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"}
tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"}
+ normalBam: {description: "The normal/control sample's BAM file.", category: "required"}
+ normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "required"}
+ classifierSNV: {description: "A somaticseq SNV classifier.", category: "common"}
+ classifierIndel: {description: "A somaticseq Indel classifier.", category: "common"}
+ inclusionRegion: {description: "A bed file describing regions to include.", category: "common"}
+ exclusionRegion: {description: "A bed file describing regions to exclude.", category: "common"}
mutect2VCF: {description: "A VCF as produced by mutect2.", category: "advanced"}
varscanSNV: {description: "An SNV VCF as produced by varscan.", category: "advanced"}
varscanIndel: {description: "An indel VCF as produced by varscan.", category: "advanced"}
@@ -118,11 +122,16 @@ task ParallelPaired {
scalpelVCF: {description: "A VCF as produced by scalpel.", category: "advanced"}
strelkaSNV: {description: "An SNV VCF as produced by strelka.", category: "advanced"}
strelkaIndel: {description: "An indel VCF as produced by somaticsniper.", category: "advanced"}
-
threads: {description: "The number of threads to use.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ indels: {description: ""}
+ snvs: {description: ""}
+ ensembleIndels: {description: ""}
+ ensembleSNV: {description: ""}
}
}
@@ -133,12 +142,13 @@ task ParallelPairedTrain {
String outputDir
File referenceFasta
File referenceFastaFai
- File? inclusionRegion
- File? exclusionRegion
File tumorBam
File tumorBamIndex
File normalBam
File normalBamIndex
+
+ File? inclusionRegion
+ File? exclusionRegion
File? mutect2VCF
File? varscanSNV
File? varscanIndel
@@ -152,6 +162,7 @@ task ParallelPairedTrain {
File? strelkaSNV
File? strelkaIndel
+ String memory = "2GiB"
Int threads = 1
Int timeMinutes = 240
String dockerImage = "lethalfang/somaticseq:3.1.0"
@@ -195,22 +206,24 @@ task ParallelPairedTrain {
runtime {
cpu: threads
+ memory: memory
time_minutes: timeMinutes
docker: dockerImage
}
parameter_meta {
+ # inputs
truthSNV: {description: "A VCF of true SNVs.", category: "required"}
truthIndel: {description: "A VCF of true indels.", category: "required"}
outputDir: {description: "The directory to write the output to.", category: "common"}
referenceFasta: {description: "The reference fasta file.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
- inclusionRegion: {description: "A bed file describing regions to include.", category: "common"}
- exclusionRegion: {description: "A bed file describing regions to exclude.", category: "common"}
- normalBam: {description: "The normal/control sample's BAM file.", category: "required"}
- normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "required"}
tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"}
tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"}
+ normalBam: {description: "The normal/control sample's BAM file.", category: "required"}
+ normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "required"}
+ inclusionRegion: {description: "A bed file describing regions to include.", category: "common"}
+ exclusionRegion: {description: "A bed file describing regions to exclude.", category: "common"}
mutect2VCF: {description: "A VCF as produced by mutect2.", category: "advanced"}
varscanSNV: {description: "An SNV VCF as produced by varscan.", category: "advanced"}
varscanIndel: {description: "An indel VCF as produced by varscan.", category: "advanced"}
@@ -223,25 +236,33 @@ task ParallelPairedTrain {
scalpelVCF: {description: "A VCF as produced by scalpel.", category: "advanced"}
strelkaSNV: {description: "An SNV VCF as produced by strelka.", category: "advanced"}
strelkaIndel: {description: "An indel VCF as produced by somaticsniper.", category: "advanced"}
-
threads: {description: "The number of threads to use.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ consensusIndels: {description: ""}
+ consensusSNV: {description: ""}
+ ensembleIndels: {description: ""}
+ ensembleSNV: {description: ""}
+ ensembleIndelsClassifier: {description: ""}
+ ensembleSNVClassifier: {description: ""}
}
}
task ParallelSingle {
input {
- File? classifierSNV
- File? classifierIndel
+ File bam
+ File bamIndex
String outputDir
File referenceFasta
File referenceFastaFai
+
+ File? classifierSNV
+ File? classifierIndel
File? inclusionRegion
File? exclusionRegion
- File bam
- File bamIndex
File? mutect2VCF
File? varscanVCF
File? vardictVCF
@@ -249,6 +270,7 @@ task ParallelSingle {
File? scalpelVCF
File? strelkaVCF
+ String memory = "2GiB"
Int threads = 1
Int timeMinutes = 60
String dockerImage = "lethalfang/somaticseq:3.1.0"
@@ -286,45 +308,53 @@ task ParallelSingle {
runtime {
cpu: threads
+ memory: memory
time_minutes: timeMinutes
docker: dockerImage
}
parameter_meta {
- classifierSNV: {description: "A somaticseq SNV classifier.", category: "common"}
- classifierIndel: {description: "A somaticseq Indel classifier.", category: "common"}
+ # inputs
+ bam: {description: "The input BAM file.", category: "required"}
+ bamIndex: {description: "The index for the input BAM file.", category: "required"}
outputDir: {description: "The directory to write the output to.", category: "common"}
referenceFasta: {description: "The reference fasta file.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
+ classifierSNV: {description: "A somaticseq SNV classifier.", category: "common"}
+ classifierIndel: {description: "A somaticseq Indel classifier.", category: "common"}
inclusionRegion: {description: "A bed file describing regions to include.", category: "common"}
exclusionRegion: {description: "A bed file describing regions to exclude.", category: "common"}
- bam: {description: "The input BAM file.", category: "required"}
- bamIndex: {description: "The index for the input BAM file.", category: "required"}
mutect2VCF: {description: "A VCF as produced by mutect2.", category: "advanced"}
varscanVCF: {description: "A VCF as produced by varscan.", category: "advanced"}
vardictVCF: {description: "A VCF as produced by vardict.", category: "advanced"}
lofreqVCF: {description: "A VCF as produced by lofreq.", category: "advanced"}
scalpelVCF: {description: "A VCF as produced by scalpel.", category: "advanced"}
strelkaVCF: {description: "A VCF as produced by strelka.", category: "advanced"}
-
threads: {description: "The number of threads to use.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ indels: {description: ""}
+ snvs: {description: ""}
+ ensembleIndels: {description: ""}
+ ensembleSNV: {description: ""}
}
}
task ParallelSingleTrain {
input {
+ File bam
+ File bamIndex
File truthSNV
File truthIndel
String outputDir
File referenceFasta
File referenceFastaFai
+
File? inclusionRegion
File? exclusionRegion
- File bam
- File bamIndex
File? mutect2VCF
File? varscanVCF
File? vardictVCF
@@ -332,6 +362,7 @@ task ParallelSingleTrain {
File? scalpelVCF
File? strelkaVCF
+ String memory = "2GiB"
Int threads = 1
Int timeMinutes = 240
String dockerImage = "lethalfang/somaticseq:3.1.0"
@@ -368,11 +399,15 @@ task ParallelSingleTrain {
runtime {
cpu: threads
+ memory: memory
time_minutes: timeMinutes
docker: dockerImage
}
parameter_meta {
+ # inputs
+ bam: {description: "The input BAM file.", category: "required"}
+ bamIndex: {description: "The index for the input BAM file.", category: "required"}
truthSNV: {description: "A VCF of true SNVs.", category: "required"}
truthIndel: {description: "A VCF of true indels.", category: "required"}
outputDir: {description: "The directory to write the output to.", category: "common"}
@@ -380,19 +415,24 @@ task ParallelSingleTrain {
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
inclusionRegion: {description: "A bed file describing regions to include.", category: "common"}
exclusionRegion: {description: "A bed file describing regions to exclude.", category: "common"}
- bam: {description: "The input BAM file.", category: "required"}
- bamIndex: {description: "The index for the input BAM file.", category: "required"}
mutect2VCF: {description: "A VCF as produced by mutect2.", category: "advanced"}
varscanVCF: {description: "A VCF as produced by varscan.", category: "advanced"}
vardictVCF: {description: "A VCF as produced by vardict.", category: "advanced"}
lofreqVCF: {description: "A VCF as produced by lofreq.", category: "advanced"}
scalpelVCF: {description: "A VCF as produced by scalpel.", category: "advanced"}
strelkaVCF: {description: "A VCF as produced by strelka.", category: "advanced"}
-
threads: {description: "The number of threads to use.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ consensusIndels: {description: ""}
+ consensusSNV: {description: ""}
+ ensembleIndels: {description: ""}
+ ensembleSNV: {description: ""}
+ ensembleIndelsClassifier: {description: ""}
+ ensembleSNVClassifier: {description: ""}
}
}
@@ -400,17 +440,17 @@ task ModifyStrelka {
input {
File strelkaVCF
String outputVCFName = basename(strelkaVCF, ".gz")
- String dockerImage = "lethalfang/somaticseq:3.1.0"
+
+ String memory = "2GiB"
Int timeMinutes = 20
+ String dockerImage = "lethalfang/somaticseq:3.1.0"
}
command {
set -e
-
/opt/somaticseq/vcfModifier/modify_Strelka.py \
-infile ~{strelkaVCF} \
-outfile "modified_strelka.vcf"
-
first_FORMAT_line_num=$(grep -n -m 1 '##FORMAT' "modified_strelka.vcf" | cut -d : -f 1)
sed "$first_FORMAT_line_num"'i##FORMAT=' "modified_strelka.vcf" > ~{outputVCFName}
}
@@ -420,15 +460,20 @@ task ModifyStrelka {
}
runtime {
+ memory: memory
time_minutes: timeMinutes
docker: dockerImage
}
parameter_meta {
+ # inputs
strelkaVCF: {description: "A vcf file as produced by strelka.", category: "required"}
outputVCFName: {description: "The location the output VCF file should be written to.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+    outputVcf: {description: "The modified Strelka VCF file."}
}
}
diff --git a/spades.wdl b/spades.wdl
index 204dbfea..d717ab28 100644
--- a/spades.wdl
+++ b/spades.wdl
@@ -22,10 +22,11 @@ version 1.0
task Spades {
input {
- String outputDir
- String? preCommand
File read1
File? read2
+ String outputDir
+
+ String? preCommand
File? interlacedReads
File? sangerReads
File? pacbioReads
@@ -33,23 +34,24 @@ task Spades {
File? tslrContigs
File? trustedContigs
File? untrustedContigs
- Boolean? singleCell
- Boolean? metagenomic
- Boolean? rna
- Boolean? plasmid
- Boolean? ionTorrent
- Boolean? onlyErrorCorrection
- Boolean? onlyAssembler
- Boolean? careful
- Boolean? disableGzipOutput
- Boolean? disableRepeatResolution
+ Boolean singleCell = false
+ Boolean metagenomic = false
+ Boolean rna = false
+ Boolean plasmid = false
+ Boolean ionTorrent = false
+ Boolean onlyErrorCorrection = false
+ Boolean onlyAssembler = false
+ Boolean careful = false
+ Boolean disableGzipOutput = false
+ Boolean disableRepeatResolution = false
File? dataset
- Int threads = 1
- Int memoryGb = 16
File? tmpDir
String? k
Float? covCutoff
Int? phredOffset
+
+ Int threads = 1
+ Int memoryGb = 16
}
command {
@@ -98,6 +100,6 @@ task Spades {
runtime {
cpu: threads
- memory: "~{memoryGb}G"
+ memory: "~{memoryGb}GiB"
}
-}
\ No newline at end of file
+}
diff --git a/star.wdl b/star.wdl
index 3d0e2eb0..88d3c838 100644
--- a/star.wdl
+++ b/star.wdl
@@ -24,12 +24,13 @@ task GenomeGenerate {
input {
String genomeDir = "STAR_index"
File referenceFasta
+
File? referenceGtf
Int? sjdbOverhang
Int threads = 4
- String memory = "32G"
- Int timeMinutes = ceil(size(referenceFasta, "G") * 240 / threads)
+ String memory = "32GiB"
+ Int timeMinutes = ceil(size(referenceFasta, "GiB") * 240 / threads)
String dockerImage = "quay.io/biocontainers/star:2.7.3a--0"
}
@@ -61,8 +62,10 @@ task GenomeGenerate {
File? sjdbListFromGtfOut = "~{genomeDir}/sjdbList.fromGTF.out.tab"
File? sjdbListOut = "~{genomeDir}/sjdbList.out.tab"
File? transcriptInfo = "~{genomeDir}/transcriptInfo.tab"
- Array[File] starIndex = select_all([chrLength, chrNameLength, chrName, chrStart, genome, genomeParameters,
- sa, saIndex, exonGeTrInfo, exonInfo, geneInfo, sjdbInfo, sjdbListFromGtfOut,
+ Array[File] starIndex = select_all([chrLength, chrNameLength, chrName,
+ chrStart, genome, genomeParameters,
+ sa, saIndex, exonGeTrInfo, exonInfo,
+ geneInfo, sjdbInfo, sjdbListFromGtfOut,
sjdbListOut, transcriptInfo])
}
@@ -74,16 +77,33 @@ task GenomeGenerate {
}
parameter_meta {
- genomeDir: {description:"The directory the STAR index should be written to.", categroy: "common"}
+ # inputs
+    genomeDir: {description: "The directory the STAR index should be written to.", category: "common"}
referenceFasta: {description: "The reference Fasta file.", category: "required"}
referenceGtf: {description: "The reference GTF file.", category: "common"}
sjdbOverhang: {description: "Equivalent to STAR's `--sjdbOverhang` option.", category: "advanced"}
-
threads: {description: "The number of threads to use.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ chrLength: {description: "Text chromosome lengths file."}
+ chrNameLength: {description: "Text chromosome name lengths file."}
+ chrName: {description: "Text chromosome names file."}
+ chrStart: {description: "Chromosome start sites file."}
+ genome: {description: "Binary genome sequence file."}
+ genomeParameters: {description: "Genome parameters file."}
+ sa: {description: "Suffix arrays file."}
+ saIndex: {description: "Index file of suffix arrays."}
+ exonGeTrInfo: {description: "Exon, gene and transcript information file."}
+ exonInfo: {description: "Exon information file."}
+ geneInfo: {description: "Gene information file."}
+ sjdbInfo: {description: "Splice junctions coordinates file."}
+ sjdbListFromGtfOut: {description: "Splice junctions from input GTF file."}
+ sjdbListOut: {description: "Splice junction list file."}
+ transcriptInfo: {description: "Transcripts information file."}
+ starIndex: {description: "A collection of all STAR index files."}
}
}
@@ -95,6 +115,8 @@ task Star {
String outFileNamePrefix
String outSAMtype = "BAM SortedByCoordinate"
String readFilesCommand = "zcat"
+ Int outBAMcompression = 1
+
Int? outFilterScoreMin
Float? outFilterScoreMinOverLread
Int? outFilterMatchNmin
@@ -103,23 +125,22 @@ task Star {
String? twopassMode = "Basic"
Array[String]? outSAMattrRGline
String? outSAMunmapped = "Within KeepPairs"
- Int outBAMcompression = 1
Int? limitBAMsortRAM
Int runThreadN = 4
String? memory
# 1 minute initialization + time reading in index (1 minute per G) + time aligning data.
- Int timeMinutes = 1 + ceil(size(indexFiles, "G")) + ceil(size(flatten([inputR1, inputR2]), "G") * 300 / runThreadN)
+ Int timeMinutes = 1 + ceil(size(indexFiles, "GiB")) + ceil(size(flatten([inputR1, inputR2]), "GiB") * 300 / runThreadN)
String dockerImage = "quay.io/biocontainers/star:2.7.3a--0"
}
# Use a margin of 30% index size. Real memory usage is ~30 GiB for a 27 GiB index.
- Int memoryGb = 1 + ceil(size(indexFiles, "G") * 1.3)
+ Int memoryGb = 1 + ceil(size(indexFiles, "GiB") * 1.3)
# For some reason doing above calculation inside a string does not work.
# So we solve it with an optional memory string and using select_first
# in the runtime section.
- #TODO Could be extended for all possible output extensions
+ #TODO: Could be extended for all possible output extensions.
Map[String, String] samOutputNames = {"BAM SortedByCoordinate": "sortedByCoord.out.bam"}
command {
@@ -151,18 +172,20 @@ task Star {
runtime {
cpu: runThreadN
- memory: select_first([memory, "~{memoryGb}G"])
+ memory: select_first([memory, "~{memoryGb}GiB"])
time_minutes: timeMinutes
docker: dockerImage
}
parameter_meta {
+ # inputs
inputR1: {description: "The first-/single-end FastQ files.", category: "required"}
inputR2: {description: "The second-end FastQ files (in the same order as the first-end files).", category: "common"}
indexFiles: {description: "The star index files.", category: "required"}
outFileNamePrefix: {description: "The prefix for the output files. May include directories.", category: "required"}
outSAMtype: {description: "The type of alignment file to be produced. Currently only `BAM SortedByCoordinate` is supported.", category: "advanced"}
readFilesCommand: {description: "Equivalent to star's `--readFilesCommand` option.", category: "advanced"}
+ outBAMcompression: {description: "The compression level of the output BAM.", category: "advanced"}
outFilterScoreMin: {description: "Equivalent to star's `--outFilterScoreMin` option.", category: "advanced"}
outFilterScoreMinOverLread: {description: "Equivalent to star's `--outFilterScoreMinOverLread` option.", category: "advanced"}
outFilterMatchNmin: {description: "Equivalent to star's `--outFilterMatchNmin` option.", category: "advanced"}
@@ -174,9 +197,12 @@ task Star {
limitBAMsortRAM: {description: "Equivalent to star's `--limitBAMsortRAM` option.", category: "advanced"}
runThreadN: {description: "The number of threads to use.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- outBAMcompression: {description: "The compression level of the output BAM.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ bamFile: {description: "Alignment file."}
+ logFinalOut: {description: "Log information file."}
}
}
diff --git a/strelka.wdl b/strelka.wdl
index 50c38b55..39afe172 100644
--- a/strelka.wdl
+++ b/strelka.wdl
@@ -29,11 +29,12 @@ task Germline {
Array[File]+ indexes
File referenceFasta
File referenceFastaFai
- File? callRegions
- File? callRegionsIndex
Boolean exome = false
Boolean rna = false
+ File? callRegions
+ File? callRegionsIndex
+
Int cores = 1
Int memoryGb = 4
Int timeMinutes = 90
@@ -61,28 +62,31 @@ task Germline {
}
runtime {
- docker: dockerImage
cpu: cores
+ memory: "~{memoryGb}GiB"
time_minutes: timeMinutes
- memory: "~{memoryGb}G"
+ docker: dockerImage
}
parameter_meta {
+ # inputs
runDir: {description: "The directory to use as run/output directory.", category: "common"}
bams: {description: "The input BAM files.", category: "required"}
indexes: {description: "The indexes for the input BAM files.", category: "required"}
referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
- callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"}
- callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"}
exome: {description: "Whether or not the data is from exome sequencing.", category: "common"}
rna: {description: "Whether or not the data is from RNA sequencing.", category: "common"}
-
+ callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"}
+ callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"}
cores: {description: "The number of cores to use.", category: "advanced"}
memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ variants: {description: "Output VCF file."}
+ variantsIndex: {description: "Index of output VCF file."}
}
}
@@ -95,11 +99,12 @@ task Somatic {
File tumorBamIndex
File referenceFasta
File referenceFastaFai
+ Boolean exome = false
+
File? callRegions
File? callRegionsIndex
File? indelCandidatesVcf
File? indelCandidatesVcfIndex
- Boolean exome = false
Int cores = 1
Int memoryGb = 4
@@ -133,13 +138,14 @@ task Somatic {
}
runtime {
- docker: dockerImage
cpu: cores
+ memory: "~{memoryGb}GiB"
time_minutes: timeMinutes
- memory: "~{memoryGb}G"
+ docker: dockerImage
}
parameter_meta {
+ # inputs
runDir: {description: "The directory to use as run/output directory.", category: "common"}
normalBam: {description: "The normal/control sample's BAM file.", category: "required"}
normalBamIndex: {description: "The index for the normal/control sample's BAM file.", category: "required"}
@@ -147,17 +153,21 @@ task Somatic {
tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"}
referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
+ exome: {description: "Whether or not the data is from exome sequencing.", category: "common"}
callRegions: {description: "The bed file which indicates the regions to operate on.", category: "common"}
callRegionsIndex: {description: "The index of the bed file which indicates the regions to operate on.", category: "common"}
indelCandidatesVcf: {description: "An indel candidates VCF file from manta.", category: "advanced"}
indelCandidatesVcfIndex: {description: "The index for the indel candidates VCF file.", category: "advanced"}
- exome: {description: "Whether or not the data is from exome sequencing.", category: "common"}
-
cores: {description: "The number of cores to use.", category: "advanced"}
memoryGb: {description: "The amount of memory this job will use in Gigabytes.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ indelsVcf: {description: "VCF containing INDELS."}
+ indelsIndex: {description: "Index of output `indelsVcf`."}
+ variants: {description: "VCF containing variants."}
+ variantsIndex: {description: "Index of output `variants`."}
}
meta {
@@ -165,4 +175,4 @@ task Somatic {
exclude: ["doNotDefineThis"]
}
}
-}
\ No newline at end of file
+}
diff --git a/stringtie.wdl b/stringtie.wdl
index 5ed62dea..fbe7e442 100644
--- a/stringtie.wdl
+++ b/stringtie.wdl
@@ -24,17 +24,19 @@ task Stringtie {
input {
File bam
File bamIndex
- File? referenceGtf
Boolean skipNovelTranscripts = false
String assembledTranscriptsFile
+
+ File? referenceGtf
Boolean? firstStranded
Boolean? secondStranded
String? geneAbundanceFile
+ Float? minimumCoverage
Int threads = 1
- String memory = "2G"
+ String memory = "2GiB"
Int timeMinutes = 1 + ceil(size(bam, "G") * 60 / threads)
- String dockerImage = "quay.io/biocontainers/stringtie:1.3.4--py35_0"
+ String dockerImage = "quay.io/biocontainers/stringtie:1.3.6--h92e31bf_0"
}
command {
@@ -46,6 +48,7 @@ task Stringtie {
~{true="-e" false="" skipNovelTranscripts} \
~{true="--rf" false="" firstStranded} \
~{true="--fr" false="" secondStranded} \
+ ~{"-c " + minimumCoverage} \
-o ~{assembledTranscriptsFile} \
~{"-A " + geneAbundanceFile} \
~{bam}
@@ -64,19 +67,24 @@ task Stringtie {
}
parameter_meta {
+ # inputs
bam: {description: "The input BAM file.", category: "required"}
bamIndex: {description: "The input BAM file's index.", category: "required"}
- referenceGtf: {description: "A reference GTF file to be used as guide.", category: "common"}
skipNovelTranscripts: {description: "Whether new transcripts should be assembled or not.", category: "common"}
assembledTranscriptsFile: {description: "Where the output of the assembly should be written.", category: "required"}
+ referenceGtf: {description: "A reference GTF file to be used as guide.", category: "common"}
firstStranded: {description: "Equivalent to the --rf flag of stringtie.", category: "required"}
secondStranded: {description: "Equivalent to the --fr flag of stringtie.", category: "required"}
geneAbundanceFile: {description: "Where the abundance file should be written.", category: "common"}
+ minimumCoverage: {description: "The minimum coverage for a transcript to be shown in the output.", category: "advanced"}
threads: {description: "The number of threads to use.", category: "advanced"}
memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ assembledTranscripts: {description: "GTF file containing the assembled transcripts."}
+ geneAbundance: {description: "Gene abundances in tab-delimited format."}
}
}
@@ -84,18 +92,19 @@ task Merge {
input {
Array[File]+ gtfFiles
String outputGtfPath
+ Boolean keepMergedTranscriptsWithRetainedIntrons = false
+
File? guideGtf
Int? minimumLength
Float? minimumCoverage
Float? minimumFPKM
Float? minimumTPM
Float? minimumIsoformFraction
- Boolean keepMergedTranscriptsWithRetainedIntrons = false
String? label
- String memory = "10G"
+ String memory = "10GiB"
Int timeMinutes = 1 + ceil(size(gtfFiles, "G") * 20)
- String dockerImage = "quay.io/biocontainers/stringtie:1.3.4--py35_0"
+ String dockerImage = "quay.io/biocontainers/stringtie:2.1.4--h7e0af3c_0"
}
command {
@@ -125,19 +134,22 @@ task Merge {
}
parameter_meta {
+ # inputs
gtfFiles: {description: "The GTF files produced by stringtie.", category: "required"}
outputGtfPath: {description: "Where the output should be written.", category: "required"}
+ keepMergedTranscriptsWithRetainedIntrons: {description: "Equivalent to the -i flag of 'stringtie --merge'.", category: "advanced"}
guideGtf: {description: "Equivalent to the -G option of 'stringtie --merge'.", category: "advanced"}
minimumLength: {description: "Equivalent to the -m option of 'stringtie --merge'.", category: "advanced"}
minimumCoverage: {description: "Equivalent to the -c option of 'stringtie --merge'.", category: "advanced"}
minimumFPKM: {description: "Equivalent to the -F option of 'stringtie --merge'.", category: "advanced"}
minimumTPM: {description: "Equivalent to the -T option of 'stringtie --merge'.", category: "advanced"}
minimumIsoformFraction: {description: "Equivalent to the -f option of 'stringtie --merge'.", category: "advanced"}
- keepMergedTranscriptsWithRetainedIntrons: {description: "Equivalent to the -i flag of 'stringtie --merge'.", category: "advanced"}
label: {description: "Equivalent to the -l option of 'stringtie --merge'.", category: "advanced"}
memory: {description: "The amount of memory needed for this task in GB.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ mergedGtfFile: {description: "A merged GTF file from a set of GTF files."}
}
}
diff --git a/survivor.wdl b/survivor.wdl
index e5ac7b5b..ae246f60 100644
--- a/survivor.wdl
+++ b/survivor.wdl
@@ -1,7 +1,5 @@
version 1.0
-# MIT License
-#
# Copyright (c) 2018 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -27,14 +25,15 @@ task Merge {
Array[File] filePaths
Int breakpointDistance = 1000
Int suppVecs = 2
- Int svType = 1
- Int strandType = 1
- Int distanceBySvSize = 0
+ Boolean svType = true
+ Boolean strandType = true
+ Boolean distanceBySvSize = false
Int minSize = 30
String outputPath = "./survivor/merged.vcf"
- String memory = "24G"
+
+ String memory = "24GiB"
Int timeMinutes = 60
- String dockerImage = "quay.io/biocontainers/survivor:1.0.6--h6bb024c_0"
+ String dockerImage = "quay.io/biocontainers/survivor:1.0.7--hd03093a_2"
}
command {
@@ -45,9 +44,9 @@ task Merge {
fileList \
~{breakpointDistance} \
~{suppVecs} \
- ~{svType} \
- ~{strandType} \
- ~{distanceBySvSize} \
+ ~{true='1' false='0' svType} \
+ ~{true='1' false='0' strandType} \
+ ~{true='1' false='0' distanceBySvSize} \
~{minSize} \
~{outputPath}
}
@@ -64,16 +63,19 @@ task Merge {
parameter_meta {
# inputs
- filePaths: {description: "An array of VCF files (predictions) to be merged by SURVIVOR", category: "required"}
- breakpointDistance: {description: "The distance between pairwise breakpoints between SVs", category: "advanced"}
- suppVecs: {description: "The minimum number of SV callers to support the merging", category: "advanced"}
- svType: {description: "A boolean to include the type SV to be merged", category: "advanced"}
- strandType: {description: "A boolean to include strand type of an SV to be merged", category: "advanced"}
- distanceBySvSize: {description: "A boolean to predict the pairwise distance between the SVs based on their size", category: "advanced"}
- minSize: {description: "The mimimum size of SV to be merged", category: "advanced"}
+ filePaths: {description: "An array of VCF files (predictions) to be merged by SURVIVOR.", category: "required"}
+ breakpointDistance: {description: "The distance between pairwise breakpoints between SVs.", category: "advanced"}
+ suppVecs: {description: "The minimum number of SV callers to support the merging.", category: "advanced"}
+ svType: {description: "A boolean to include the type SV to be merged.", category: "advanced"}
+ strandType: {description: "A boolean to include strand type of an SV to be merged.", category: "advanced"}
+ distanceBySvSize: {description: "A boolean to predict the pairwise distance between the SVs based on their size.", category: "advanced"}
+    minSize: {description: "The minimum size of SV to be merged.", category: "advanced"}
outputPath: {description: "The location the output VCF file should be written.", category: "common"}
- memory: {description: "The memory required to run the programs", category: "advanced"}
+ memory: {description: "The memory required to run the programs.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ mergedVcf: {description: "All the vcf files specified in fileList merged."}
}
}
diff --git a/talon.wdl b/talon.wdl
index c11ab9e0..2f93e36b 100644
--- a/talon.wdl
+++ b/talon.wdl
@@ -1,6 +1,6 @@
version 1.0
-# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center
+# Copyright (c) 2019 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,10 +8,10 @@ version 1.0
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -30,7 +30,7 @@ task CreateAbundanceFileFromDatabase {
File? whitelistFile
File? datasetsFile
- String memory = "4G"
+ String memory = "4GiB"
Int timeMinutes = 30
String dockerImage = "biocontainers/talon:v5.0_cv1"
}
@@ -86,7 +86,7 @@ task CreateGtfFromDatabase {
File? whitelistFile
File? datasetFile
- String memory = "4G"
+ String memory = "4GiB"
Int timeMinutes = 30
String dockerImage = "biocontainers/talon:v5.0_cv1"
}
@@ -144,7 +144,7 @@ task FilterTalonTranscripts {
File? datasetsFile
Int? minDatasets
- String memory = "4G"
+ String memory = "4GiB"
Int timeMinutes = 30
String dockerImage = "biocontainers/talon:v5.0_cv1"
}
@@ -200,7 +200,7 @@ task GetReadAnnotations {
File? datasetFile
- String memory = "4G"
+ String memory = "4GiB"
Int timeMinutes = 30
String dockerImage = "biocontainers/talon:v5.0_cv1"
}
@@ -248,7 +248,7 @@ task GetSpliceJunctions {
String runMode = "intron"
String outputPrefix
- String memory = "4G"
+ String memory = "4GiB"
Int timeMinutes = 30
String dockerImage = "biocontainers/talon:v5.0_cv1"
}
@@ -302,7 +302,7 @@ task InitializeTalonDatabase {
Int cutOff3p = 300
String outputPrefix
- String memory = "10G"
+ String memory = "10GiB"
Int timeMinutes = 60
String dockerImage = "biocontainers/talon:v5.0_cv1"
}
@@ -360,7 +360,7 @@ task LabelReads {
String outputPrefix
Int threads = 4
- String memory = "25G"
+ String memory = "25GiB"
Int timeMinutes = 2880
String dockerImage = "biocontainers/talon:v5.0_cv1"
}
@@ -413,7 +413,7 @@ task ReformatGtf {
input {
File gtfFile
- String memory = "4G"
+ String memory = "4GiB"
Int timeMinutes = 30
String dockerImage = "biocontainers/talon:v5.0_cv1"
}
@@ -454,7 +454,7 @@ task SummarizeDatasets {
File? datasetGroupsCsv
- String memory = "4G"
+ String memory = "4GiB"
Int timeMinutes = 50
String dockerImage = "biocontainers/talon:v5.0_cv1"
}
@@ -506,7 +506,7 @@ task Talon {
String outputPrefix
Int threads = 4
- String memory = "25G"
+ String memory = "25GiB"
Int timeMinutes = 2880
String dockerImage = "biocontainers/talon:v5.0_cv1"
}
diff --git a/transcriptclean.wdl b/transcriptclean.wdl
index 79661307..8607a7a3 100644
--- a/transcriptclean.wdl
+++ b/transcriptclean.wdl
@@ -1,6 +1,6 @@
version 1.0
-# Copyright (c) 2019 Sequencing Analysis Support Core - Leiden University Medical Center
+# Copyright (c) 2019 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,10 +8,10 @@ version 1.0
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -27,7 +27,7 @@ task GetSJsFromGtf {
String outputPrefix
Int minIntronSize = 21
- String memory = "8G"
+ String memory = "8GiB"
Int timeMinutes = 30
String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1"
}
@@ -54,10 +54,10 @@ task GetSJsFromGtf {
parameter_meta {
# inputs
- gtfFile: {description: "Input gtf file", category: "required"}
- genomeFile: {description: "Reference genome", category: "required"}
- minIntronSize: {description: "Minimum size of intron to consider a junction.", category: "advanced"}
+ gtfFile: {description: "Input gtf file.", category: "required"}
+ genomeFile: {description: "Reference genome.", category: "required"}
outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
+ minIntronSize: {description: "Minimum size of intron to consider a junction.", category: "advanced"}
memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
@@ -72,7 +72,7 @@ task GetTranscriptCleanStats {
File inputSam
String outputPrefix
- String memory = "4G"
+ String memory = "4GiB"
Int timeMinutes = 30
String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1"
}
@@ -97,7 +97,7 @@ task GetTranscriptCleanStats {
parameter_meta {
# inputs
- inputSam: {description: "Output sam file from transcriptclean", category: "required"}
+ inputSam: {description: "Output sam file from transcriptclean.", category: "required"}
outputPrefix: {description: "Output directory path + output file prefix.", category: "required"}
memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
@@ -128,7 +128,7 @@ task TranscriptClean {
File? variantFile
Int cores = 1
- String memory = "25G"
+ String memory = "25GiB"
Int timeMinutes = 2880
String dockerImage = "biocontainers/transcriptclean:v2.0.2_cv1"
}
@@ -189,8 +189,7 @@ task TranscriptClean {
cores: {description: "The number of cores to be used.", category: "advanced"}
memory: {description: "The amount of memory available to the job.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
# outputs
fastaFile: {description: "Fasta file containing corrected reads."}
diff --git a/umi-tools.wdl b/umi-tools.wdl
index c5f3b145..d8d17c48 100644
--- a/umi-tools.wdl
+++ b/umi-tools.wdl
@@ -1,6 +1,6 @@
version 1.0
-# Copyright (c) 2017 Sequencing Analysis Support Core - Leiden University Medical Center
+# Copyright (c) 2017 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,10 +8,10 @@ version 1.0
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -26,11 +26,13 @@ task Extract {
File? read2
String bcPattern
String? bcPattern2
- Boolean threePrime = false
String read1Output = "umi_extracted_R1.fastq.gz"
String? read2Output = "umi_extracted_R2.fastq.gz"
+ Boolean threePrime = false
+
+ String memory = "20GiB"
Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 2)
- String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0"
+ String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:3067b520386698317fd507c413baf7f901666fd4-0"
}
command {
@@ -50,21 +52,26 @@ task Extract {
}
runtime {
- docker: dockerImage
+ memory: memory
time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
read1: {description: "The first/single-end fastq file.", category: "required"}
read2: {description: "The second-end fastq file.", category: "common"}
bcPattern: {description: "The pattern to be used for UMI extraction. See the umi_tools docs for more information.", category: "required"}
bcPattern2: {description: "The pattern to be used for UMI extraction in the second-end reads. See the umi_tools docs for more information.", category: "advanced"}
- threePrime: {description: "Whether or not the UMI's are at the reads' 3' end. If false the UMIs are extracted from the 5' end.", category: "advanced"}
read1Output: {description: "The location to write the first/single-end output fastq file to.", category: "advanced"}
read2Output: {description: "The location to write the second-end output fastq file to.", category: "advanced"}
+ threePrime: {description: "Whether or not the UMI's are at the reads' 3' end. If false the UMIs are extracted from the 5' end.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ extractedRead1: {description: "First read with UMI extracted to read name."}
+ extractedRead2: {description: "Second read with UMI extracted to read name."}
}
}
@@ -72,29 +79,31 @@ task Dedup {
input {
File inputBam
File inputBamIndex
- String? umiSeparator
String outputBamPath
- String? statsPrefix
+ String tmpDir = "./umiToolsDedupTmpDir"
+
Boolean paired = true
- String memory = "25G"
- Int timeMinutes = 30 + ceil(size(inputBam, "G") * 30)
+ String? umiSeparator
+ String? statsPrefix
- # Use a multi-package-container which includes umi_tools (0.5.5) and samtools (1.9)
- String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:6089936aca6219b5bb5f54210ac5eb456c7503f2-0"
+ String memory = "25GiB"
+ Int timeMinutes = 30 + ceil(size(inputBam, "GiB") * 30)
+ String dockerImage = "quay.io/biocontainers/mulled-v2-509311a44630c01d9cb7d2ac5727725f51ea43af:3067b520386698317fd507c413baf7f901666fd4-0"
}
String outputBamIndex = sub(outputBamPath, "\.bam$", ".bai")
command {
set -e
- mkdir -p "$(dirname ~{outputBamPath})"
+ mkdir -p "$(dirname ~{outputBamPath})" "~{tmpDir}"
umi_tools dedup \
- --stdin ~{inputBam} \
- --stdout ~{outputBamPath} \
+ --stdin=~{inputBam} \
+ --stdout=~{outputBamPath} \
~{"--output-stats " + statsPrefix} \
~{"--umi-separator=" + umiSeparator} \
- ~{true="--paired" false="" paired}
+ ~{true="--paired" false="" paired} \
+ --temp-dir=~{tmpDir}
samtools index ~{outputBamPath} ~{outputBamIndex}
}
@@ -107,21 +116,29 @@ task Dedup {
}
runtime {
- docker: dockerImage
- time_minutes: timeMinutes
memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
}
parameter_meta {
+ # inputs
inputBam: {description: "The input BAM file.", categrory: "required"}
inputBamIndex: {description: "The index for the ipnut BAM file.", cateogry: "required"}
outputBamPath: {description: "The location to write the output BAM file to.", category: "required"}
- statsPrefix: {description: "The prefix for the stats files.", category: "advanced"}
- umiSeparator: {description: "Seperator used for UMIs in the read names.", category: "advanced"}
+ tmpDir: {description: "Temporary directory.", category: "advanced"}
paired: {description: "Whether or not the data is paired.", category: "common"}
+ umiSeparator: {description: "Separator used for UMIs in the read names.", category: "advanced"}
+ statsPrefix: {description: "The prefix for the stats files.", category: "advanced"}
memory: {description: "The amount of memory required for the task.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ deduppedBam: {description: "Deduplicated BAM file."}
+ deduppedBamIndex: {description: "Index of the deduplicated BAM file."}
+ editDistance: {description: "Report of the (binned) average edit distance between the UMIs at each position."}
+ umiStats: {description: "UMI-level summary statistics."}
+ positionStats: {description: "The counts for unique combinations of UMI and position."}
}
}
diff --git a/umi.wdl b/umi.wdl
new file mode 100644
index 00000000..0628783a
--- /dev/null
+++ b/umi.wdl
@@ -0,0 +1,107 @@
+version 1.0
+
+# Copyright (c) 2022 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task BamReadNameToUmiTag {
+
+ # This task processes a bam file with reads that have been extracted with
+ # umi-tools extract. The UMI is extracted from the read name again and put
+ # in the bam file again with umiTag (default RX)
+ input {
+ File inputBam
+ String outputPath = "output.bam"
+ String umiTag = "RX"
+ String separatorChar = "_"
+
+ String memory = "2GiB"
+ Int timeMinutes = 1 + ceil(size([inputBam], "GiB") * 10)
+ String dockerImage = "quay.io/biocontainers/pysam:0.17.0--py39h051187c_0"
+ }
+
+ String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai")
+
+ command <<<
+ python <<CODE
+ import os
+ from typing import Tuple
+
+ import pysam
+
+ def split_umi_from_name(name: str, separator_char: str) -> Tuple[str, str]:
+ id_and_rest = name.split(maxsplit=1)
+ id = id_and_rest[0]
+ # If there was no whitespace id_and_rest will have length 1
+ other_parts = id_and_rest[1] if len(id_and_rest) == 2 else ""
+ underscore_index = id.rfind(separator_char)
+ umi = id[underscore_index + 1:]
+ new_id = id[:underscore_index]
+ if other_parts:
+ return " ".join([new_id, other_parts]), umi
+ return new_id, umi
+
+ def annotate_umis(in_file, out_file, bam_tag="RX", separator_char = "_"):
+ in_bam = pysam.AlignmentFile(in_file, "rb")
+ os.makedirs(os.path.dirname(out_file), exist_ok=True)
+ out_bam = pysam.AlignmentFile(out_file, "wb", template=in_bam)
+ # Encode bam_tag as bytes. Otherwise pysam converts it to bytes anyway.
+ encoded_bam_tag = bam_tag.encode('ascii')
+ for segment in in_bam: # type: pysam.AlignedSegment
+ new_name, umi = split_umi_from_name(segment.query_name, separator_char)
+ segment.query_name = new_name
+ # Encode umi as ascii. Otherwise pysam encodes it to bytes anyway.
+ # Value type has to be a string though, otherwise pysam crashes.
+ segment.set_tag(encoded_bam_tag, umi.encode('ascii'), value_type="Z")
+ out_bam.write(segment)
+
+ if __name__ == "__main__":
+ annotate_umis("~{inputBam}", "~{outputPath}", "~{umiTag}", "~{separatorChar}")
+ pysam.index("~{outputPath}", "~{bamIndexPath}", b=True)
+ CODE
+ >>>
+
+ output {
+ File outputBam = outputPath
+ File outputBamIndex = bamIndexPath
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ # inputs
+ inputBam: {description: "The input BAM file.", category: "required"}
+ outputPath: {description: "Output directory path + output file.", category: "common"}
+ umiTag: {description: "The tag used for UMIs in the output BAM file.", category: "common"}
+ separatorChar: {description: "Character used to separate the UMIs from the read name.", category: "common"}
+
+ memory: {description: "The amount of memory available to the job.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ outputBam: {description: "Sorted BAM file."}
+ outputBamIndex: {description: "Sorted BAM file index."}
+ }
+}
diff --git a/unicycler.wdl b/unicycler.wdl
index fc393603..d83db3ca 100644
--- a/unicycler.wdl
+++ b/unicycler.wdl
@@ -22,12 +22,13 @@ version 1.0
task Unicycler {
input {
+ String out
+
String? preCommand
File? short1
File? short2
File? unpaired
File? long
- String out
Int? verbosity
Int? minFastaLength
Int? keep
@@ -65,7 +66,7 @@ task Unicycler {
String? lowScore
Int threads = 1
- String memory = "4G"
+ String memory = "4GiB"
}
command {
@@ -125,4 +126,4 @@ task Unicycler {
cpu: threads
memory: memory
}
-}
\ No newline at end of file
+}
diff --git a/vardict.wdl b/vardict.wdl
index 92beb32e..187b4567 100644
--- a/vardict.wdl
+++ b/vardict.wdl
@@ -27,29 +27,28 @@ task VarDict {
String tumorSampleName
File tumorBam
File tumorBamIndex
- String? normalSampleName
- File? normalBam
- File? normalBamIndex
File referenceFasta
File referenceFastaFai
File bedFile
String outputVcf
-
- Int chromosomeColumn = 1
- Int startColumn = 2
- Int endColumn = 3
- Int geneColumn = 4
-
Boolean outputCandidateSomaticOnly = true
Boolean outputAllVariantsAtSamePosition = true
Float mappingQuality = 20
Int minimumTotalDepth = 8
Int minimumVariantDepth = 4
Float minimumAlleleFrequency = 0.02
+ Int chromosomeColumn = 1
+ Int startColumn = 2
+ Int endColumn = 3
+ Int geneColumn = 4
+
+ String? normalSampleName
+ File? normalBam
+ File? normalBamIndex
- Int threads = 1
- String memory = "18G"
String javaXmx = "16G"
+ Int threads = 1
+ String memory = "18GiB"
Int timeMinutes = 300
String dockerImage = "quay.io/biocontainers/vardict-java:1.5.8--1"
}
@@ -93,33 +92,34 @@ task VarDict {
}
parameter_meta {
+ # inputs
tumorSampleName: {description: "The name of the tumor/case sample.", category: "required"}
tumorBam: {description: "The tumor/case sample's BAM file.", category: "required"}
tumorBamIndex: {description: "The index for the tumor/case sample's BAM file.", category: "required"}
- normalSampleName: {description: "The name of the normal/control sample.", category: "common"}
- normalBam: {description: "The normal/control sample's BAM file.", category: "common"}
- normalBamIndex: {description: "The normal/control sample's BAM file.", category: "common"}
referenceFasta: {description: "The reference fasta file.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
bedFile: {description: "A bed file describing the regions to operate on. These regions must be below 1e6 bases in size.", category: "required"}
outputVcf: {description: "The location to write the output VCF file to.", category: "required"}
- chromosomeColumn: {description: "Equivalent to vardict-java's `-c` option.", category: "advanced"}
- startColumn: {description: "Equivalent to vardict-java's `-S` option.", category: "advanced"}
- endColumn: {description: "Equivalent to vardict-java's `-E` option.", category: "advanced"}
- geneColumn: {description: "Equivalent to vardict-java's `-g` option.", category: "advanced"}
outputCandidateSomaticOnly: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-M` flag.", category: "advanced"}
outputAllVariantsAtSamePosition: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-A` flag.", category: "advanced"}
mappingQuality: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-Q` option.", category: "advanced"}
minimumTotalDepth: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-d` option.", category: "advanced"}
minimumVariantDepth: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-v` option.", category: "advanced"}
minimumAlleleFrequency: {description: "Equivalent to var2vcf_paired.pl or var2vcf_valid.pl's `-f` option.", category: "advanced"}
-
+ chromosomeColumn: {description: "Equivalent to vardict-java's `-c` option.", category: "advanced"}
+ startColumn: {description: "Equivalent to vardict-java's `-S` option.", category: "advanced"}
+ endColumn: {description: "Equivalent to vardict-java's `-E` option.", category: "advanced"}
+ geneColumn: {description: "Equivalent to vardict-java's `-g` option.", category: "advanced"}
+ normalSampleName: {description: "The name of the normal/control sample.", category: "common"}
+ normalBam: {description: "The normal/control sample's BAM file.", category: "common"}
+ normalBamIndex: {description: "The normal/control sample's BAM file.", category: "common"}
+ javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.", category: "advanced"}
threads: {description: "The number of threads to use.", category: "advanced"}
memory: {description: "The amount of memory this job will use.", category: "advanced"}
- javaXmx: {description: "The maximum memory available to the program. Should be lower than `memory` to accommodate JVM overhead.",
- category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
- dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
- category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ vcfFile: {description: "Output VCF file."}
}
}
diff --git a/vep.wdl b/vep.wdl
new file mode 100644
index 00000000..2c1f923b
--- /dev/null
+++ b/vep.wdl
@@ -0,0 +1,110 @@
+version 1.0
+
+# Copyright (c) 2017 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task Vep {
+ input {
+ File inputFile
+ String outputPath = "vep.annotated.vcf.gz"
+ File cacheTar
+ File? pluginsTar
+ String? species
+ Array[String] plugins = []
+ Boolean refseq = false
+ Boolean merged = false
+
+ Boolean everything = false
+ Boolean symbol = false
+
+ String memory = "8GiB"
+ # Account time for unpacking the cache.
+ Int timeMinutes = 1 + ceil(size(cacheTar, "GiB")) + ceil(size(inputFile, "MiB") * 15)
+ String dockerImage = "quay.io/biocontainers/ensembl-vep:113.3--pl5321h2a3209d_0"
+ }
+
+ command <<<
+ set -eu
+ mkdir vep_cache
+ mkdir -p "$(dirname ~{outputPath})"
+ tar -x --directory vep_cache -f ~{cacheTar}
+ ~{"tar -x --directory vep_cache -f " + pluginsTar}
+
+ # Make sure vep can error, so the removal always succeeds.
+ set +e
+ # Output all stats files by default for MultiQC integration
+ vep \
+ --input_file ~{inputFile} \
+ --output_file ~{outputPath} \
+ ~{"--species " + species} \
+ --stats_html --stats_text \
+ --dir vep_cache \
+ --offline \
+ ~{true="--plugin" false="" length(plugins) > 0} ~{sep=" --plugin " plugins} \
+ --vcf \
+ --compress_output bgzip \
+ ~{true="--refseq" false="" refseq} \
+ ~{true="--merged" false="" merged} \
+ ~{true="--everything" false="" everything} \
+ ~{true="--symbol" false="" symbol}
+
+ VEP_EXIT_CODE=$?
+ set -e
+ # Cleanup the tar extract to save filesystem space
+ rm -rf vep_cache
+
+ exit $VEP_EXIT_CODE
+ >>>
+
+ output {
+ File outputFile = outputPath
+ File statsHtml = outputPath + "_summary.html"
+ File statsTxt = outputPath + "_summary.txt"
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ # input
+ inputFile: {description: "The VCF to annotate.", category: "required"}
+ outputPath: {description: "Where to put the output file.", category: "advanced"}
+ cacheTar: {description: "A TAR archive containing the cache. The TAR archives from the VEP website work (http://www.ensembl.org/info/docs/tools/vep/script/vep_cache.html).", category: "required"}
+ pluginsTar: {description: "A TAR file with custom plugins.", category: "advanced"}
+ species: {description: "Which species cache to use.", category: "common"}
+ plugins: {description: "Which plugins to use.", category: "common"}
+ refseq: {description: "Use the refseq cache.", category: "common"}
+ merged: {description: "Use the merged cache.", category: "common"}
+ everything: {description: "Use all annotation sources bundled with vep.", category: "common"}
+ symbol: {description: "Add the gene symbol to the output where available.", category: "advanced"}
+
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # output
+ outputFile: {description: "The annotated VEP VCF file."}
+ statsHtml: {description: "The VEP summary stats HTML file."}
+ statsTxt: {description: "The VEP summary stats TXT file."}
+ }
+}
diff --git a/vt.wdl b/vt.wdl
index d4c134b9..635641e9 100644
--- a/vt.wdl
+++ b/vt.wdl
@@ -1,6 +1,6 @@
version 1.0
-# Copyright (c) 2020 Sequencing Analysis Support Core - Leiden University Medical Center
+# Copyright (c) 2020 Leiden University Medical Center
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -8,10 +8,10 @@ version 1.0
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
-
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -26,23 +26,40 @@ task Normalize {
File inputVCFIndex
File referenceFasta
File referenceFastaFai
- String outputPath = "./vt/normalized_decomposed.vcf"
- String dockerImage = "quay.io/biocontainers/vt:0.57721--hdf88d34_2"
- String memory = "4G"
- Int timeMinutes = 30
+ Boolean ignoreMaskedRef = false
+ String outputPath = "./vt/normalized_decomposed.vcf.gz"
+ String? filterExpression
+
+ Int compressionLevel = 1
+
+ String memory = "4GiB"
+ Int timeMinutes = 10 + ceil(size(inputVCF, "GiB") * 240)
+ String dockerImage = "quay.io/biocontainers/vt:0.57721--h2419454_12"
}
command {
- set -e
+ set -eo pipefail
mkdir -p "$(dirname ~{outputPath})"
- vt normalize ~{inputVCF} -r ~{referenceFasta} | vt decompose -s - -o ~{outputPath}
+ vt view -h \
+ ~{"-f '" + filterExpression}~{true="'" false="" defined(filterExpression)} \
+ ~{inputVCF} \
+ | vt normalize - \
+ -r ~{referenceFasta} \
+ ~{true="-m " false="" ignoreMaskedRef} \
+ | vt decompose -s - \
+ | vt view - \
+ -c ~{compressionLevel} \
+ -o ~{outputPath}
+ vt index ~{outputPath}
}
output {
File outputVcf = outputPath
+ File outputVcfIndex = outputPath + ".tbi"
}
runtime {
+ cpu: 1
memory: memory
time_minutes: timeMinutes
docker: dockerImage
@@ -52,12 +69,19 @@ task Normalize {
# inputs
inputVCF: {description: "The VCF file to process.", category: "required"}
inputVCFIndex: {description: "The index of the VCF file to be processed.", category: "required"}
- outputPath: {description: "The location the output VCF file should be written.", category: "common"}
referenceFasta: {description: "The reference fasta file which was also used for mapping.", category: "required"}
referenceFastaFai: {description: "The index for the reference fasta file.", category: "required"}
- memory: {description: "The memory required to run the programs", category: "advanced"}
+ ignoreMaskedRef: {description: "Warns but does not exit when REF is inconsistent with masked reference sequence for non SNPs.", category: "advanced"}
+ outputPath: {description: "The location the output VCF file should be written.", category: "common"}
+ filterExpression: {description: "See https://genome.sph.umich.edu/wiki/Vt#Filters for valid expressions.", category: "common"}
+ compressionLevel: {description: "Compression level for the out vcf.gz file.", category: "advanced"}
+
+ memory: {description: "The memory required to run the programs.", category: "advanced"}
timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ outputVcf: {description: "Normalized and decomposed VCF file."}
+ outputVcfIndex: {description: "Index for normalized and decomposed VCF file."}
}
}
-
diff --git a/whatshap.wdl b/whatshap.wdl
new file mode 100644
index 00000000..b491f566
--- /dev/null
+++ b/whatshap.wdl
@@ -0,0 +1,233 @@
+version 1.0
+
+# Copyright (c) 2018 Leiden University Medical Center
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+task Phase {
+ input {
+ String outputVCF
+ File vcf
+ File vcfIndex
+ File phaseInput
+ File phaseInputIndex
+
+ File? reference
+ File? referenceIndex
+ String? tag
+ String? algorithm
+ Boolean? indels
+ String? sample
+ String? chromosome
+ String? threshold
+ String? ped
+
+ String memory = 2 + ceil(size(phaseInput, "G") / 20 )
+ Int timeMinutes = 400 + ceil(size(phaseInput, "G") * 0.9 )
+
+ # Whatshap 1.0, tabix 0.2.5.
+ String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0"
+ }
+
+ command {
+ set -e
+
+ mkdir -p $(dirname ~{outputVCF})
+
+ whatshap phase \
+ ~{vcf} \
+ ~{phaseInput} \
+ ~{if defined(outputVCF) then ("--output " + '"' + outputVCF + '"') else ""} \
+ ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \
+ ~{if defined(tag) then ("--tag " + '"' + tag + '"') else ""} \
+ ~{if defined(algorithm) then ("--algorithm " + '"' + algorithm + '"') else ""} \
+ ~{true="--indels" false="" indels} \
+ ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \
+ ~{if defined(chromosome) then ("--chromosome " + '"' + chromosome + '"') else ""} \
+ ~{if defined(threshold) then ("--threshold " + '"' + threshold + '"') else ""} \
+ ~{if defined(ped) then ("--ped " + '"' + ped + '"') else ""}
+
+ tabix -p vcf ~{outputVCF}
+ }
+
+ output {
+ File phasedVCF = outputVCF
+ File phasedVCFIndex = outputVCF + ".tbi"
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ # inputs
+ outputVCF: {description: "Output VCF file. Add .gz to the file name to get compressed output. If omitted, use standard output.", category: "common"}
+ vcf: {description: "VCF or BCF file with variants to be phased (can be gzip-compressed).", category: "required"}
+ vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased.", category: "required"}
+ phaseInput: {description: "BAM, CRAM, VCF or BCF file(s) with phase information, either through sequencing reads (BAM, CRAM) or through phased blocks (VCF, BCF).", category: "required"}
+ phaseInputIndex: {description: "Index of BAM, CRAM, VCF or BCF file(s) with phase information.", category: "required"}
+ reference: {description: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created.", category: "common"}
+ referenceIndex: {description: "Index of reference file.", category: "common"}
+ tag: {description: "Store phasing information with PS tag (standardized) or HP tag (used by GATK ReadBackedPhasing) (default: PS).", category: "common"}
+ algorithm: {description: "Phasing algorithm to use (default: whatshap).", category: "advanced"}
+ indels: {description: "Also phase indels (default: do not phase indels).", category: "common"}
+ sample: {description: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times.", category: "common"}
+ chromosome: {description: "Name of chromosome to phase. If not given, all chromosomes in the input VCF are phased. Can be used multiple times.", category: "common"}
+ threshold: {description: "The threshold of the ratio between the probabilities that a pair of reads come from the same haplotype and different haplotypes in the read merging model (default: 1000000).", category: "advanced"}
+ ped: {description: "Use pedigree information in PED file to improve phasing (switches to PedMEC algorithm). Columns 2, 3, 4 must refer to child, mother, and father sample names as used in the VCF and BAM/CRAM. Other columns are ignored.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ phasedVCF: {description: "VCF file containing phased variants."}
+ phasedVCFIndex: {description: "Index of phased VCF file."}
+ }
+}
+
+task Stats {
+ input {
+ File vcf
+
+ String? gtf
+ String? sample
+ String? tsv
+ String? blockList
+ String? chromosome
+
+ String memory = "4GiB"
+ Int timeMinutes = 30
+ # Whatshap 1.0, tabix 0.2.5.
+ String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0"
+ }
+
+ command {
+ set -e
+
+ mkdir -p $(dirname ~{tsv})
+
+ whatshap stats \
+ ~{vcf} \
+ ~{if defined(gtf) then ("--gtf " + '"' + gtf + '"') else ""} \
+ ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""} \
+ ~{if defined(tsv) then ("--tsv " + '"' + tsv + '"') else ""} \
+ ~{if defined(blockList) then ("--block-list " + '"' + blockList + '"') else ""} \
+ ~{if defined(chromosome) then ("--chromosome " + '"' + chromosome + '"') else ""}
+ }
+
+ output {
+ File? phasedGTF = gtf
+ File? phasedTSV = tsv
+ File? phasedBlockList = blockList
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ # inputs
+ vcf: {description: "Phased VCF file.", category: "required"}
+ gtf: {description: "Write phased blocks to GTF file.", category: "common"}
+ sample: {description: "Name of the sample to process. If not given, use first sample found in VCF.", category: "common"}
+ tsv: {description: "Filename to write statistics to (tab-separated).", category: "common"}
+ blockList: {description: "Filename to write list of all blocks to (one block per line).", category: "advanced"}
+ chromosome: {description: "Name of chromosome to process. If not given, all chromosomes in the input VCF are considered.", category: "advanced"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ phasedGTF: {description: "Phasing statistics for a single VCF file."}
+ phasedTSV: {description: "Statistics in a tab-separated value format."}
+ phasedBlockList: {description: "List of the total number of phase sets/blocks."}
+ }
+}
+
+task Haplotag {
+ input {
+ File vcf
+ File vcfIndex
+ File alignments
+ File alignmentsIndex
+ String outputFile
+
+ File? reference
+ File? referenceFastaIndex
+ String? regions
+ String? sample
+
+ String memory = 2 + ceil(size(alignments, "G") / 50 )
+ Int timeMinutes = 50 + ceil(size(alignments, "G") * 2 )
+
+ # Whatshap 1.0, tabix 0.2.5.
+ String dockerImage = "quay.io/biocontainers/mulled-v2-5c61fe1d8c284dd05d26238ce877aa323205bf82:89b4005d04552bdd268e8af323df83357e968d83-0"
+ }
+
+ command {
+ set -e
+
+ mkdir -p $(dirname ~{outputFile})
+
+ whatshap haplotag \
+ ~{vcf} \
+ ~{alignments} \
+ ~{if defined(outputFile) then ("--output " + '"' + outputFile+ '"') else ""} \
+ ~{if defined(reference) then ("--reference " + '"' + reference + '"') else ""} \
+ ~{if defined(regions) then ("--regions " + '"' + regions + '"') else ""} \
+ ~{if defined(sample) then ("--sample " + '"' + sample + '"') else ""}
+
+ python3 -c "import pysam; pysam.index('~{outputFile}')"
+ }
+
+ output {
+ File bam = outputFile
+ File bamIndex = outputFile + ".bai"
+ }
+
+ runtime {
+ memory: memory
+ time_minutes: timeMinutes
+ docker: dockerImage
+ }
+
+ parameter_meta {
+ # inputs
+ vcf: {description: "VCF file with phased variants (must be gzip-compressed and indexed).", category: "required"}
+ vcfIndex: {description: "Index for the VCF or BCF file with variants to be phased.", category: "required"}
+ alignments: {description: "File (BAM/CRAM) with read alignments to be tagged by haplotype.", category: "required"}
+ alignmentsIndex: {description: "Index for the alignment file.", category: "required"}
+ outputFile: {description: "Output file. If omitted, use standard output.", category: "required"}
+ reference: {description: "Reference file. Provide this to detect alleles through re-alignment. If no index (.fai) exists, it will be created.", category: "common"}
+ referenceFastaIndex: {description: "Index for the reference file.", category: "common"}
+ regions: {description: "Specify region(s) of interest to limit the tagging to reads/variants overlapping those regions. You can specify a space-separated list of regions in the form of chrom:start-end, chrom (consider entire chromosome), or chrom:start (consider region from this start to end of chromosome).", category: "advanced"}
+ sample: {description: "Name of a sample to phase. If not given, all samples in the input VCF are phased. Can be used multiple times.", category: "common"}
+ memory: {description: "The amount of memory this job will use.", category: "advanced"}
+ timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
+ dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
+
+ # outputs
+ bam: {description: "BAM file containing tagged reads for haplotype."}
+ bamIndex: {description: "Index of the tagged BAM file."}
+ }
+}
diff --git a/wisestork.wdl b/wisestork.wdl
index 0fd812b1..bef54e27 100644
--- a/wisestork.wdl
+++ b/wisestork.wdl
@@ -22,13 +22,16 @@ version 1.0
task Count {
input {
- Int? binSize
- File reference
- File referenceIndex
- File? binFile
File inputBam
File inputBamIndex
+ File reference
+ File referenceIndex
String outputBed = "output.bed"
+
+ Int? binSize
+ File? binFile
+
+ String memory = "2GiB"
String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0"
}
@@ -48,21 +51,25 @@ task Count {
}
runtime {
+ memory: memory
docker: dockerImage
}
}
task GcCorrect {
input {
- Int? binSize
File reference
File referenceIndex
- File? binFile
File inputBed
String outputBed = "output.bed"
+
+ Int? binSize
+ File? binFile
Float? fracN
Int? iter
Float? fracLowess
+
+ String memory = "2GiB"
String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0"
}
@@ -85,19 +92,23 @@ task GcCorrect {
}
runtime {
+ memory: memory
docker: dockerImage
}
}
task Newref {
input {
- Int? binSize
File reference
File referenceIndex
- File? binFile
Array[File]+ inputBeds
String outputBed = "output.bed"
+
+ Int? binSize
+ File? binFile
Int? nBins
+
+ Int memory = 2 + ceil(length(inputBeds) * 0.15)
String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0"
}
@@ -106,36 +117,37 @@ task Newref {
mkdir -p $(dirname ~{outputBed})
wisestork newref \
~{"--binsize " + binSize} \
- --reference ~{reference} \
- ~{"--bin-file " + binFile} \
- --output ~{outputBed} \
- -I ~{sep=" -I " inputBeds} \
- ~{"--n-bins " + nBins}
+ --reference ~{reference} \
+ ~{"--bin-file " + binFile} \
+ --output ~{outputBed} \
+ -I ~{sep=" -I " inputBeds} \
+ ~{"--n-bins " + nBins}
}
output {
File bedFile = outputBed
}
- Int memory = 2 + ceil(length(inputBeds) * 0.15)
-
runtime {
+ memory: "~{memory}GiB"
docker: dockerImage
- memory: "~{memory}G"
}
}
task Zscore {
input {
- Int? binSize
File reference
File referenceIndex
- File? binFile
File inputBed
File inputBedIndex
File dictionaryFile
File dictionaryFileIndex
String outputBed = "output.bed"
+
+ Int? binSize
+ File? binFile
+
+ String memory = "2GiB"
String dockerImage = "quay.io/biocontainers/wisestork:0.1.2--pyh24bf2e0_0"
}
@@ -156,7 +168,7 @@ task Zscore {
}
runtime {
+ memory: memory
docker: dockerImage
}
}
-