diff --git a/.github/actions/nf-test/action.yml b/.github/actions/nf-test/action.yml index 243e7823..bf44d961 100644 --- a/.github/actions/nf-test/action.yml +++ b/.github/actions/nf-test/action.yml @@ -54,13 +54,9 @@ runs: conda-solver: libmamba conda-remove-defaults: true - # TODO Skip failing conda tests and document their failures - # https://github.com/nf-core/modules/issues/7017 - name: Run nf-test shell: bash env: - NFT_DIFF: ${{ env.NFT_DIFF }} - NFT_DIFF_ARGS: ${{ env.NFT_DIFF_ARGS }} NFT_WORKDIR: ${{ env.NFT_WORKDIR }} run: | nf-test test \ diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index f2d7d1dd..8b0f88c3 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -13,7 +13,7 @@ jobs: steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - - name: Set up Python 3.12 + - name: Set up Python 3.13 uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: "3.13" diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 7e8050fb..d43797d9 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@4c1e823582f43b179e2cbb49c3eade4e41f992e2 # v10 + uses: dawidd6/action-download-artifact@ac66b43f0e6a346234dd65d4d0c8fbb31cb316e5 # v11 with: workflow: linting.yml workflow_conclusion: completed diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml index 55b6e759..afcd1fd0 100644 --- a/.github/workflows/nf-test.yml +++ b/.github/workflows/nf-test.yml @@ -1,12 +1,5 @@ name: Run nf-test on: - push: - paths-ignore: - - "docs/**" - - "**/meta.yml" - - "**/*.md" - - "**/*.png" - - "**/*.svg" pull_request: paths-ignore: - "docs/**" @@ -25,6 +18,7 @@ concurrency: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NFT_TAGS: "cicd" NFT_VER: "0.9.2" 
NFT_WORKDIR: "~" NXF_ANSI_LOG: false @@ -35,7 +29,7 @@ jobs: nf-test-changes: name: nf-test-changes runs-on: # use self-hosted runners - - runs-on=$-nf-test-changes + - runs-on=${{ github.run_id }}-nf-test-changes - runner=4cpu-linux-x64 outputs: shard: ${{ steps.set-shards.outputs.shard }} @@ -58,6 +52,7 @@ jobs: NFT_VER: ${{ env.NFT_VER }} with: max_shards: 7 + tags: ${{ env.NFT_TAGS }} - name: debug run: | @@ -69,7 +64,7 @@ jobs: needs: [nf-test-changes] if: ${{ needs.nf-test-changes.outputs.total_shards != '0' }} runs-on: # use self-hosted runners - - runs-on=$-nf-test + - runs-on=${{ github.run_id }}-nf-test - runner=4cpu-linux-x64 - disk=large strategy: @@ -86,7 +81,7 @@ jobs: - isMain: false profile: "singularity" NXF_VER: - - "24.04.2" + - "24.10.5" - "latest-everything" env: NXF_ANSI_LOG: false @@ -98,23 +93,40 @@ jobs: fetch-depth: 0 - name: Run nf-test + id: run_nf_test uses: ./.github/actions/nf-test + continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} env: - NFT_DIFF: ${{ env.NFT_DIFF }} - NFT_DIFF_ARGS: ${{ env.NFT_DIFF_ARGS }} NFT_WORKDIR: ${{ env.NFT_WORKDIR }} with: profile: ${{ matrix.profile }} shard: ${{ matrix.shard }} total_shards: ${{ env.TOTAL_SHARDS }} + tags: ${{ env.NFT_TAGS }} + + - name: Report test status + if: ${{ always() }} + run: | + if [[ "${{ steps.run_nf_test.outcome }}" == "failure" ]]; then + echo "::error::Test with ${{ matrix.NXF_VER }} failed" + # Add to workflow summary + echo "## ❌ Test failed: ${{ matrix.profile }} | ${{ matrix.NXF_VER }} | Shard ${{ matrix.shard }}/${{ env.TOTAL_SHARDS }}" >> $GITHUB_STEP_SUMMARY + if [[ "${{ matrix.NXF_VER }}" == "latest-everything" ]]; then + echo "::warning::Test with latest-everything failed but will not cause workflow failure. Please check if the error is expected or if it needs fixing." 
+ fi + if [[ "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then + exit 1 + fi + fi + confirm-pass: needs: [nf-test] if: always() runs-on: # use self-hosted runners - - runs-on=$-confirm-pass + - runs-on=${{ github.run_id }}-confirm-pass - runner=2cpu-linux-x64 steps: - - name: One or more tests failed + - name: One or more tests failed (excluding latest-everything) if: ${{ contains(needs.*.result, 'failure') }} run: exit 1 @@ -133,11 +145,3 @@ jobs: echo "DEBUG: toJSON(needs) = ${{ toJSON(needs) }}" echo "DEBUG: toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" echo "::endgroup::" - - - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner - if: always() - run: | - ls -la ./ - rm -rf ./* || true - rm -rf ./.??* || true - ls -la ./ diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml index 4abaf484..0f732495 100644 --- a/.github/workflows/release-announcements.yml +++ b/.github/workflows/release-announcements.yml @@ -30,7 +30,7 @@ jobs: bsky-post: runs-on: ubuntu-latest steps: - - uses: zentered/bluesky-post-action@4aa83560bb3eac05dbad1e5f221ee339118abdd2 # v0.2.0 + - uses: zentered/bluesky-post-action@6461056ea355ea43b977e149f7bf76aaa572e5e8 # v0.3.0 with: post: | Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! diff --git a/.nf-core.yml b/.nf-core.yml index 6e85cf9a..d7927b18 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,5 +1,5 @@ repository_type: pipeline -nf_core_version: 3.3.1 +nf_core_version: 3.3.2 template: author: Stephen Watts description: A comprehensive cancer DNA/RNA analysis and reporting pipeline @@ -8,13 +8,12 @@ template: name: oncoanalyser org: nf-core outdir: . 
- version: 2.1.0 skip_features: + - gpu - nf-test + version: 2.2.0 lint: actions_ci: false - multiqc_config: false - nf_test_content: false files_exist: - lib/Utils.groovy - lib/WorkflowMain.groovy @@ -23,7 +22,9 @@ lint: - assets/nf-core-oncoanalyser_logo_light.png - docs/images/nf-core-oncoanalyser_logo_dark.png - docs/images/nf-core-oncoanalyser_logo_light.png + multiqc_config: false nextflow_config: - config_defaults: - params.fastp_umi_length - params.fastp_umi_skip + nf_test_content: false diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9d0b248d..bb41beec 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,7 +4,7 @@ repos: hooks: - id: prettier additional_dependencies: - - prettier@3.5.0 + - prettier@3.6.2 - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a6cd2a4..94543a53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,67 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project mostly adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [[2.2.0](https://github.com/nf-core/oncoanalyser/releases/tag/2.2.0)] Royal Spoonbill - 2025-08-02 + +- [241](https://github.com/nf-core/oncoanalyser/pull/241) - Apply minor fixes and updates + - Allow 'prepare reference' feature to be driven by samplesheet + - Set minimum stringr / stringi version for CUPPA environment + - Reintroduce decoy sequences for ESVEE with GRCh37 genomes + - Update WiGiTS reference data paths + - Improve FASTQ and longitudinal sample input handling + - Fix REDUX TSV collection in SAGE append subworkflow + - Update CHANGELOG.md +- [235](https://github.com/nf-core/oncoanalyser/pull/235) - Publish selected command / log files +- [234](https://github.com/nf-core/oncoanalyser/pull/234) - Update WiGiTS tools and reference data +- [233](https://github.com/nf-core/oncoanalyser/pull/233) - Update documentation +- [232](https://github.com/nf-core/oncoanalyser/pull/232) - Extend the 'prepare reference' functionality +- [231](https://github.com/nf-core/oncoanalyser/pull/231) - Implement 'purity estimate' (WISP) workflow +- [230](https://github.com/nf-core/oncoanalyser/pull/230) - Implement 'panel resource creation' workflow +- [220](https://github.com/nf-core/oncoanalyser/pull/220) - Add reports to tower.yml +- [222](https://github.com/nf-core/oncoanalyser/pull/222) - Post-release bump + +### Software dependencies + +| Dependency | Old version | New version | +| ------------------ | ----------- | ----------- | +| `AMBER` | 4.1.1 | 4.2 | +| `BamTools` | 1.3 | 1.4.2 | +| `bwa-plus` | 1.0.0 | - | +| `bwa-mem2` | - | 2.3 | +| `CHORD` | 2.1.0 | 2.1.2 | +| `COBALT` | 2.0 | 2.1 | +| `ESVEE` | 1.0.3 | 1.1.2 | +| `ISOFOX` | 1.7.1 | 1.7.2 | +| `LILAC` | 1.6 | 1.7.1 | +| `LINX` | 2.0.2 | 2.1 | +| `NEO` | 1.2 | 1.2.1 | +| `ORANGE` | 3.8.1 | 4.1 | +| `PAVE` | 1.7.1 | 1.8 | +| `PURPLE` | 4.1 | 4.2 | +| `REDUX` | 1.1.2 | 1.2 | +| `SAGE` | 4.0 | 4.1 | +| `VirusInterpreter` | 1.7 | 1.7.1 | +| `WISP` | - | 1.2 | + +### Reference data + +| Name | Old version | New 
version | +| ------------------------ | ----------- | ----------- | +| `HMF pipeline resources` | `2.1.0--1` | `2.2.0--3` | +| `HMF TSO500 resources` | `2.0.0--3` | `2.2.0--3` | + +### Parameters + +| Old name | New name | +| ----------- | ---------------------- | +| `fastp_umi` | `fastp_umi_enabled` | +| `redux_umi` | `redux_umi_enabled` | +| - | `purity_estimate_mode` | +| - | `ref_data_types` | +| - | `driver_gene_panel` | +| - | `target_regions_bed` | +| - | `hmftools_log_level` | + ## [[2.1.0](https://github.com/nf-core/oncoanalyser/releases/tag/2.1.0)] Peruvian Pelican - 2025-06-30 - [219](https://github.com/nf-core/oncoanalyser/pull/219) - Add metromap-style diagram for pipeline overview diff --git a/README.md b/README.md index e4d7f621..6cd186ca 100644 --- a/README.md +++ b/README.md @@ -5,14 +5,14 @@ -[![GitHub Actions CI Status](https://github.com/nf-core/oncoanalyser/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/oncoanalyser/actions/workflows/ci.yml) +[![GitHub Actions CI Status](https://github.com/nf-core/oncoanalyser/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/oncoanalyser/actions/workflows/nf-test.yml) [![GitHub Actions Linting Status](https://github.com/nf-core/oncoanalyser/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/oncoanalyser/actions/workflows/linting.yml) [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/oncoanalyser/results) [![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.15189386-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.15189386) [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) -[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.04.2-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) -[![nf-core template 
version](https://img.shields.io/badge/nf--core_template-3.3.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.1) +[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/) +[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) @@ -25,27 +25,35 @@ ## Introduction -**nf-core/oncoanalyser** is a Nextflow pipeline for the comprehensive analysis of cancer genomes and transcriptomes +**nf-core/oncoanalyser** is a Nextflow pipeline for the comprehensive analysis of cancer DNA and RNA sequencing data using the [WiGiTS](https://github.com/hartwigmedical/hmftools) toolkit from the Hartwig Medical Foundation. 
The pipeline supports a wide range of experimental setups: -- FASTQ, BAM, or CRAM input files -- WGS (whole genome sequencing), WTS (whole transcriptome sequencing), and targeted / panel sequencing (built-in support - for the [TSO500 - panel](https://sapac.illumina.com/products/by-type/clinical-research-products/trusight-oncology-500.html) with other - panels and exome requiring [panel reference data - generation](https://github.com/hartwigmedical/hmftools/blob/master/pipeline/README_TARGETED.md)) -- Paired tumor / normal and tumor-only sample setups, donor sample support for further normal subtraction (e.g. for - patients with bone marrow transplants or other contaminants in the tumor) +- FASTQ, BAM, and / or CRAM input files +- WGS (whole genome sequencing), WTS (whole transcriptome sequencing), and targeted / panel sequencing1 +- Paired tumor / normal and tumor-only samples, and support for donor samples for further normal subtraction +- Purity estimate for longitudinal samples using genomic features of the primary sample from the same patient2 - UMI (unique molecular identifier) processing supported for DNA sequencing data - Most GRCh37 and GRCh38 reference genome builds +1 built-in support for the [TSO500 +panel](https://www.illumina.com/products/by-type/clinical-research-products/trusight-oncology-500.html) with other +panels and exomes requiring [creation of custom panel reference +data](https://nf-co.re/oncoanalyser/usage#custom-panels) +
+2 for example a primary WGS tissue biopsy and longitudinal low-pass WGS ccfDNA sample taken from the +same patient + ## Pipeline overview

-The pipeline mainly uses tools from [WiGiTS](https://github.com/hartwigmedical/hmftools), as well as some external -tools. Due to the limitations of panel data, certain tools (indicated with `*` below) do not run in `targeted` mode. +The pipeline mainly uses tools from [WiGiTS](https://github.com/hartwigmedical/hmftools), as well as some other external +tools. There are [several workflows available](https://nf-co.re/oncoanalyser/usage#introduction) in `oncoanalyser` and +the tool information below primarily relates to the `wgts` and `targeted` analysis modes. + +> [!NOTE] +> Due to the limitations of panel data, certain tools (indicated with `*` below) do not run in `targeted` mode. - Read alignment: [BWA-MEM2](https://github.com/bwa-mem2/bwa-mem2) (DNA), [STAR](https://github.com/alexdobin/STAR) (RNA) - Read post-processing: [REDUX](https://github.com/hartwigmedical/hmftools/tree/master/redux) (DNA), [Picard MarkDuplicates](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard) (RNA) @@ -63,6 +71,10 @@ tools. Due to the limitations of panel data, certain tools (indicated with `*` b - Pharmacogenomics: [PEACH](https://github.com/hartwigmedical/hmftools/tree/master/peach) - Summary report: [ORANGE](https://github.com/hartwigmedical/hmftools/tree/master/orange), [linxreport](https://github.com/umccr/linxreport) +For the `purity_estimate` mode, several of the above tools are run with adjusted configuration in addition to the following. 
+ +- Tumor fraction estimation: [WISP](https://github.com/hartwigmedical/hmftools/tree/master/wisp) + ## Usage > [!NOTE] @@ -82,7 +94,7 @@ Launch `oncoanalyser`: ```bash nextflow run nf-core/oncoanalyser \ -profile \ - -revision 2.1.0 \ + -revision 2.2.0 \ --mode \ --genome \ --input samplesheet.csv \ diff --git a/conf/base.config b/conf/base.config index 0c51cce6..27564110 100644 --- a/conf/base.config +++ b/conf/base.config @@ -63,5 +63,6 @@ process { } withLabel: process_gpu { ext.use_gpu = { workflow.profile.contains('gpu') } + accelerator = { workflow.profile.contains('gpu') ? 1 : null } } } diff --git a/conf/hmf_data.config b/conf/hmf_data.config index fbd9a445..4cbbadf0 100644 --- a/conf/hmf_data.config +++ b/conf/hmf_data.config @@ -9,23 +9,21 @@ params { // CUPPA cuppa_alt_sj = 'misc/cuppa/alt_sj.selected_loci.37.tsv.gz' cuppa_classifier = 'misc/cuppa/cuppa_classifier.37.pickle.gz' - // SV Prep - sv_prep_blocklist = 'dna/sv/sv_prep_blacklist.37.bed' // ESVEE decoy_sequences_image = 'dna/sv/hg38_decoys.fa.img' gridss_pon_breakends = 'dna/sv/sgl_pon.37.bed.gz' gridss_pon_breakpoints = 'dna/sv/sv_pon.37.bedpe.gz' repeatmasker_annotations = 'dna/sv/repeat_mask_data.37.fa.gz' // Isofox - alt_sj_distribution = 'rna/isofox.hmf_3444.alt_sj_cohort.37.csv' - gene_exp_distribution = 'rna/isofox.hmf_3444.gene_distribution.37.csv' + alt_sj_distribution = 'rna/isofox.hmf_3444.alt_sj_cohort.37.csv.gz' + gene_exp_distribution = 'rna/isofox.hmf_3444.gene_distribution.37.csv.gz' isofox_counts = 'rna/read_151_exp_counts.37.csv' isofox_gc_ratios = 'rna/read_100_exp_gc_ratios.37.csv' // LILAC lilac_resources = 'misc/lilac/' // Neo neo_resources = 'misc/neo/binding/' - cohort_tpm_medians = 'misc/neo/tpm_cohort/isofox.hmf_3444.transcript_medians.37.csv' + cohort_tpm_medians = 'misc/neo/tpm_cohort/isofox.hmf_3444.transcript_medians.37.csv.gz' // CIDER cider_blastdb = 'misc/cider/blastdb/' // PEACH @@ -76,23 +74,21 @@ params { // CUPPA cuppa_alt_sj = 
'misc/cuppa/alt_sj.selected_loci.38.tsv.gz' cuppa_classifier = 'misc/cuppa/cuppa_classifier.38.pickle.gz' - // SV Prep - sv_prep_blocklist = 'dna/sv/sv_prep_blacklist.38.bed' // ESVEE decoy_sequences_image = [] gridss_pon_breakends = 'dna/sv/sgl_pon.38.bed.gz' gridss_pon_breakpoints = 'dna/sv/sv_pon.38.bedpe.gz' repeatmasker_annotations = 'dna/sv/repeat_mask_data.38.fa.gz' // Isofox - alt_sj_distribution = 'rna/isofox.hmf_3444.alt_sj_cohort.38.csv' - gene_exp_distribution = 'rna/isofox.hmf_3444.gene_distribution.38.csv' + alt_sj_distribution = 'rna/isofox.hmf_38_151_2600.alt_sj_cohort.csv.gz' + gene_exp_distribution = 'rna/isofox.hmf_38_151_2600.gene_distribution.csv.gz' isofox_counts = 'rna/read_151_exp_counts.38.csv' isofox_gc_ratios = 'rna/read_100_exp_gc_ratios.38.csv' // LILAC lilac_resources = 'misc/lilac/' // Neo neo_resources = 'misc/neo/binding/' - cohort_tpm_medians = 'misc/neo/tpm_cohort/isofox.hmf_3444.transcript_medians.38.csv' + cohort_tpm_medians = 'misc/neo/tpm_cohort/isofox.hmf_38_151_2600.transcript_medians.csv.gz' // CIDER cider_blastdb = 'misc/cider/blastdb/' // PEACH diff --git a/conf/modules.config b/conf/modules.config index 3293e4bf..aa148a85 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -13,9 +13,8 @@ process { withName: 'WRITE_REFERENCE_DATA' { - def date = new java.util.Date().format('yyyyMMdd_HHmmss'); publishDir = [ - path: { "${params.outdir}/reference_data/${workflow_version}/${date}" }, + path: { "${params.outdir}/reference_data/${workflow.manifest.version}" }, mode: params.publish_dir_mode, ] } @@ -32,103 +31,124 @@ process { publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : "${meta.key}/alignments/rna/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/alignments/rna/${filename}") }, ] } withName: 'REDUX' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/alignments/dna/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/alignments/dna/${filename}") }, ] } withName: 'AMBER' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") }, ] } withName: 'COBALT' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") }, ] } - withName: '.*:ESVEE_CALLING:ESVEE_(PREP|ASSEMBLE|DEPTH_ANNOTATOR|CALL)' { + withName: '.*:ESVEE_CALLING:ESVEE' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/esvee/${filename}" } + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") }, ] } withName: '.*:SAGE_CALLING:SAGE_GERMLINE' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : "${meta.key}/sage/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/sage_calling/${filename}") }, ] } withName: '.*:SAGE_CALLING:SAGE_SOMATIC' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/sage/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/sage_calling/${filename}") }, ] } - withName: '.*:SAGE_APPEND:SAGE_APPEND_(?:GERMLINE|SOMATIC)' { + withName: '.*:SAGE_APPEND:SAGE_APPEND_GERMLINE' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/sage/append/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/sage_append/germline") }, + ] + } + + withName: '.*:SAGE_APPEND:SAGE_APPEND_SOMATIC' { + ext.log_level = { "${params.hmftools_log_level}" } + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/sage_append/somatic") }, ] } withName: '.*:PAVE_ANNOTATION:PAVE_(?:GERMLINE|SOMATIC)' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/pave/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/pave/${filename}") }, ] } withName: 'PURPLE' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : "${meta.key}/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") }, ] } withName: '.*:LINX_ANNOTATION:LINX_GERMLINE' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/linx/germline_annotations/" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/linx/germline_annotations/") }, ] } withName: '.*:LINX_ANNOTATION:LINX_SOMATIC' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/linx/somatic_annotations/" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/linx/somatic_annotations/") }, ] } withName: '.*:LINX_PLOTTING:LINX_VISUALISER' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/linx/somatic_plots/" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/linx/somatic_plots/") }, ] } @@ -136,31 +156,34 @@ process { publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/linx/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/linx/${filename}") }, ] } withName: 'CIDER' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : "${meta.key}/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") }, ] } withName: 'BAMTOOLS' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/bamtools/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/bamtools/${filename}") }, ] } withName: 'CHORD' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") }, ] } @@ -171,18 +194,20 @@ process { } withName: 'LILAC' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" } + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") }, ] } withName: 'SIGS' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") }, ] } @@ -190,7 +215,7 @@ process { publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : "${meta.id}/teal/${new File(filename).name}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.id}/teal/${new File(filename).name}") }, ] } @@ -198,80 +223,138 @@ process { publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/virusbreakend/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/virusbreakend/${filename}") }, ] } withName: 'VIRUSINTERPRETER' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") }, ] } withName: 'ISOFOX' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") }, ] } withName: 'NEO_SCORER' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/neo/scorer/" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/neo/scorer/") }, ] } withName: '.*:NEO_PREDICTION:NEO_ANNOTATE_FUSIONS' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : "${meta.key}/neo/annotated_fusions/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/neo/annotated_fusions/${filename}") }, ] } withName: 'NEO_FINDER' { + ext.log_level = { "${params.hmftools_log_level}" } + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/neo/finder/") }, + ] + } + + withName: 'WISP' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/neo/finder/" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") }, ] } withName: 'CUPPA' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : "${meta.key}/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") }, ] } withName: 'PEACH' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : "${meta.key}/${filename}" }, + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/${filename}") }, ] } withName: 'ORANGE' { + ext.log_level = { "${params.hmftools_log_level}" } publishDir = [ path: { "${params.outdir}" }, mode: params.publish_dir_mode, // NOTE(SW): java.io.File and Nextflow's file do not work here, resorting to string splitting - saveAs: { filename -> - if (filename.equals('versions.yml')) { - return null - } else { - def tokens = filename.split('[/]') - return "${meta.key}/orange/${tokens[-1]}" - } - } + saveAs: { filename -> get_saveas_path(meta, task, filename, "${meta.key}/orange/${filename.split('[/]')[-1]}") }, + ] + } + + withName: 'COBALT_PANEL_NORMALISATION' { + ext.log_level = { "${params.hmftools_log_level}" } + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> get_saveas_path(meta, task, filename, "panel_resources/${filename}", panel_resource_creation = true) }, + ] + } + + withName: 'PAVE_PON_PANEL_CREATION' { + ext.log_level = { "${params.hmftools_log_level}" } + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> get_saveas_path(meta, task, filename, "panel_resources/${filename}", panel_resource_creation = true) }, + ] + } + + withName: 'ISOFOX_PANEL_NORMALISATION' { + ext.log_level = { "${params.hmftools_log_level}" } + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> get_saveas_path(meta, task, filename, "panel_resources/${filename}", panel_resource_creation = true) }, ] } } + +def get_saveas_path(meta, task, filename, path, panel_resource_creation=false) { + if (filename.equals('versions.yml')) { + return null + } else if (filename.contains('.command.')) { + if (filename ==~ /.*\.command\.(sh|out|err|log|run)/) { + def process_name = task.process.toLowerCase().replaceFirst(/^.+:/, '') + + if (panel_resource_creation) { + return 
"panel_resources/logs/${process_name}${filename}" + } else { + return "${meta.key}/logs/${meta.id}.${process_name}${filename}" + } + + } else { + return null + } + } else { + return path + } +} diff --git a/conf/panel_data.config b/conf/panel_data.config index 352f5dd2..80de6843 100644 --- a/conf/panel_data.config +++ b/conf/panel_data.config @@ -5,33 +5,29 @@ params { tso500 { '37' { - driver_gene_panel = 'common/DriverGenePanel.tso500.37.tsv' - sage_actionable_panel = 'variants/ActionableCodingPanel.tso500.37.bed.gz' - sage_coverage_panel = 'variants/CoverageCodingPanel.tso500.37.bed.gz' - pon_artefacts = 'variants/pon_artefacts.tso500.37.tsv.gz' - target_region_bed = 'copy_number/target_regions_definition.tso500.37.bed.gz' - target_region_normalisation = 'copy_number/cobalt_normalisation.tso500.37.tsv' - target_region_ratios = 'copy_number/target_regions_ratios.tso500.37.tsv' - target_region_msi_indels = 'copy_number/target_regions_msi_indels.tso500.37.tsv' - isofox_tpm_norm = 'rna_resources/isofox.gene_normalisation.tso500.37.csv' - isofox_gene_ids = 'rna_resources/tso500_rna_gene_ids.csv' - isofox_counts = 'rna_resources/read_93_exp_counts.37.csv' - isofox_gc_ratios = 'rna_resources/read_93_exp_gc_ratios.37.csv' + driver_gene_panel = 'driver_genes.tso500.37.tsv' + pon_artefacts = 'pon_artefacts.tso500.37.tsv.gz' + target_region_bed = 'panel_definition.tso500.37.bed.gz' + target_region_normalisation = 'cobalt_normalisation.tso500.37.tsv' + target_region_ratios = 'tmb_ratio.tso500.37.tsv' + target_region_msi_indels = 'msi_indels.tso500.37.tsv' + isofox_tpm_norm = 'isofox.gene_normalisation.tso500.37.csv' + isofox_gene_ids = 'tso500_rna_gene_ids.csv' + isofox_counts = 'read_93_exp_counts.37.csv' + isofox_gc_ratios = 'read_93_exp_gc_ratios.37.csv' } '38' { - driver_gene_panel = 'common/DriverGenePanel.tso500.38.tsv' - sage_actionable_panel = 'variants/ActionableCodingPanel.tso500.38.bed.gz' - sage_coverage_panel = 'variants/CoverageCodingPanel.tso500.38.bed.gz' - 
pon_artefacts = 'variants/pon_artefacts.tso500.38.tsv.gz' - target_region_bed = 'copy_number/target_regions_definition.tso500.38.bed.gz' - target_region_normalisation = 'copy_number/cobalt_normalisation.tso500.38.tsv' - target_region_ratios = 'copy_number/target_regions_ratios.tso500.38.tsv' - target_region_msi_indels = 'copy_number/target_regions_msi_indels.tso500.38.tsv' - isofox_tpm_norm = 'rna_resources/isofox.gene_normalisation.tso500.38.csv' - isofox_gene_ids = 'rna_resources/tso500_rna_gene_ids.csv' - isofox_counts = 'rna_resources/read_93_exp_counts.38.csv' - isofox_gc_ratios = 'rna_resources/read_93_exp_gc_ratios.38.csv' + driver_gene_panel = 'driver_genes.tso500.38.tsv' + pon_artefacts = 'pon_artefacts.tso500.38.tsv.gz' + target_region_bed = 'panel_definition.tso500.38.bed.gz' + target_region_normalisation = 'cobalt_normalisation.tso500.38.tsv' + target_region_ratios = 'tmb_ratio.tso500.38.tsv' + target_region_msi_indels = 'msi_indels.tso500.38.tsv' + isofox_tpm_norm = 'isofox.gene_normalisation.tso500.38.csv' + isofox_gene_ids = 'tso500_rna_gene_ids.csv' + isofox_counts = 'read_93_exp_counts.38.csv' + isofox_gc_ratios = 'read_93_exp_gc_ratios.38.csv' } } diff --git a/docs/output.md b/docs/output.md index c2d12e45..cb67d80d 100644 --- a/docs/output.md +++ b/docs/output.md @@ -3,7 +3,8 @@ ## Introduction This document describes the output produced by the pipeline. The directories listed below will be created in the results -directory after the pipeline has finished. All paths are relative to the top-level results directory. +directory for a typical WGTS analysis after the pipeline has finished. All paths are relative to the top-level results +directory. 
```tree output/ @@ -20,6 +21,7 @@ output/ │   ├── isofox/ │   ├── lilac/ │   ├── linx/ +│   ├── logs/ │   ├── orange/ │   ├── pave/ │   ├── peach/ @@ -76,9 +78,12 @@ output/ - [CUPPA](#cuppa) - Tissue of origin prediction - [Pharmacogenomics](#pharmacogenomics) - [PEACH](#peach) - Pharmacogenomic assessment +- [Tumor fraction estimate](#tumor-fraction-estimate) + - [WISP](#wisp) - Tumor fraction estimate for longitudinal samples - [Report generation](#report-generation) - [ORANGE](#orange) - Summary report - [linxreport](#linxreport) - Interactive LINX report +- [Logs](#logs) - Run command and log files per tool/process - [Pipeline information](#pipeline-information) - Workflow execution metrics ### Read alignment @@ -154,12 +159,10 @@ _Picard MarkDuplicates is only run on RNA alignments_ Output files - `/sage/append/` - - `.sage.append.vcf.gz`: Tumor DNA sample small variant VCF with RNA data appended. - `.sage.append.vcf.gz`: Normal DNA sample small variant VCF with RNA data appended. - `/sage/somatic/` - - `.sage.bqr.png`: Normal DNA sample base quality recalibration metrics plot. - `.sage.bqr.tsv`: Normal DNA sample base quality recalibration metrics. - `.sage.bqr.png`: Tumor DNA sample base quality recalibration metrics plot. @@ -209,7 +212,6 @@ information with regards to transcript and coding effects. Output files - `/esvee/prep/` - - `.esvee.prep.bam`: Tumor DNA sample BAM with candidate SV reads. - `.esvee.prep.bam.bai`: Tumor DNA sample BAM index. - `.esvee.prep.disc_stats.tsv`: Tumor DNA sample discordant reads stats. @@ -219,7 +221,6 @@ information with regards to transcript and coding effects. - `.esvee.prep.bam.bai`: Tumor DNA sample BAM index. - `/esvee/assemble/` - - `.esvee.assembly.tsv`: Tumor DNA sample breakend assemblies. - `.esvee.alignment.tsv`: Tumor DNA sample assemblies realigned to the reference genome. - `.esvee.breakend.tsv`: Tumor DNA sample breakends. @@ -228,7 +229,6 @@ information with regards to transcript and coding effects. 
- `.esvee.raw.vcf.gz.tbi`: Tumor DNA sample VCF with candidate breakends. - `/esvee/depth_annotation/` - - `.esvee.ref_depth.vcf.gz`: Tumor DNA sample VCF annotated with normal sample read depths. - `.esvee.ref_depth.vcf.gz.tbi`: Tumor DNA sample VCF index. @@ -330,7 +330,6 @@ purity/ploidy and annotates both small and structural variant calls with copy-nu Output files - `/linx/germline_annotations/` - - `linx.version`: LINX version file. - `.linx.germline.breakend.tsv`: Normal DNA sample breakend data. - `.linx.germline.clusters.tsv`: Normal DNA sample clustered events. @@ -340,7 +339,6 @@ purity/ploidy and annotates both small and structural variant calls with copy-nu - `.linx.germline.svs.tsv`: Normal DNA sample structural variants. - `/linx/somatic_annotations/` - - `linx.version`: LINX version file. - `.linx.breakend.tsv`: Tumor DNA sample breakend data. - `.linx.clusters.tsv`: Tumor DNA sample clustered events. @@ -474,11 +472,9 @@ for each of the IG and TCR loci including an abundance estimate. Output files - `/neo/finder/` - - `.neo_data.tsv`: Tumor sample neoepitope candidates. - `/neo/annotated_fusions/` - - `.isf.neoepitope.tsv`: Tumor sample annotated Isofox fusions. - `/neo/scorer/` @@ -551,7 +547,7 @@ using DNA and/or RNA features generated by upstream WiGiTS components. - `.peach.events.tsv`: Normal DNA sample variant events. - `.peach.gene.events.tsv`: Normal DNA sample variant events (linked by gene). - `.peach.haplotypes.all.tsv`: Normal DNA sample all haplotypes. - - `.peach.haplotypes.best.tsv`: Normal DNA sample best haplotypes.. + - `.peach.haplotypes.best.tsv`: Normal DNA sample best haplotypes. - `.peach.qc.tsv`: PEACH QC file. @@ -559,6 +555,29 @@ using DNA and/or RNA features generated by upstream WiGiTS components. [PEACH](https://github.com/hartwigmedical/hmftools/tree/master/peach) infers haplotypes for interpretation in a pharmacogenomic context. +### Tumor fraction estimate + +#### WISP + +
+Output files + +- `/wisp/` + - `.cn_gc_ratio_fit.png`: Longitudinal sample copy number GC ratio fit. + - `_.wisp.amber_loh.tsv`: Longitudinal sample LOH sites used for tumor fraction estimation. + - `_.wisp.cn_plot_calcs.tsv`: Longitudinal sample copy number fit coefficients. + - `_.wisp.cn_segments.tsv`: Longitudinal sample copy number segments. + - `_.wisp.fragment_length.tsv`: Longitudinal sample fragment lengths stats. + - `_.wisp.somatic_peak.tsv`: Longitudinal sample implied tumor fraction per somatic variant. + - `_.wisp.somatic_variants.tsv`: Longitudinal sample counts and filtering rules per variant used in the SNV tumor fraction estimate. + - `_.wisp.summary.tsv`: Longitudinal sample summary. + +
+ +[WISP](https://github.com/hartwigmedical/hmftools/tree/master/wisp) estimates tumor fraction in +a given sample (typically ctDNA), guided by the biomarkers identified in a prior analysis of the same +patient (typically a primary tissue biopsy). + ### Report generation #### ORANGE @@ -587,6 +606,20 @@ hmftool components into a single static PDF report. [linxreport](https://github.com/umccr/linxreport) generates an interactive report containing LINX annotations and plots. +### Logs + +
+Output files + +- `/logs/` + - `..command.sh`: Run command with tool arguments + - `..command.out`: Standard output + - `..command.err`: Standard error + - `..command.log`: Combined standard output and error (may not exist for some executors) +
+ +The logs directory stores the `.command.*` files for each tool from the Nextflow work directory. + ### Pipeline information 
diff --git a/docs/usage.md b/docs/usage.md index 2815cffc..1b9e4178 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -10,53 +10,67 @@ The `oncoanalyser` pipeline typically runs from FASTQ, BAM, or CRAM [input files most GRCh37 and GRCh38 human [reference genome builds](#custom-genomes), and provides UMI ([unique molecular identifier](#umi-processing)) processing for DNA sequencing data. -The pipeline supports two workflow modes: (1) whole genome and/or transcriptome, and (2) targeted panel. Both modes -accept DNA and RNA sequencing data from matched tumor / normal (with optional -[donor](#paired-tumor-and-normal-dna-with-donor-sample) sample) and tumor-only samples. The below table shows the -supported [sample setups](#sample-setups): - -| Data Type | Tumor DNA | Normal DNA | Donor DNA | Tumor RNA | -| --------- | ------------------ | ------------------ | ------------------ | ------------------ | -| DNA | :white_check_mark: | - | - | - | -| DNA | :white_check_mark: | :white_check_mark: | - | - | -| DNA | :white_check_mark: | :white_check_mark: | :white_check_mark: | - | -| DNA + RNA | :white_check_mark: | - | - | :white_check_mark: | -| DNA + RNA | :white_check_mark: | :white_check_mark: | - | :white_check_mark: | -| DNA + RNA | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | -| RNA | - | - | - | :white_check_mark: | +Two main analysis modes are supported by `oncoanalyser`: -## Running the pipeline +- [**wgts**](#whole-genome--transcriptome-sequencing-wgts): whole genome and/or transcriptome sequencing +- [**targeted**](#targeted-sequencing): targeted/panel sequencing -:::tip +Both modes accept various combinations of DNA and/or RNA sequencing data from tumor-only or matched tumor / normal (with optional +[donor](#paired-tumor-and-normal-dna-with-donor-sample) sample). 
The below table shows the supported [sample setups](#sample-setups): -Jump to [FAQ and troubleshooting](/oncoanalyser/2.1.0/docs/usage/faq_and_troubleshooting) +| DNA samples | RNA samples | +| ------------------------ | ----------- | +| `tumor` | - | +| `tumor`+`normal` | - | +| `tumor`+`normal`+`donor` | - | +| `tumor` | `tumor` | +| `tumor`+`normal` | `tumor` | +| `tumor`+`normal`+`donor` | `tumor` | +| - | `tumor` | -::: +Besides the main analysis modes, several other modes are also available: + +- [**purity_estimate**](#purity-estimate): tumor fraction estimation in longitudinal samples (e.g. for MRD) +- [**prepare_reference**](#automatic-staging): staging genomes and WiGiTS tool reference data +- [**panel_resource_creation**](#custom-panels): creating reference data for custom panels + +## Running the pipeline + +If you intend to run `oncoanalyser` on more than one sample, we recommend first [staging](#staging-reference-data) and +[configuring](#configuring-reference-data) reference data (genome and tool specific data). Otherwise, reference data is +automatically staged every run resulting in unnecessary disk/network usage. A typical command for running `oncoanalyser` is shown below: ```bash nextflow run nf-core/oncoanalyser \ + -revision 2.2.0 \ + -config reference_data.config \ -profile docker \ - -revision 2.1.0 \ --mode wgts \ --genome GRCh38_hmf \ --input samplesheet.csv \ - --outdir output/ + --outdir output/ ``` -The [samplesheet](#samplesheet) provided to `--input` argument contains input sample details and corresponding files to -be analysed. +Below is a brief description of each argument (`-config` is optional but recommended): -Additionally, various features of `oncoanalyser` can be configured by using a file provided to the `-config` argument. 
-This is generally recommended and it can be used to customise a number of settings or resources including: +- `-profile`: [configuration presets](#-profile) for different compute environments +- `-revision`: `oncoanalyser` version to run (can be a git [tag](https://github.com/nf-core/oncoanalyser/tags), [branch](https://github.com/nf-core/oncoanalyser/branches), or commit hash) +- `--mode`: [run mode](#run-modes) +- `--genome`: genome version, typically `GRCh38_hmf` or `GRCh37_hmf` +- `--input`: the [samplesheet](#samplesheet) containing sample details and corresponding files to be analysed +- `--outdir`: output directory +- `-config`: one or more configuration files for customising e.g. genome and tool specific data (as mentioned above), + normalisation data for [custom panels](#custom-panels) (TSO500 panel supported by default), [compute resources](#compute-resources), or + [other configuration](#custom-configuration) + +:::tip - -- Reference genome and tool specific data: it is strongly recommended to [stage](#staging-reference-data) these files. - Otherwise, `oncoanalyser` automatically stages them every run resulting in unnecessary disk/network usage -- Panel normalisation data: all panels except the built-in TSO500 panel require [additional - setup](#panel-reference-data) of reference data -- [Other configuration](#custom-configuration): this may include [compute resources](#compute-resources) or [UMI - settings](#umi-processing) +If you encounter any issues setting up or running `oncoanalyser`, please see +[FAQ and troubleshooting](/oncoanalyser/2.2.0/docs/usage/faq_and_troubleshooting) + +::: ### Outputs @@ -87,7 +101,7 @@ outdir: 'output/' and be run using this command: ```bash -nextflow run nf-core/oncoanalyser -revision 2.1.0 -profile docker -params-file params.yaml +nextflow run nf-core/oncoanalyser -revision 2.2.0 -profile docker -params-file params.yaml ``` You can also generate such `yaml`/`json` files via [nf-core/launch](https://nf-co.re/launch). 
@@ -104,7 +118,7 @@ nextflow pull nf-core/oncoanalyser It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -First, go to the [nf-core/oncoanalyser releases page](https://github.com/nf-core/oncoanalyser/releases) and find the latest pipeline version - numeric only (e.g. `2.1.0`). Then specify this when running the pipeline with `-revision` (one hyphen) - e.g. `-revision 2.1.0`. Of course, you can switch to another version by changing the number after the `-revision` flag. +First, go to the [nf-core/oncoanalyser releases page](https://github.com/nf-core/oncoanalyser/releases) and find the latest pipeline version - numeric only (e.g. `2.2.0`). Then specify this when running the pipeline with `-revision` (one hyphen) - e.g. `-revision 2.2.0`. Of course, you can switch to another version by changing the number after the `-revision` flag. This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, in the `/pipeline_info/software_versions.yml` file. @@ -128,7 +142,7 @@ row as the first line with the below columns: | `sample_id` | Sample identifier | | `sample_type` | Sample type: `tumor`, `normal` | | `sequence_type` | Sequence type: `dna`, `rna` | -| `filetype` | File type: e.g. `fastq`, `bam`, `bai`; a full list of valid values can be found [here](https://github.com/nf-core/oncoanalyser/blob/2.1.0/lib/Constants.groovy#L56) | +| `filetype` | File type: e.g. 
`fastq`, `bam`, `bai`; a full list of valid values can be found [here](https://github.com/nf-core/oncoanalyser/blob/2.2.0/lib/Constants.groovy#L80) | | `info` | Additional sample information such as sequencing library and lane for [FASTQ](#fastq) files, this column is only required when running an analysis from FASTQ | | `filepath` | Absolute filepath to input file, which can be a local filepath or supported protocol (http, https, ftp, s3, az, gz) | @@ -164,7 +178,7 @@ Currently only gzip compressed, non-interleaved paired-end FASTQ files are curre ::: -#### BAM and CRAM +#### BAM To run from BAM, specify `bam` in the `filetype` field: @@ -183,27 +197,74 @@ PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bam,/path/to/PATIENT1-T.dna.bam PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bai,/other/dir/PATIENT1-T.dna.bam.bai ``` -To run from CRAM, simply provide the CRAM and optionally the CRAM index with `bam` or `bai` in the `filetype` field: +#### CRAM -```csv title="samplesheet.cram_crai.csv" +:::info + +To run analyses starting from CRAM, you must use the CRAM format version ≤3.0 with the reference fully embedded. An +example command converting to the appropriate CRAM format is shown: + +```bash +samtools view \ + --cram \ + --output-fmt-option version=3.0 \ + --output-fmt-option embed_ref=1 \ + --output-fmt-option reference=/path/to/reference.fasta \ + --output sample.cram \ + --threads 4 \ + --write-index \ + sample.bam +``` + +::: + +To run from CRAM, use `cram` and `crai` in the `filetype` field. 
`crai` only needs to be provided if the CRAM index is +not in the same directory as the CRAM file: + +```csv title="samplesheet.cram.csv" +group_id,subject_id,sample_id,sample_type,sequence_type,filetype,filepath +PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,cram,/path/to/PATIENT1-T.dna.cram +PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,crai,/other/dir/PATIENT1-T.dna.cram.crai +``` + +Similarly, for REDUX CRAMs, provide `cram_redux` and optionally `crai`: + +```csv title="samplesheet.redux_cram.csv" group_id,subject_id,sample_id,sample_type,sequence_type,filetype,filepath -PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bam,/path/to/PATIENT1-T.dna.cram -PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bai,/other/dir/PATIENT1-T.dna.cram.crai +PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,cram_redux,/path/to/PATIENT1-T.dna.redux.cram +PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,crai,/other/dir/PATIENT1-T.dna.cram.crai ``` -#### REDUX BAM +:::warning + +There is a fixed performance cost associated with reading CRAM files. This means the time it takes to read large CRAMs +vs BAMs is similar, whereas reading small CRAMs can take significantly longer (>10x) than reading small BAMs. If you +have small CRAMs (e.g. <10GB), it will be faster to decompress the CRAM into a BAM, and then run `oncoanalyser` with +this BAM. + +This performance issue is due to how CRAM reading is implemented in +[htsjdk](https://github.com/samtools/htsjdk/blob/master/src/main/java/htsjdk/samtools/CRAMFileReader.java) (which is +used throughout the WiGiTS tools). We plan to address this issue in future releases of `oncoanalyser`. + +::: + +#### REDUX BAM / CRAM When running an analysis with DNA data from FASTQ, two of the most time consuming and resource intensive pipeline steps are [BWA-MEM2](https://github.com/bwa-mem2/bwa-mem2) read alignment and -[REDUX](https://github.com/hartwigmedical/hmftools/tree/master/redux) alignment processing. 
Where the REDUX output BAM -already exists for a given sample from a prior analysis, these read alignment and processing steps can be skipped by -providing the REDUX BAM as `bam_redux` in the `filetype` field. The REDUX BAM index can also optionally be provided with -`filetype` as `bai` if required. +[REDUX](https://github.com/hartwigmedical/hmftools/tree/master/redux) alignment processing. + +`oncoanalyser` can be run starting from REDUX BAMs or CRAMs if they already exist from a prior analysis. + +For REDUX BAMs, provide `bam_redux`/`cram_redux` in the `filetype` field, and optionally the BAM/CRAM index to `bai`/`crai` (only required +if indexes are not in the same directory as the BAM/CRAM): ```csv title="samplesheet.redux_bam_bai.csv" group_id,subject_id,sample_id,sample_type,sequence_type,filetype,filepath PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bam_redux,/path/to/PATIENT1-T.dna.redux.bam PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bai,/other/dir/PATIENT1-T.dna.redux.bam.bai +PATIENT2,PATIENT2,PATIENT2-T,tumor,dna,cram_redux,/path/to/PATIENT2-T.dna.redux.cram +PATIENT2,PATIENT2,PATIENT2-T,tumor,dna,crai,/other/dir/PATIENT2-T.dna.redux.cram.crai ``` The `*.jitter_params.tsv` and `*.ms_table.tsv.gz` REDUX output files are expected to be in the same directory as the @@ -226,10 +287,10 @@ You can also [start from existing inputs](#starting-from-existing-inputs) other :::warning -When starting from REDUX BAM, the filenames must have the format: +When starting from REDUX BAM/CRAM, the filenames must have the format: -- `.redux.bam` -- `.redux.bam.bai` +- `.redux.bam` or `.redux.cram` +- `.redux.bam.bai` or `.redux.cram.crai` - `.jitter_params.tsv` - `.ms_table.tsv.gz` @@ -312,43 +373,46 @@ documentation](https://github.com/hartwigmedical/hmftools/blob/master/pipeline/R ### Staging reference data By default `oncoanalyser` will download the required pre-configured reference data (based on the provided samplesheet -and CLI arguments) to the Nextflow work directory during 
every run before proceeding with the analysis. It is therefore -strongly recommended to first stage and configure reference data to avoid repeated retrieval when performing multiple -`oncoanalyser` analyses. +and CLI arguments) to the Nextflow work directory during every run before proceeding with the analysis. -#### Automatic staging +However, it is strongly recommended to first stage and configure reference data to avoid repeated retrieval when +performing multiple `oncoanalyser` analyses. See below for instructions. -All reference data required for an analysis can be staged and prepared automatically by `oncoanalyser`. This is done by -configuring the desired analysis and then including the `--prepare_reference_only` argument, which causes `oncoanalyser` -to write reference data to the specified output directory without running the full pipeline. +#### Automatic staging -For example the below samplesheet and command for analysing DNA data in `wgts` mode will stage the required `GRCh38_hmf` -genome (and indexes) and [WiGiTS](https://github.com/hartwigmedical/hmftools) resources files. As this analysis only -involves WGS data, no reference data files related to RNA or the `panel` mode will be retrieved. - -```csv title="samplesheet.tn_dna.csv" -group_id,subject_id,sample_id,sample_type,sequence_type,filetype,filepath -PATIENT1,PATIENT1,PATIENT1-N,normal,dna,bam,/path/to/PATIENT1-N.dna.bam -PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bam,/path/to/PATIENT1-T.dna.bam -``` +The reference data required for running `oncoanalyser` can be staged automatically using +`--mode prepare_reference` and specifying `--ref_data_types`. The below example command will stage the required +`GRCh38_hmf` genome (and indexes) and [WiGiTS](https://github.com/hartwigmedical/hmftools) resources files for WGS +analysis from BAM. 
```bash nextflow run nf-core/oncoanalyser \ - -revision 2.1.0 \ + -revision 2.2.0 \ -profile docker \ - --mode wgts \ + --mode prepare_reference \ + --ref_data_types wgs \ --genome GRCh38_hmf \ - --input samplesheet.csv \ - --outdir output/ \ - --prepare_reference_only + --outdir output/ ``` -Executing the above command will download and prepare default reference data without running any analysis, and once -complete the prepared reference files can be found in `./prepare_reference/reference_data/2.1.0//`. You can then provide -a config file that points to these reference files (see [Configuring reference data](#configuring-reference-data)) which can -be used for subsequent `oncoanalyser` runs. - -It is recommended to remove the Nextflow work directory once reference data staging is complete to free disk space. +Once the above commands complete, the stated reference data can be found in `/reference_data/2.2.0`. You will +then need to provide a config file that points to these reference files (see [Configuring reference data](#configuring-reference-data)) +which can be used for subsequent `oncoanalyser` runs. The Nextflow work directory can also be removed to free up disk +space. + +The below table shows the possible values for `--ref_data_types`. Note that multiple can be provided as comma separated +list, e.g. `--ref_data_types wgs,dna_alignment` + +| Value | Description | Combination of | +| :--------------------------------- | :---------------------------------------------------------------------------------------- | :-------------------------------------------------------- | +| `wgs` | Ref data for WGS analysis from BAM | `fasta`, `fai`, `dict`, `img`, `hmftools`, `gridss_index` | +| `wts` | Ref data for WTS analysis from BAM | `fasta`, `fai`, `dict`, `img`, `hmftools` | +| `targeted` | Ref data for targeted analysis from BAM | `fasta`, `fai`, `dict`, `img`, `hmftools`, `panel` | +| `bwamem2_index` or `dna_alignment` | BWA-MEM2 index. 
Required if aligning DNA FASTQs | | +| `star_index` or `rna_alignment` | STAR index. Required if aligning RNA FASTQs | | +| `gridss_index` | GRIDSS index. Required if running Virusbreakend/Virusinterpreter | | +| `hmftools` | [WiGiTS](https://github.com/hartwigmedical/hmftools) resources files | | +| `panel` | Panel ref data. Only TSO500 currently supported. Please also specify arg `--panel tso500` | | #### Manual staging @@ -385,85 +449,11 @@ The configuration file can then be supplied to `oncoanalyser` via the `-config < ```bash nextflow run nf-core/oncoanalyser \ - -revision 2.1.0 \ - -config refdata.config \ + -revision 2.2.0 \ + -config reference_data.config \ <...> ``` -### Panel reference data - -Analysis of panel / targeted sequencing data requires additional panel-specific reference data (e.g. region / gene -definitions, copy number and transcript normalisation data, known artefacts). This data is included and pre-configured -for the TSO500 panel, and can be used to analyse TSO500 sequence data by setting `--panel tso500` when running in -`targeted` mode: - -```bash -nextflow run nf-core/oncoanalyser \ - -revision 2.1.0 \ - -config refdata.config \ - -profile docker \ - --genome GRCh38_hmf \ - --mode targeted \ - --panel tso500 \ - --input samplesheet.csv \ - --outdir output/ -``` - -For panels other than TSO500 (including whole exome), the panel-specific reference data must first be generated using a -training procedure detailed [here](https://github.com/hartwigmedical/hmftools/blob/master/pipeline/README_TARGETED.md). 
-The resulting panel-specific reference data must then be defined in a configuration file: - -```groovy title="panel.config" -params { - ref_data_panel_data_path = "/path/to/my_custom_panel_resources/" - - // These are relative paths within the dir provided by `ref_data_panel_data_path` above - panel_data_paths { - - mycustompanel { // This is the name that should be passed to the `--panel` argument - - // Genome version: '37' or '38' - '38' { - driver_gene_panel = 'common/DriverGenePanel.custom_panel.38.tsv' - sage_actionable_panel = 'variants/ActionableCodingPanel.custom_panel.38.bed.gz' - sage_coverage_panel = 'variants/CoverageCodingPanel.custom_panel.38.bed.gz' - pon_artefacts = 'variants/pon_artefacts.custom_panel.38.tsv.gz' - target_region_bed = 'copy_number/target_regions_definition.custom_panel.38.bed.gz' - target_region_normalisation = 'copy_number/cobalt_normalisation.custom_panel.38.tsv' - target_region_ratios = 'copy_number/target_regions_ratios.custom_panel.38.tsv' - target_region_msi_indels = 'copy_number/target_regions_msi_indels.custom_panel.38.tsv' - - // The below are optional and filepaths can be omitted for non-RNA panels by providing an empty list, e.g.: - // isofox_tpm_norm = [] - isofox_tpm_norm = 'rna_resources/isofox.gene_normalisation.custom_panel.38.csv' - isofox_gene_ids = 'rna_resources/custom_panel.rna_gene_ids.csv' - isofox_counts = 'rna_resources/read_93_exp_counts.38.csv' - isofox_gc_ratios = 'rna_resources/read_93_exp_gc_ratios.38.csv' - } - } - } -} -``` - -To run an analysis of panel sequence data: - -- provide both the panel-specific reference data configuration file via the `-config ` argument -- set the panel name in the `--panel ` argument, this must match the name defined in the configuration file -- set the `--force_panel` argument, which is required when not using the built-in `tso500` panel - -```bash -nextflow run nf-core/oncoanalyser \ - -revision 2.1.0 \ - -config panel.config \ - -profile docker \ - --genome 
GRCh38_hmf \ - --mode targeted \ - --panel mycustompanel \ - --force_panel \ - --input samplesheet.csv \ - --outdir output/ -``` - ### Custom genomes It is strongly recommended to use a Hartwig-distributed reference genome for alignments and subsequent analysis @@ -491,38 +481,29 @@ params { } ``` -Each index required for the analysis will first be created before running the rest of `oncoanalyser` with the following -command: - -:::tip - -In a process similar to [staging reference data](#automatic-staging), you can first generate the required indexes by -setting `--prepare_reference_only` and then provide the prepared reference files to `oncoanalyser` through a custom -config file. This avoids having to regenerate indexes for each new analysis. - -::: +Each index can then be created by using `--mode prepare_reference` and `--ref_data_types` +(see section [staging reference data](#automatic-staging)). The below example command would create the indexes for WGS analysis: ```bash nextflow run nf-core/oncoanalyser \ - -revision 2.1.0 \ - -profile docker \ + -revision 2.2.0 \ -config genome.custom.config \ - --mode wgts \ + -profile docker \ + --mode prepare_reference \ + --ref_data_types wgs,bwamem2_index,gridss_index \ --genome CustomGenome \ --genome_version <37|38> \ --genome_type \ --force_genome \ - --input samplesheet.csv \ --outdir output/ ``` -Creation of a STAR index also requires transcript annotations, please provide either of the following GTF files via the -`--ref_data_genome_gtf` option after decompressing: +If aligning FASTQs from RNA seq data for WTS analysis, you should also provide `star_index` to `--ref_data_types`. 
Creating the +STAR index also requires transcript annotations; please provide either of the following GTF files via the `--ref_data_genome_gtf` option +after decompressing: -- GRCh37: [GENCODE v37 (Ensembl v74) - annotations](https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz) -- GRCh38: [GENCODE v38 (Ensembl v104) - annotations](https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_38/gencode.v38.annotation.gtf.gz) +- GRCh37: [GENCODE v37 (Ensembl v74) annotations](https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz) +- GRCh38: [GENCODE v38 (Ensembl v104) annotations](https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_38/gencode.v38.annotation.gtf.gz) :::warning @@ -548,8 +529,8 @@ _GRCh37 genome (Hartwig): `GRCh37_hmf`_ | BWA-MEM2 index | [bwa-mem2_index-2.2.1.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/25.1/bwa-mem2_index-2.2.1.tar.gz) | | GRIDSS index | [gridss_index-2.13.2.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/25.1/gridss_index-2.13.2.tar.gz) | | STAR index | [star_index-gencode_19-2.7.3a.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh37_hmf/25.1/star_index-gencode_19-2.7.3a.tar.gz) | -| WiGiTS data | [hmf_pipeline_resources.37_v2.1.0--1.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/hmf_pipeline_resources.37_v2.1.0--1.tar.gz) | -| TSO500 panel data | [hmf_panel_resources.tso500.37_v2.0.0--3.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/hmf_panel_resources.tso500.37_v2.0.0--3.tar.gz) | +| WiGiTS data | [hmf_pipeline_resources.37_v2.2.0--3.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/hmf_pipeline_resources.37_v2.2.0--3.tar.gz) | +| TSO500 panel data | 
[hmf_panel_resources.tso500.37_v2.2.0--3.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/hmf_panel_resources.tso500.37_v2.2.0--3.tar.gz) | _GRCh38 genome (Hartwig): `GRCh38_hmf`_ @@ -562,67 +543,208 @@ _GRCh38 genome (Hartwig): `GRCh38_hmf`_ | BWA-MEM2 index | [bwa-mem2_index-2.2.1.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/25.1/bwa-mem2_index-2.2.1.tar.gz) | | GRIDSS index | [gridss_index-2.13.2.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/25.1/gridss_index-2.13.2.tar.gz) | | STAR index | [star_index-gencode_38-2.7.3a.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/genomes/GRCh38_hmf/25.1/star_index-gencode_38-2.7.3a.tar.gz) | -| WiGiTS data | [hmf_pipeline_resources.38_v2.1.0--1.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/hmf_pipeline_resources.38_v2.1.0--1.tar.gz) | -| TSO500 panel data | [hmf_panel_resources.tso500.38_v2.0.0--3.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/hmf_panel_resources.tso500.38_v2.0.0--3.tar.gz) | +| WiGiTS data | [hmf_pipeline_resources.38_v2.2.0--3.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/hmf_pipeline_resources.38_v2.2.0--3.tar.gz) | +| TSO500 panel data | [hmf_panel_resources.tso500.38_v2.2.0--3.tar.gz](https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/hmf_panel_resources.tso500.38_v2.2.0--3.tar.gz) | -## Process selection +## Run modes -It is possible to exclude or include specific processes when running `oncoanalyser`. The full list of processes that can -be selected is available [here](https://github.com/nf-core/oncoanalyser/blob/2.1.0/lib/Constants.groovy#L32). +### Whole genome / transcriptome sequencing (WGTS) -### Excluding processes - -Most of the major components in `oncoanalyser` can be skipped using the `--processes_exclude` argument. 
There are -circumstances where it is desirable to skip resource intensive processes like VIRUSBreakend or where you have no use for -the outputs from some process such as the ORANGE report. In the example of skipping the VIRUSBreakend and ORANGE -processes, the `oncoanalyser` command would take the following form: +`--mode wgts` is used for analysing whole genome (WGS) and/or whole transcriptome (WTS) sequencing data, and can be run like so: ```bash nextflow run nf-core/oncoanalyser \ - -revision 2.1.0 \ + -revision 2.2.0 \ + -config reference_data.config \ -profile docker \ --mode wgts \ - --processes_exclude virusinterpreter,orange \ --genome GRCh38_hmf \ --input samplesheet.csv \ --outdir output/ ``` -:::warning +### Targeted sequencing -When skipping components no checks are done to identify orphan processes in the execution DAG or for redundant -processes. +`--mode targeted` is used for analysing targeted or panel sequencing samples. The TSO500 panel has in-built support by setting +`--panel tso500`. A typical run command for TSO500 panels would be: -::: +```bash +nextflow run nf-core/oncoanalyser \ + -revision 2.2.0 \ + -config reference_data.config \ + -profile docker \ + --mode targeted \ + --panel tso500 \ + --genome GRCh38_hmf \ + --input samplesheet.csv \ + --outdir output/ +``` -### Manual process selection +Panels other than TSO500 require additional arguments, as well as custom reference data to be created. +Please see [Custom panels](#custom-panels). + +### Custom panels + +`--mode panel_resource_creation` assists with creating custom panel reference data files (for panels other than TSO500), which fit and +normalise the biases inherent to that specific panel. + +The below table summarises the required reference data files. Some panel reference data files must first be manually created - instructions +can be found on the [**WiGiTS targeted analysis readme**](https://github.com/hartwigmedical/hmftools/blob/master/pipeline/README_TARGETED.md). 
+Some of these files are used with `--mode panel_resource_creation` to create the remaining required reference data files. + +| Data type | File / config name | Comment | +| :-------- | :---------------------------- | :---------------------------------------------------------------------------------------------------------------------- | +| DNA | `driver_gene_panel` | Manually created | +| DNA | `target_region_bed` | Manually created | +| DNA | `target_region_msi_indels` | Manually created | +| DNA | `target_region_ratios` | Manually created | +| DNA | `target_region_normalisation` | Output from `--mode panel_resource_creation` | +| DNA | `pon_artefacts` | Output from `--mode panel_resource_creation` | +| RNA | `isofox_gene_ids` | Manually created | +| RNA | `isofox_tpm_norm` | Output from `--mode panel_resource_creation` | +| RNA | `isofox_counts` | Recommended to use `read_151_exp_counts.<genome_version>.csv` from [WiGiTS reference data](#reference-data-urls) | +| RNA | `isofox_gc_ratios` | Recommended to use `read_100_exp_gc_ratios.<genome_version>.csv` from [WiGiTS reference data](#reference-data-urls) | + +:::note + +RNA reference data is only required if your panel supports RNA sequencing data. + +::: -The `--processes_manual` argument can be used to enable manual process selection and `--processes_include -` to configure individual processes to execute. One use case would be to run processes which are -not run by default, such as neoepitope calling with [NEO](https://github.com/hartwigmedical/hmftools/tree/master/neo). -To do this, provide the below example samplesheet: +Once your manually created files are ready, create a samplesheet with a representative set of panel sequencing samples +(**≥20 recommended**). The below example samplesheet provides BAM files, but [FASTQ files](#fastq) can also be provided. 
+```csv title="samplesheet.panel_resource_creation.csv" group_id,subject_id,sample_id,sample_type,sequence_type,filetype,filepath -PATIENT1,PATIENT1,PATIENT1-N,normal,dna,bam,/path/to/PATIENT1-N.dna.wgs.bam -PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bam,/path/to/PATIENT1-T.dna.wgs.bam -PATIENT1,PATIENT1,PATIENT1-T-RNA,tumor,rna,bam,/path/to/PATIENT1-T.rna.wgs.bam +PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bam,/path/to/PATIENT1-T.dna.bam +PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,bai,/path/to/PATIENT1-T.dna.bam.bai +PATIENT2,PATIENT2,PATIENT2-T,tumor,dna,bam,/path/to/PATIENT2-T.dna.bam +PATIENT2,PATIENT2,PATIENT2-T,tumor,dna,bai,/path/to/PATIENT2-T.dna.bam.bai ``` -Then, run `oncoanalyser` with the `neo` process selected as well as all required upstream processes: +Then, run `oncoanalyser` with `--mode panel_resource_creation` providing the samplesheet, as well as the relevant manually created files +to arguments `--driver_gene_panel`, `--target_regions_bed`, and `--isofox_gene_ids`: ```bash nextflow run nf-core/oncoanalyser \ - -revision 2.1.0 \ + -revision 2.2.0 \ + -config reference_data.config \ -profile docker \ - --mode wgts \ - --processes_manual \ - --processes_include isofox,redux,amber,cobalt,sage,pave,esvee,purple,linx,lilac,neo \ + --mode panel_resource_creation \ --genome GRCh38_hmf \ - --input samplesheet.neo_inputs.csv \ + --input samplesheet.panel_resource_creation.csv \ + --driver_gene_panel DriverGenePanel.38.tsv \ + --target_regions_bed target_regions_definition.38.bed.gz \ + --isofox_gene_ids rna_gene_ids.csv \ # Optional, only provide if panel supports RNA sequencing data + --outdir output/ +``` + +Place all the custom panel reference data files in a directory, and define the paths / file names in a configuration file: + +```groovy title="panel.config" +params { + ref_data_panel_data_path = "/directory/containing/my_custom_panel_resources/" + + // These are relative paths within the dir provided by 
`ref_data_panel_data_path` above + panel_data_paths { + + my_custom_panel { // This is the name that should be passed to the `--panel` argument + + // Genome version: '37' or '38' + '38' { + driver_gene_panel = 'DriverGenePanel.38.tsv' + pon_artefacts = 'pave.somatic_artefacts.38.tsv' + target_region_bed = 'target_regions_definition.38.bed.gz' + target_region_normalisation = 'cobalt.region_normalisation.38.tsv' + target_region_ratios = 'target_regions_ratios.38.tsv' + target_region_msi_indels = 'target_regions_msi_indels.38.tsv' + + // RNA. Optional, only provide if panel supports RNA data. + isofox_gene_ids = 'rna_gene_ids.csv' + isofox_tpm_norm = 'isofox.gene_normalisation.38.csv' + isofox_counts = 'read_151_exp_counts.37.csv' + isofox_gc_ratios = 'read_100_exp_gc_ratios.37.csv' + } + } + } +} +``` + +Lastly, run `oncoanalyser` with `--mode targeted` to analyse your panel sequencing sample. You will also need to: + +- provide the custom panel reference data configuration file to the `-config ` argument +- set the panel name in the `--panel ` argument as defined in the configuration file (e.g. `my_custom_panel`) +- set the `--force_panel` argument to enable non-built-in panels + +```bash +nextflow run nf-core/oncoanalyser \ + -revision 2.2.0 \ + -config reference_data.config \ + -config panel_data.config \ + -profile docker \ + --mode targeted \ + --panel my_custom_panel \ + --genome GRCh38_hmf \ + --force_panel \ + --input samplesheet.csv \ + --outdir output/ +``` + +### Purity estimate + +`--mode purity_estimate` uses [WISP](https://github.com/hartwigmedical/hmftools/tree/master/wisp) to estimate the tumor fraction +(aka purity) for a longitudinal sample (typically a ctDNA sample) guided by variants identified in a primary sample of the same patient +(typically a primary tissue biopsy). This can be used for example for detecting minimal residual disease (MRD). 
+ +The primary sample must first have been run in either [**WGTS**](#whole-genome--transcriptome-sequencing-wgts) or +[**targeted**](#targeted-sequencing) mode. + +A samplesheet with the paths to the primary and longitudinal sample data is then created. Specifically: + +- The BAM from the longitudinal tumor sample +- The AMBER and PURPLE directories from the **primary tumor** sample +- (Optional) The REDUX BAM of the normal sample, if the normal sample was provided in the primary sample run (i.e. was run in tumor/normal mode) + +```csv title="samplesheet.purity_estimate.csv" +group_id,subject_id,sample_id,sample_type,sequence_type,filetype,info,filepath +PATIENT1,PATIENT1,PATIENT1-L,tumor,dna,bam,longitudinal_sample,/path/to/PATIENT1-T.dna.longitudinal.bam +PATIENT1,PATIENT1,PATIENT1-N,normal,dna,bam_redux,,/path/to/PATIENT1-N.dna.redux.bam +PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,amber_dir,,/path/to/PATIENT1-T/amber/ +PATIENT1,PATIENT1,PATIENT1-T,tumor,dna,purple_dir,,/path/to/PATIENT1-T/purple/ +``` + +Then run `oncoanalyser` providing `--mode purity_estimate` and `--purity_estimate_mode ` (how the **longitudinal sample** +was sequenced): + +```bash +nextflow run nf-core/oncoanalyser \ + -revision 2.2.0 \ + -config reference_data.config \ + -profile docker \ + --mode purity_estimate \ + --purity_estimate_mode targeted \ + --genome GRCh38_hmf \ + --input samplesheet.purity_estimate.csv \ --outdir output/ ``` +:::note + +`--purity_estimate_mode` simply sets different arguments for certain tools (e.g. SAGE). When running with `--purity_estimate_mode targeted`, +you do not need to configure panel ref data paths as you would with `--mode targeted`. + +::: + +### Prepare reference data + +`--mode prepare_reference` assists with staging all the reference data required to run `oncoanalyser`. 
+Please see: [Staging reference data: Automatic staging](#automatic-staging) + +## Process selection + +It is possible to exclude or manually select specific processes when running `oncoanalyser`. The full list of processes that can +be selected is available [here](https://github.com/nf-core/oncoanalyser/blob/2.2.0/lib/Constants.groovy#L53). + :::warning It is the user's responsibility to select the required upstream processes for a downstream process to run. If not all @@ -631,6 +753,38 @@ process running. ::: +### Excluding processes + +Most of the major components in `oncoanalyser` can be skipped using the `--processes_exclude` argument. You may want to +skip resource intensive processes like Virusbreakend, or ORANGE because you do not require the report, for example: + +```bash +nextflow run nf-core/oncoanalyser \ + -revision 2.2.0 \ + -profile docker \ + --mode wgts \ + --processes_exclude virusinterpreter,orange \ + --genome GRCh38_hmf \ + --input samplesheet.csv \ + --outdir output/ +``` + +### Manual process selection + +The `--processes_manual` argument can be used to select the exact processes that `oncoanalyser` will run. For example, +you may only want to run alignment and SNV/indel, SV and CNV calling from DNA FASTQs, like so: + +```bash +nextflow run nf-core/oncoanalyser \ + -revision 2.2.0 \ + -profile docker \ + --mode wgts \ + --processes_manual alignment,redux,amber,cobalt,sage,pave,esvee,purple \ + --genome GRCh38_hmf \ + --input samplesheet.csv \ + --outdir output/ +``` + ### Starting from existing inputs An `oncoanalyser` analysis can start at arbitrary points as long as the required inputs are provided. 
For example, @@ -662,23 +816,15 @@ Then, run `oncoanalyser` skipping all processes except for `neo`: ```bash nextflow run nf-core/oncoanalyser \ - -revision 2.1.0 \ + -revision 2.2.0 \ -profile docker \ --mode wgts \ - --processes_manual \ - --processes_include neo \ + --processes_manual neo \ --genome GRCh38_hmf \ --input samplesheet.neo_inputs.csv \ --outdir output/ ``` -:::warning - -Providing existing inputs will cause `oncoanalyser` to skip the corresponding process but none of the upstream -processes. It is the responsibility of the user to skip all relevant processes. - -::: - ## Core Nextflow arguments :::note @@ -753,51 +899,58 @@ Syntax and examples of config items are described in the [Nextflow documentation ### Compute resources The default compute resources (e.g. CPUs, RAM, disk space) configured in `oncoanalyser` may not be sufficient for one or -more processes. To change the resource requests, please see the [tuning workflow -resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) and [max -resources](https://nf-co.re/docs/usage/configuration#max-resources) sections of the nf-core website. +more processes (nf-core documentation: [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources)). +For example, for high depth samples (e.g. panel samples), you may need to increase the memory for alignment, read processing (REDUX), +small variant calling (SAGE), or structural variant calling (ESVEE) steps. -Below are the settings per WiGiTS tool that Hartwig uses internally and recommends. For high depth samples (e.g. panel -samples), you may need increase the memory for alignment, read processing (REDUX) and/or variant calling (SAGE or ESVEE) -steps. 
+Below are the settings per tool that Hartwig Medical Foundation uses when running `oncoanalyser` in Google cloud: ```groovy process { - withName: '.*ALIGN' { cpus = 12; memory = 72.GB; } - withName: AMBER { cpus = 16; memory = 24.GB; } - withName: BAMTOOLS { cpus = 16; memory = 24.GB; } - withName: CHORD { cpus = 4; memory = 12.GB; } - withName: COBALT { cpus = 16; memory = 24.GB; } - withName: CUPPA { cpus = 4; memory = 16.GB; } - withName: 'ESVEE.*' { cpus = 32; memory = 64.GB; } - withName: LILAC { cpus = 16; memory = 24.GB; } - withName: 'LINX.*' { cpus = 16; memory = 16.GB; } - withName: REDUX { cpus = 32; memory = 64.GB; } - withName: ORANGE { cpus = 4; memory = 16.GB; } - withName: 'PAVE.*' { cpus = 8; memory = 32.GB; } - withName: PURPLE { cpus = 8; memory = 40.GB; } - withName: 'SAGE.*' { cpus = 32; memory = 64.GB; } - withName: VIRUSBREAKEND { cpus = 8; memory = 64.GB; } - withName: VIRUSINTERPRETER { cpus = 2; memory = 8.GB; } + withName: '.*ALIGN' { memory = 72.GB; cpus = 12; disk = 750.GB } + withName: 'AMBER' { memory = 24.GB; cpus = 16; disk = 375.GB } + withName: 'BAMTOOLS' { memory = 24.GB; cpus = 16; disk = 375.GB } + withName: 'CHORD' { memory = 12.GB; cpus = 4 ; disk = 375.GB } + withName: 'CIDER' { memory = 24.GB; cpus = 16; disk = 375.GB } + withName: 'COBALT' { memory = 24.GB; cpus = 16; disk = 375.GB } + withName: 'CUPPA' { memory = 16.GB; cpus = 4 ; disk = 375.GB } + withName: 'ESVEE' { memory = 96.GB; cpus = 32; disk = 375.GB } + withName: 'ISOFOX' { memory = 24.GB; cpus = 16; disk = 375.GB } + withName: 'LILAC' { memory = 24.GB; cpus = 16; disk = 375.GB } + withName: 'LINX_.*' { memory = 16.GB; cpus = 8 ; disk = 375.GB } + withName: 'REDUX' { memory = 64.GB; cpus = 32; disk = 750.GB } + withName: 'ORANGE' { memory = 16.GB; cpus = 4 ; disk = 375.GB } + withName: 'PAVE.*' { memory = 32.GB; cpus = 8 ; disk = 375.GB } + withName: 'PEACH' { memory = 4.GB ; cpus = 2 ; disk = 375.GB } + withName: 'PURPLE' { memory = 40.GB; cpus = 8 ; disk = 
375.GB } + withName: 'SAGE.*' { memory = 64.GB; cpus = 32; disk = 375.GB } + withName: 'TEAL.*' { memory = 32.GB; cpus = 32; disk = 375.GB } + withName: 'VIRUSBREAKEND' { memory = 64.GB; cpus = 16; disk = 375.GB } + withName: 'VIRUSINTERPRETER' { memory = 8.GB ; cpus = 2 ; disk = 375.GB } + withName: 'WISP' { memory = 16.GB; cpus = 4 ; disk = 375.GB } } ``` -Lastly, we recommend setting an upper limit on total resources that `oncoanalyser` is allowed to use. This will -typically be the max resources available to the VM / compute job. Below are the settings that Hartwig Medical Foundation -uses internally. When running multiple steps and/or samples in parallel, this will prevent `oncoanalyser` from -requesting more resources than available on the machine. +We recommend setting an upper limit on total resources that `oncoanalyser` is allowed to use (nf-core +documentation: [max resources](https://nf-co.re/docs/usage/configuration#max-resources)). Otherwise, `oncoanalyser` may +crash when it tries to request more resources than available on a machine or compute job. +Below are some recommended resource limit settings: ```groovy process { resourceLimits = [ cpus: 64, - memory: 124.GB, // = 0.97 * 128.GB + memory: 120.GB, // Provides leeway on a 128.GB machine disk: 1500.GB, time: 48.h ] } ``` +The total runtime of `oncoanalyser` is ~3h for a paired 100x/30x tumor/normal WGS run starting from BAMs with parallel job execution via +Google batch. However, your runtime will vary depending on several factors such as sequencing depth, number of small/structural variants, or +parallel vs. non-parallel job execution. 
+ ### Container images #### Custom containers @@ -905,13 +1058,13 @@ on the presence/format of your UMI strings, you may need to configure one or mor ```groovy title='umi.config' params { // For FASTQ files - fastp_umi = true // Enable UMI processing by fastp + fastp_umi_enabled = true // Enable UMI processing by fastp fastp_umi_location = "per_read" // --umi_loc fastp arg fastp_umi_length = 7 // --umi_len fastp arg fastp_umi_skip = 0 // --umi_skip fastp arg // For BAM files - redux_umi = true // Enable UMI processing by REDUX + redux_umi_enabled = true // Enable UMI processing by REDUX redux_umi_duplex_delim = "_" // Duplex UMI delimiter } ``` diff --git a/docs/usage/faq_and_troubleshooting.md b/docs/usage/faq_and_troubleshooting.md index 768f4f4a..0e773a4a 100644 --- a/docs/usage/faq_and_troubleshooting.md +++ b/docs/usage/faq_and_troubleshooting.md @@ -19,13 +19,12 @@ - [Placing `oncoanalyser` CLI arguments into a configuration file](#placing-oncoanalyser-cli-arguments-into-a-configuration-file) - [Errors and navigating the `work/` directory](#errors-and-navigating-the-work-directory) -- [Saving logs from the `work/` directory](#saving-logs-from-the-work-directory) - [Resuming runs in Google Batch](#resuming-runs-in-google-batch) ## How to start from CRAM? -Simply specify a CRAM path instead of a BAM path in the sample sheet. See section [Input starting points: BAM / -CRAM](./#bam-and-cram). +Simply provide a CRAM path under filetype `cram` in the sample sheet. See section [Input starting points: CRAM](./#cram) +for details. ## How to handle UMIs? 
@@ -94,14 +93,13 @@ example, you would run `oncoanalyser` with the below command (assuming starting ```bash nextflow run nf-core/oncoanalyser \ - -revision 2.1.0 \ + -revision 2.2.0 \ -profile docker \ --mode wgts \ + --processes_manual alignment,redux,amber,cobalt,sage,pave,esvee,purple \ --genome GRCh38_hmf \ - --input samplesheet.neo_inputs.csv \ - --outdir output/ \ - --processes_manual \ - --processes_include alignment,redux,amber,cobalt,sage,pave,esvee,purple + --input samplesheet.csv \ + --outdir output/ ``` ## Why does `oncoanalyser` call too many / too few variants than another pipeline? @@ -213,13 +211,13 @@ For example, the `oncoanalyser` arguments which start with `--` in this command: ```shell nextflow run nf-core/oncoanalyser \ - -revision 2.1.0 \ - -config refdata.config \ + -revision 2.2.0 \ -profile docker \ + -config refdata.config \ --mode wgts \ --genome GRCh38_hmf \ - --input /path/to/samplesheet.csv \ - --outdir /path/to/outdir/ + --input samplesheet.csv \ + --outdir output/ ``` can be specified in a config file by stripping the `--` like so: @@ -228,8 +226,8 @@ can be specified in a config file by stripping the `--` like so: params { mode = "wgts" genome = "GRCh38_hmf" - input = "/path/to/samplesheet.csv" - outdir = "/path/to/outdir/" + input = "samplesheet.csv" + outdir = "outdir/" } ``` @@ -237,9 +235,9 @@ and provided as a config file when running `oncoanalyser`: ```shell nextflow run nf-core/oncoanalyser \ + -revision 2.2.0 \ -config refdata.config \ -config params.config \ - -revision 2.1.0 \ -profile docker \ <...> ``` @@ -271,7 +269,7 @@ work/ │ ├── .command.sh # Bash script used to run the process *within the container* │ ├── .command.run # Bash script used to run the process in the host machine │ ├── .command.begin -│ ├── .command.log # All log messages (combination of stdout and stderr) +│ ├── .command.log # All log messages (combination of stdout and stderr). 
Might not exist for some executors │ ├── .command.err # stderr log messages │ ├── .command.out # stdout log messages │ ├── .command.trace # Compute resource usage stats @@ -291,32 +289,6 @@ The `work/` directory can be hard to navigate due to the `/) to show the directory structure, which allows you to manually find the target process directory. -## Saving logs from the `work/` directory - -To save logs to the final output directory (i.e. path provided to `--outdir`), we can provide the below -[afterScript](https://www.nextflow.io/docs/latest/reference/process.html#afterscript) directive in a config file: - -```groovy -// Adapted from this GitHub issue: https://github.com/nextflow-io/nextflow/issues/1166 -process.afterScript = { - // params.outdir: --outdir arg - // meta.key: sample_id from the sample sheet - log_dir = "${params.outdir}/${meta.key}/logs" - - // task.process: name of the process - // meta.id: concatenation of the group_id and sample_id from the sample sheet - dest_file_prefix = "${log_dir}/${task.process}.${meta.id}" - - // The value of afterScript is simply a bash command as a string - cmd = "mkdir -p ${log_dir}; " - cmd += "for file in .command.{sh,log}; do cp \$file ${dest_file_prefix}\${file}; done" - cmd -} -``` - -The above afterScript directive will copy `.sh` and `.log` files from the `work/` directory for every process. 
Each -destination file will have the below example path: - ```shell outdir/coloMini/logs/NFCORE_ONCOANALYSER:WGTS:REDUX_PROCESSING:REDUX.coloMini_coloMiniT.command.log ``` diff --git a/lib/Constants.groovy b/lib/Constants.groovy index 32decfee..8fa6eeb5 100644 --- a/lib/Constants.groovy +++ b/lib/Constants.groovy @@ -3,19 +3,19 @@ class Constants { // NOTE(SW): the HMF reference data files are incompatible with hg19 due to different contig naming static List GENOMES_VERSION_37 = ['GRCh37_hmf', 'GRCh37'] static List GENOMES_VERSION_38 = ['GRCh38_hmf', 'GRCh38', 'hg38'] - static List GENOMES_ALT = ['GRCh38', 'hg38'] + static List GENOMES_ALT = ['GRCh38', 'hg38'] - static List GENOMES_SUPPORTED = ['GRCh37_hmf', 'GRCh38_hmf'] - static List GENOMES_DEFINED = Constants.GENOMES_VERSION_37 + Constants.GENOMES_VERSION_38 + static List GENOMES_SUPPORTED = ['GRCh37_hmf', 'GRCh38_hmf'] + static List GENOMES_DEFINED = Constants.GENOMES_VERSION_37 + Constants.GENOMES_VERSION_38 - static List PANELS_DEFINED = ['tso500'] + static List PANELS_DEFINED = ['tso500'] - static String HMF_DATA_37_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/hmf_pipeline_resources.37_v2.1.0--1.tar.gz' - static String HMF_DATA_38_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/hmf_pipeline_resources.38_v2.1.0--1.tar.gz' + static String HMF_DATA_37_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/hmf_pipeline_resources.37_v2.2.0--3.tar.gz' + static String HMF_DATA_38_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/hmftools/hmf_pipeline_resources.38_v2.2.0--3.tar.gz' - static String TSO500_PANEL_37_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/hmf_panel_resources.tso500.37_v2.0.0--3.tar.gz' - static String TSO500_PANEL_38_PATH = 
'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/hmf_panel_resources.tso500.38_v2.0.0--3.tar.gz' + static String TSO500_PANEL_37_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/hmf_panel_resources.tso500.37_v2.2.0--3.tar.gz' + static String TSO500_PANEL_38_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/hmf_reference_data/panels/hmf_panel_resources.tso500.38_v2.2.0--3.tar.gz' static String HLA_SLICE_BED_GRCH38_ALT_PATH = 'https://pub-cf6ba01919994c3cbd354659947f74d8.r2.dev/other/hla_slice/grch38_alt.plus_homologous.bed' @@ -25,10 +25,33 @@ class Constants { static enum RunMode { + PANEL_RESOURCE_CREATION, + PREPARE_REFERENCE, + PURITY_ESTIMATE, TARGETED, WGTS, } + static enum RefDataType { + // Compound types + TARGETED, + WGS, + WTS, + + // Individual types + BWAMEM2_INDEX, + DICT, + DNA_ALIGNMENT, + FAI, + FASTA, + GRIDSS_INDEX, + HMFTOOLS, + IMG, + PANEL, + RNA_ALIGNMENT, + STAR_INDEX, + } + static enum Process { ALIGNMENT, AMBER, @@ -51,18 +74,25 @@ class Constants { SIGS, TEAL, VIRUSINTERPRETER, + WISP, } + static List DEFAULT_EXCLUDED_PROCESSES = [] // For experimental tools + static enum FileType { // Generic - BAM, BAI, + BAM, + CRAI, + CRAM, FASTQ, - // Redux + + // REDUX BAM_REDUX, - REDUX_DUP_FREQ_TSV, + CRAM_REDUX, REDUX_JITTER_TSV, REDUX_MS_TSV, + // Process AMBER_DIR, BAMTOOLS_DIR, @@ -76,34 +106,36 @@ class Constants { PURPLE_DIR, SAGE_VCF, SAGE_VCF_TBI, - SAGE_APPEND_VCF, + SAGE_APPEND_DIR, VIRUSINTERPRETER_DIR, + // ORANGE specific CHORD_DIR, - SIGS_DIR, CUPPA_DIR, LINX_PLOT_DIR, - SAGE_DIR, PEACH_DIR, + SAGE_DIR, + SIGS_DIR, } static enum SampleType { - TUMOR, + DONOR, NORMAL, + TUMOR, TUMOR_NORMAL, - DONOR, } static enum SequenceType { DNA, - RNA, DNA_RNA, + RNA, } static enum InfoField { CANCER_TYPE, LANE, LIBRARY_ID, + LONGITUDINAL_SAMPLE, } static Map PLACEHOLDER_META = [meta_placeholder: null] @@ -168,12 +200,6 @@ class Constants { SequenceType.DNA, ], - 
REDUX_DUP_FREQ_TSV_TUMOR: [ - FileType.REDUX_DUP_FREQ_TSV, - SampleType.TUMOR, - SequenceType.DNA, - ], - REDUX_JITTER_TSV_TUMOR: [ FileType.REDUX_JITTER_TSV, SampleType.TUMOR, @@ -192,12 +218,6 @@ class Constants { SequenceType.DNA, ], - REDUX_DUP_FREQ_TSV_NORMAL: [ - FileType.REDUX_DUP_FREQ_TSV, - SampleType.NORMAL, - SequenceType.DNA, - ], - REDUX_JITTER_TSV_NORMAL: [ FileType.REDUX_JITTER_TSV, SampleType.NORMAL, @@ -216,12 +236,6 @@ class Constants { SequenceType.DNA, ], - REDUX_DUP_FREQ_TSV_DONOR: [ - FileType.REDUX_DUP_FREQ_TSV, - SampleType.DONOR, - SequenceType.DNA, - ], - REDUX_JITTER_TSV_DONOR: [ FileType.REDUX_JITTER_TSV, SampleType.DONOR, @@ -294,13 +308,13 @@ class Constants { SampleType.NORMAL, SequenceType.DNA, ], - SAGE_APPEND_VCF_TUMOR: [ - FileType.SAGE_APPEND_VCF, + SAGE_APPEND_DIR_TUMOR: [ + FileType.SAGE_APPEND_DIR, SampleType.TUMOR, SequenceType.DNA_RNA, ], - SAGE_APPEND_VCF_NORMAL: [ - FileType.SAGE_APPEND_VCF, + SAGE_APPEND_DIR_NORMAL: [ + FileType.SAGE_APPEND_DIR, SampleType.NORMAL, SequenceType.DNA_RNA, ], diff --git a/lib/Processes.groovy b/lib/Processes.groovy index f4770838..9bdd032f 100644 --- a/lib/Processes.groovy +++ b/lib/Processes.groovy @@ -8,22 +8,30 @@ class Processes { public static getRunStages(include, exclude, manual_select, log) { - // Get default processes - // NOTE(SW): currently set all except Neo to run by default; Process.NEO excluded to be more concise in code def processes + if (manual_select) { - processes = [] + processes = this.getProcessList(manual_select, log) + + if (include || exclude) { + log.warning "When manually selecting processes, including/excluding processes is ignored" + } + } else { + + // Get default processes processes = Constants.Process.values().toList() - processes.remove(Constants.Process.NEO) - } - def include_list = this.getProcessList(include, log) - def exclude_list = this.getProcessList(exclude, log) - this.checkIncludeExcludeList(include_list, exclude_list, log) + // NOTE(LN): Disable 
some processes from running by default + Constants.DEFAULT_EXCLUDED_PROCESSES.each {it -> processes.remove(it) } - processes.addAll(include_list) - processes.removeAll(exclude_list) + def include_list = this.getProcessList(include, log) + def exclude_list = this.getProcessList(exclude, log) + this.checkIncludeExcludeList(include_list, exclude_list, log) + + processes.addAll(include_list) + processes.removeAll(exclude_list) + } return Constants.Process .values() diff --git a/lib/Utils.groovy b/lib/Utils.groovy index e2ef6d4b..cfe40aad 100644 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -8,6 +8,11 @@ class Utils { public static parseInput(input_fp_str, stub_run, log) { + if (!input_fp_str) { + log.error "Missing required --input argument" + Nextflow.exit(1) + } + // NOTE(SW): using NF .splitCsv channel operator, hence should be easily interchangable with NF syntax def input_fp = Utils.getFileObject(input_fp_str) @@ -28,6 +33,37 @@ class Utils { meta.subject_id = it.subject_id } + // Info data + def info_data = [:] + if (it.containsKey('info')) { + // Parse + it.info + .tokenize(';') + .each { e -> + def (k, v) = e.tokenize(':') + def info_field_enum = Utils.getEnumFromString(k, Constants.InfoField) + + if (!info_field_enum) { + def info_field_str = Utils.getEnumNames(Constants.InfoField).join('\n - ') + log.error "received invalid info field: '${k}'. 
Valid options are:\n - ${info_field_str}" + Nextflow.exit(1) + } + + if (info_data.containsKey(info_field_enum)) { + log.error "got duplicate info field for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${info_field_enum}" + Nextflow.exit(1) + } + + info_data[info_field_enum] = v + } + + // Process + if (info_data.containsKey(Constants.InfoField.CANCER_TYPE)) { + meta[Constants.InfoField.CANCER_TYPE] = info_data[Constants.InfoField.CANCER_TYPE] + } + + } + // Sample type def sample_type_enum = Utils.getEnumFromString(it.sample_type, Constants.SampleType) if (!sample_type_enum) { @@ -53,49 +89,33 @@ class Utils { } def sample_key = [sample_type_enum, sequence_type_enum] - def meta_sample = meta.get(sample_key, [sample_id: it.sample_id]) + def meta_sample = meta.get(sample_key, [:]) - if (meta_sample.sample_id != it.sample_id) { - log.error "got unexpected sample name for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${it.sample_id}" - Nextflow.exit(1) - } + if (info_data.containsKey(Constants.InfoField.LONGITUDINAL_SAMPLE)) { - if (meta_sample.containsKey(filetype_enum) & filetype_enum != Constants.FileType.FASTQ) { - log.error "got duplicate file for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${filetype_enum}" - Nextflow.exit(1) - } + if (meta_sample.containsKey('longitudinal_sample_id') && meta_sample.longitudinal_sample_id != it.sample_id) { + log.error "got multiple longitudinal samples for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${it.sample_id}" + Nextflow.exit(1) + } - // Info data - def info_data = [:] - if (it.containsKey('info')) { - // Parse - it.info - .tokenize(';') - .each { e -> - def (k, v) = e.tokenize(':') - def info_field_enum = Utils.getEnumFromString(k, Constants.InfoField) + meta_sample.longitudinal_sample_id = it.sample_id - if (!info_field_enum) { - def info_field_str = Utils.getEnumNames(Constants.InfoField).join('\n - ') - log.error "received invalid info field: '${k}'. 
Valid options are:\n - ${info_field_str}" - Nextflow.exit(1) - } + } else if (meta_sample.containsKey('sample_id') && meta_sample.sample_id != it.sample_id) { - if (info_data.containsKey(info_field_enum)) { - log.error "got duplicate info field for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${info_field_enum}" - Nextflow.exit(1) - } + log.error "got unexpected sample name for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${it.sample_id}" + Nextflow.exit(1) - info_data[info_field_enum] = v - } + } else { - // Process - if (info_data.containsKey(Constants.InfoField.CANCER_TYPE)) { - meta[Constants.InfoField.CANCER_TYPE] = info_data[Constants.InfoField.CANCER_TYPE] - } + meta_sample.sample_id = it.sample_id } + // Filetype uniqueness + if (meta_sample.containsKey(filetype_enum) & filetype_enum != Constants.FileType.FASTQ) { + log.error "got duplicate file for ${group_id} ${sample_type_enum}/${sequence_type_enum}: ${filetype_enum}" + Nextflow.exit(1) + } // Handle inputs appropriately if (filetype_enum === Constants.FileType.FASTQ) { @@ -110,7 +130,15 @@ class Utils { Nextflow.exit(1) } - def (fwd, rev) = it.filepath.tokenize(';') + def fastq_entries = it.filepath.tokenize(';') + + if (fastq_entries.size() != 2) { + log.error "expected exactly 2 FASTQ files delimited by ';' (i.e. 
';') but found ${fastq_entries.size} " + + " files for ${group_id} ${sample_type_enum}/${sequence_type_enum} but found ${fastq_entries.size} files" + Nextflow.exit(1) + } + + def (fwd, rev) = fastq_entries def fastq_key = [info_data[Constants.InfoField.LIBRARY_ID], info_data[Constants.InfoField.LANE]] if (meta_sample.containsKey(fastq_key)) { @@ -122,7 +150,7 @@ class Utils { meta_sample[filetype_enum] = [:] } - meta_sample[filetype_enum][fastq_key] = ['fwd': fwd, 'rev': rev] + meta_sample[filetype_enum][fastq_key] = ['fwd': Utils.getFileObject(fwd), 'rev': Utils.getFileObject(rev)] } else { @@ -144,12 +172,12 @@ class Utils { def index_enum def index_str - if (key === Constants.FileType.BAM) { - index_enum = Constants.FileType.BAI - index_str = (meta[sample_key][key].toString().endsWith('cram')) ? 'crai' : 'bai' - } else if (key === Constants.FileType.BAM_REDUX) { + if (key === Constants.FileType.BAM || key === Constants.FileType.BAM_REDUX) { index_enum = Constants.FileType.BAI index_str = 'bai' + } else if (key === Constants.FileType.CRAM || key === Constants.FileType.CRAM_REDUX) { + index_enum = Constants.FileType.CRAI + index_str = 'crai' } else if (key === Constants.FileType.ESVEE_VCF) { index_enum = Constants.FileType.ESVEE_VCF_TBI index_str = 'tbi' @@ -174,46 +202,67 @@ class Utils { } meta[sample_key][index_enum] = index_fp + } + } + + // CRAMs are passed to hmftools as if they were BAMs, e.g. 
`-bam_file /path/to/tumor.cram` + // We therefore set the BAM/BAI path to be the CRAM/CRAI path + sample_keys.each { sample_key -> + + def meta_sample = meta[sample_key] + if (meta_sample.containsKey(Constants.FileType.CRAM_REDUX)) { + meta_sample[Constants.FileType.BAM_REDUX] = meta_sample.remove(Constants.FileType.CRAM_REDUX) } + + if (meta_sample.containsKey(Constants.FileType.CRAM)) { + meta_sample[Constants.FileType.BAM] = meta_sample.remove(Constants.FileType.CRAM) + } + + // The BAI key is used to store the index for both regular/REDUX CRAMs/BAMs + if (meta_sample.containsKey(Constants.FileType.CRAI)) { + meta_sample[Constants.FileType.BAI] = meta_sample.remove(Constants.FileType.CRAI) + } + } // Check that REDUX TSVs are present sample_keys.each { sample_key -> - if(stub_run) + if (stub_run) { return + } def meta_sample = meta[sample_key] - def sample_id = meta_sample.sample_id - if(!meta_sample.containsKey(Constants.FileType.BAM_REDUX)) + if (!meta_sample.containsKey(Constants.FileType.BAM_REDUX)) { return - - if(meta_sample.containsKey(Constants.FileType.BAM)) { - log.error "${Constants.FileType.BAM} and ${Constants.FileType.BAM_REDUX} provided for sample ${sample_id}. 
Please only provide one or the other" - Nextflow.exit(1) } def bam_path = meta_sample[Constants.FileType.BAM_REDUX] def bam_dir = bam_path.getParent().toUriString() // Get user specified TSV paths - def jitter_tsv = meta_sample[Constants.FileType.REDUX_JITTER_TSV] - def ms_tsv = meta_sample[Constants.FileType.REDUX_MS_TSV] + def jitter_tsv = meta_sample[Constants.FileType.REDUX_JITTER_TSV] + def ms_tsv = meta_sample[Constants.FileType.REDUX_MS_TSV] // If TSV paths not provided, default to TSV paths in the same dir as the BAM - jitter_tsv = jitter_tsv ?: "${bam_dir}/${sample_id}.jitter_params.tsv" - ms_tsv = ms_tsv ?: "${bam_dir}/${sample_id}.ms_table.tsv.gz" + def sample_id = meta_sample.getOrDefault('longitudinal_sample_id', meta_sample['sample_id']) + jitter_tsv = jitter_tsv ?: "${bam_dir}/${sample_id}.jitter_params.tsv" + ms_tsv = ms_tsv ?: "${bam_dir}/${sample_id}.ms_table.tsv.gz" - jitter_tsv = nextflow.Nextflow.file(jitter_tsv) - ms_tsv = nextflow.Nextflow.file(ms_tsv) + jitter_tsv = nextflow.Nextflow.file(jitter_tsv) + ms_tsv = nextflow.Nextflow.file(ms_tsv) def missing_tsvs = [:] - if(!jitter_tsv.exists()) missing_tsvs[Constants.FileType.REDUX_JITTER_TSV] = jitter_tsv - if(!ms_tsv.exists()) missing_tsvs[Constants.FileType.REDUX_MS_TSV] = ms_tsv + if (!jitter_tsv.exists()) { + missing_tsvs[Constants.FileType.REDUX_JITTER_TSV] = jitter_tsv + } + if (!ms_tsv.exists()) { + missing_tsvs[Constants.FileType.REDUX_MS_TSV] = ms_tsv + } - if(missing_tsvs.size() > 0){ + if (missing_tsvs.size() > 0) { def error_message = [] @@ -221,7 +270,8 @@ class Utils { error_message.add("${bam_path.toUriString()} (${Constants.FileType.BAM_REDUX})") missing_tsvs.each { error_message.add("${it.value} (missing expected ${it.key})") } error_message.add("") - error_message.add("Alternatively, provide the TSV paths in the sample sheet using filetype values: " + + error_message.add( + "Alternatively, provide the TSV paths in the sample sheet using filetype values: " + 
"${Constants.FileType.REDUX_JITTER_TSV}, " + "${Constants.FileType.REDUX_MS_TSV}" ) @@ -233,6 +283,18 @@ class Utils { // Set parsed REDUX TSV paths in metadata object meta_sample[Constants.FileType.REDUX_JITTER_TSV] = jitter_tsv meta_sample[Constants.FileType.REDUX_MS_TSV] = ms_tsv + + } + + // For purity estimation with WISP, require primary normal DNA BAM when an AMBER directory is provided + def meta_tumor_dna = meta.getOrDefault([Constants.SampleType.TUMOR, Constants.SequenceType.DNA], [:]) + def longitudinal = meta_tumor_dna.containsKey('longitudinal_sample_id') + def has_amber_dir = meta_tumor_dna.containsKey(Constants.FileType.AMBER_DIR) + def has_normal_dna_bam = Utils.hasNormalDnaBam(meta) || Utils.hasNormalDnaReduxBam(meta) + + if (longitudinal && has_amber_dir && !has_normal_dna_bam) { + log.error "AMBER input was provided without the required primary normal DNA BAM for ${meta.group_id}" + Nextflow.exit(1) } return meta @@ -259,7 +321,7 @@ class Utils { fps << "${params.ref_data_hmf_data_path.replaceAll('/$', '')}/${v}" } - if(params.panel !== null) { + if (params.panel !== null) { params.panel_data_paths[params.panel][params.genome_version.toString()] .each { k, v -> fps << "${params.ref_data_panel_data_path.replaceAll('/$', '')}/${v}" @@ -267,11 +329,15 @@ class Utils { } fps.each { fp_str -> - if (fp_str === null) return + if (fp_str === null) { + return + } def fp = Utils.getFileObject(fp_str) - if (!fp_str || fp.exists()) return + if (!fp_str || fp.exists()) { + return + } if (fp_str.endsWith('/')) { fp.mkdirs() @@ -303,19 +369,23 @@ class Utils { def (sample_type, sequence_type) = key - if (!meta[key].containsKey(Constants.FileType.BAM) && + if ( + !meta[key].containsKey(Constants.FileType.BAM) && !meta[key].containsKey(Constants.FileType.BAM_REDUX) && - !meta[key].containsKey(Constants.FileType.FASTQ)) { + !meta[key].containsKey(Constants.FileType.CRAM) && + !meta[key].containsKey(Constants.FileType.CRAM_REDUX) && + 
!meta[key].containsKey(Constants.FileType.FASTQ) + ) { - log.error "no BAMs nor BAM_MARKDUPs nor FASTQs provided for ${meta.group_id} ${sample_type}/${sequence_type}\n\n" + - "NB: BAMs or BAM_MARKDUPs or FASTQs are always required as they are the basis to determine input sample type." + log.error "no BAM/CRAM nor BAM_REDUX/CRAM_REDUX nor FASTQ files provided for ${meta.group_id} ${sample_type}/${sequence_type}\n\n" + + "NB: At least one of these files is required as they are the basis to determine input sample type." Nextflow.exit(1) } } // Do not allow donor sample without normal sample - if (Utils.hasDonorDna(meta) && ! Utils.hasNormalDna(meta)) { + if (Utils.hasDonorDna(meta) && !Utils.hasNormalDna(meta)) { log.error "a donor sample but not normal sample was found for ${meta.group_id}\n\n" + "Analysis with a donor sample requires a normal sample." Nextflow.exit(1) @@ -326,8 +396,8 @@ class Utils { // Do not allow donor DNA if (Utils.hasDonorDna(meta)) { - log.error "targeted mode is not compatible with the donor DNA BAM provided for ${meta.group_id}\n\n" + - "The targeted workflow supports only tumor and normal DNA BAMs (and tumor RNA BAMs for TSO500)" + log.error "targeted mode is not compatible with the donor DNA BAM/CRAM provided for ${meta.group_id}\n\n" + + "The targeted workflow supports only tumor and normal DNA BAM/CRAMs (and tumor RNA BAM/CRAMs for TSO500)" Nextflow.exit(1) } @@ -408,6 +478,15 @@ class Utils { Nextflow.exit(1) } + // Require --isofox_gene_ids argument to be provided in PANEL_RESOURCE_CREATION when RNA inputs are present + if (run_config.mode === Constants.RunMode.PANEL_RESOURCE_CREATION && run_config.has_rna && !params.isofox_gene_ids) { + log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Running the panel resource creation workflow with RNA requires that the\n" + + " --isofox_gene_ids argument is set with an appropriate input file.\n" + + 
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.exit(1) + } + } static public getEnumFromString(s, e) { @@ -459,8 +538,21 @@ class Utils { } // Sample names + static public getTumorDnaSampleName(Map named_args, meta) { + def meta_sample = getTumorDnaSample(meta) + def sample_id + + if (named_args.getOrDefault('primary', false)) { + sample_id = meta_sample['sample_id'] + } else { + sample_id = meta_sample.getOrDefault('longitudinal_sample_id', meta_sample['sample_id']) + } + + return sample_id + } + static public getTumorDnaSampleName(meta) { - return getTumorDnaSample(meta)['sample_id'] + getTumorDnaSampleName([:], meta) } static public getTumorRnaSampleName(meta) { diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 8cf99e57..dba90270 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -16,7 +16,6 @@ class WorkflowMain { def default_invalid = false // Set defaults common to all run configuration - if (!params.containsKey('genome_version')) { if (Constants.GENOMES_VERSION_37.contains(params.genome)) { params.genome_version = '37' @@ -62,30 +61,39 @@ class WorkflowMain { return } - if (run_mode === Constants.RunMode.TARGETED) { + // Attempt to set default panel data path; make no assumption on valid 'panel' value + if (run_mode === Constants.RunMode.TARGETED || run_mode === Constants.RunMode.PREPARE_REFERENCE) { - // Attempt to set default panel data path; make no assumption on valid 'panel' value if (params.containsKey('panel')) { - if (params.panel == 'tso500' && params.genome_version.toString() == '37') { - params.ref_data_panel_data_path = Constants.TSO500_PANEL_37_PATH - } else if (params.panel == 'tso500' && params.genome_version.toString() == '38') { - params.ref_data_panel_data_path = Constants.TSO500_PANEL_38_PATH + + if (params.panel == 'tso500') { + if (params.genome_version.toString() == '37') { + params.ref_data_panel_data_path = Constants.TSO500_PANEL_37_PATH + } else if 
(params.genome_version.toString() == '38') { + params.ref_data_panel_data_path = Constants.TSO500_PANEL_38_PATH + } } + } + } + + + if (run_mode === Constants.RunMode.TARGETED) { + // When fastp UMI is enabled, REDUX UMI should be as well - if (params.fastp_umi && (!params.containsKey('redux_umi') || !params.redux_umi)) { - params.redux_umi = true + if (params.fastp_umi_enabled && (!params.containsKey('redux_umi_enabled') || !params.redux_umi_enabled)) { + params.redux_umi_enabled = true } // Set the REDUX UMI duplex delimiter to '_' when the following conditions are met: // - both fastp and REDUX UMI processing enabled // - fastp is using a duplex UMI location type (per_index or per_read) // - no REDUX duplex delimiter has been set - def fastp_and_redux_umi = params.fastp_umi && params.redux_umi + def fastp_and_redux_umi_enabled = params.fastp_umi_enabled && params.redux_umi_enabled def fastp_duplex_location = params.containsKey('fastp_umi_location') && (params.fastp_umi_location == 'per_index' || params.fastp_umi_location == 'per_read') def no_umi_duplex_delim = !params.containsKey('redux_umi_duplex_delim') || !params.redux_umi_duplex_delim - if (fastp_and_redux_umi && fastp_duplex_location && no_umi_duplex_delim) { + if (fastp_and_redux_umi_enabled && fastp_duplex_location && no_umi_duplex_delim) { params.redux_umi_duplex_delim = '_' } @@ -193,6 +201,18 @@ class WorkflowMain { def run_mode = Utils.getRunMode(params.mode, log) + if (run_mode === Constants.RunMode.PREPARE_REFERENCE && params.ref_data_types == null) { + + def ref_data_types = Utils.getEnumNames(Constants.RefDataType).join('\n - ') + + log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " CLI argument --ref_data_types is required for mode prepare_reference.\n" + + " Please specify one or more of the below valid values (separated by commas)\n" + + " - ${ref_data_types}\n" + + 
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.exit(1) + } + if (run_mode === Constants.RunMode.TARGETED) { if (!params.containsKey('panel') || params.panel === null) { @@ -200,7 +220,7 @@ class WorkflowMain { def panels = Constants.PANELS_DEFINED.join('\n - ') log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " A panel is required to be set using the --panel CLI argument or in a\n" + - " configuration file when running in targeted mode.\n" + + " configuration file when running in targeted mode or panel resource creation mode.\n" + " Currently, the available built-in panels are:\n" + " - ${panels}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" @@ -224,6 +244,30 @@ class WorkflowMain { } } + if (run_mode === Constants.RunMode.PURITY_ESTIMATE) { + + def purity_estimate_modes = [Constants.RunMode.WGTS, Constants.RunMode.TARGETED] + + def purity_mode_enum = !params.purity_estimate_mode + ? 
null + : Utils.getEnumFromString(params.purity_estimate_mode, Constants.RunMode) + + if (!purity_mode_enum || !purity_estimate_modes.contains(purity_mode_enum)) { + + def purity_estimate_modes_str = purity_estimate_modes + .collect { e -> e.name().toLowerCase() } + .join('\n - ') + + log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " A valid purity estimate run mode must be set using the --purity_estimate_mode\n" + + " CLI argument or in a configuration file.\n" + + " Currently, the available run modes are:\n" + + " - ${purity_estimate_modes_str}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.exit(1) + } + } + if (params.ref_data_genome_alt !== null) { if (params.genome_type != 'alt') { log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + @@ -247,17 +291,17 @@ class WorkflowMain { // UMI parameters def fastp_umi_args_set_any = params.fastp_umi_location || params.fastp_umi_length || params.fastp_umi_skip >= 0 - if (fastp_umi_args_set_any && !params.fastp_umi) { + if (fastp_umi_args_set_any && !params.fastp_umi_enabled) { log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Detected use of fastp UMI parameters but fastp UMI processing has not been enabled.\n" + - " Please review your configuration and set the fastp_umi flag or otherwise adjust\n" + - " accordingly.\n" + + " Please review your configuration and set the fastp_umi_enabled flag or otherwise " + + " adjust accordingly.\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" Nextflow.exit(1) } def fastp_umi_args_set_all = params.fastp_umi_location && params.fastp_umi_length && params.fastp_umi_skip >= 0 - if (params.fastp_umi && !fastp_umi_args_set_all) { + if (params.fastp_umi_enabled && !fastp_umi_args_set_all) { log.error 
"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Refusing to run fastp UMI processing without having any UMI params configured.\n" + " Please review your configuration and appropriately set all fastp_umi_* parameters.\n" + @@ -265,10 +309,10 @@ class WorkflowMain { Nextflow.exit(1) } - if (params.redux_umi_duplex_delim && params.redux_umi === false) { + if (params.redux_umi_duplex_delim && params.redux_umi_enabled === false) { log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Detected use of REDUX UMI parameters but REDUX UMI processing has not been\n" + - " enabled. Please review your configuration and set the redux_umi flag or\n" + + " enabled. Please review your configuration and set the redux_umi_enabled flag or\n" + " otherwise adjust accordingly.\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" Nextflow.exit(1) @@ -289,7 +333,6 @@ class WorkflowMain { return [ mode: run_mode, - panel: run_mode === Constants.RunMode.TARGETED ? 
params.panel : null, stages: stages, has_dna: inputs.any { Utils.hasTumorDna(it) }, has_rna: inputs.any { Utils.hasTumorRna(it) }, @@ -297,4 +340,98 @@ class WorkflowMain { has_dna_fastq: inputs.any { Utils.hasTumorDnaFastq(it) || Utils.hasNormalDnaFastq(it) }, ] } + + public static getPrepConfigFromSamplesheet(run_config) { + return [ + prepare_ref_data_only: false, + + require_fasta: true, + require_fai: true, + require_dict: true, + require_img: true, + + require_bwamem2_index: run_config.has_dna_fastq && run_config.stages.alignment, + require_star_index: run_config.has_rna_fastq && run_config.stages.alignment, + + require_gridss_index: run_config.has_dna && run_config.mode === Constants.RunMode.WGTS && run_config.stages.virusinterpreter, + require_hmftools_data: true, + require_panel_data: run_config.mode === Constants.RunMode.TARGETED, + ] + } + + public static getPrepConfigFromCli(params, log) { + def ref_data_types = params.ref_data_types + .tokenize(',') + .collect { + def ref_data_type_enum = Utils.getEnumFromString(it, Constants.RefDataType) + + if (!ref_data_type_enum) { + def ref_data_type_str = Utils.getEnumNames(Constants.RefDataType).join('\n - ') + log.error "received invalid ref data type: '${it}'. 
Valid options are:\n - ${ref_data_type_str}" + Nextflow.exit(1) + } + + return ref_data_type_enum + } + + if ( + ref_data_types.contains(Constants.RefDataType.WGS) || + ref_data_types.contains(Constants.RefDataType.WTS) || + ref_data_types.contains(Constants.RefDataType.TARGETED) + ) { + ref_data_types += [ + Constants.RefDataType.FASTA, + Constants.RefDataType.FAI, + Constants.RefDataType.DICT, + Constants.RefDataType.IMG, + Constants.RefDataType.HMFTOOLS + ] + } + + if (ref_data_types.contains(Constants.RefDataType.WGS)) { + ref_data_types += [Constants.RefDataType.GRIDSS_INDEX] + } + + if (ref_data_types.contains(Constants.RefDataType.TARGETED)) { + ref_data_types += [Constants.RefDataType.PANEL] + } + + def require_fasta = ref_data_types.contains(Constants.RefDataType.FASTA) + def require_fai = ref_data_types.contains(Constants.RefDataType.FAI) + def require_dict = ref_data_types.contains(Constants.RefDataType.DICT) + def require_img = ref_data_types.contains(Constants.RefDataType.IMG) + + def require_bwamem2_index = ref_data_types.contains(Constants.RefDataType.BWAMEM2_INDEX) || ref_data_types.contains(Constants.RefDataType.DNA_ALIGNMENT) + def require_star_index = ref_data_types.contains(Constants.RefDataType.STAR_INDEX) || ref_data_types.contains(Constants.RefDataType.RNA_ALIGNMENT) + + def require_gridss_index = ref_data_types.contains(Constants.RefDataType.GRIDSS_INDEX) + def require_hmftools_data = ref_data_types.contains(Constants.RefDataType.HMFTOOLS) + def require_panel_data = ref_data_types.contains(Constants.RefDataType.PANEL) + + if (require_panel_data) { + if (params.panel == null) { + require_panel_data = false + log.warn "Skipping preparing panel specific reference data as --panel CLI argument was not provided" + } else if (!Constants.PANELS_DEFINED.contains(params.panel)) { + require_panel_data = false + log.warn "Skipping preparing panel specific reference data for custom panel: ${params.panel}" + } + } + + return [ + prepare_ref_data_only: 
true, + + require_fasta: require_fasta, + require_fai: require_fai, + require_dict: require_dict, + require_img: require_img, + + require_bwamem2_index: require_bwamem2_index, + require_star_index: require_star_index, + + require_gridss_index: require_gridss_index, + require_hmftools_data: require_hmftools_data, + require_panel_data: require_panel_data, + ] + } } diff --git a/main.nf b/main.nf index facb872d..e1e528c1 100644 --- a/main.nf +++ b/main.nf @@ -58,8 +58,11 @@ if (workflow.stubRun && params.create_stub_placeholders) { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { TARGETED } from './workflows/targeted' -include { WGTS } from './workflows/wgts' +include { PANEL_RESOURCE_CREATION } from './workflows/panel_resource_creation' +include { PREPARE_REFERENCE } from './workflows/prepare_reference' +include { PURITY_ESTIMATE } from './workflows/purity_estimate' +include { TARGETED } from './workflows/targeted' +include { WGTS } from './workflows/wgts' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -70,17 +73,35 @@ include { WGTS } from './workflows/wgts' // // WORKFLOW: Run main analysis pipeline depending on type of input // -run_mode = Utils.getRunMode(params.mode, log) workflow NFCORE_ONCOANALYSER { - if (run_mode === Constants.RunMode.WGTS) { - WGTS() - } else if (run_mode === Constants.RunMode.TARGETED) { - TARGETED() + // Get run mode + run_mode = Utils.getRunMode(params.mode, log) + + // Run selected workflow + // NOTE(SW): prepare reference is checked early as params.input is not required + if (run_mode === Constants.RunMode.PREPARE_REFERENCE) { + PREPARE_REFERENCE() } else { - log.error("received bad run mode: ${run_mode}") - Nextflow.exit(1) + // Parse and validate inputs + inputs = Utils.parseInput(params.input, workflow.stubRun, log) + run_config = WorkflowMain.getRunConfig(params, inputs, log) + Utils.validateInput(inputs, run_config, params, log) + 
+ // Run requested workflow + if (run_mode === Constants.RunMode.WGTS) { + WGTS(inputs, run_config) + } else if (run_mode === Constants.RunMode.TARGETED) { + TARGETED(inputs, run_config) + } else if (run_mode === Constants.RunMode.PURITY_ESTIMATE) { + PURITY_ESTIMATE(inputs, run_config) + } else if (run_mode === Constants.RunMode.PANEL_RESOURCE_CREATION) { + PANEL_RESOURCE_CREATION(inputs, run_config) + } else { + log.error("received bad run mode: ${run_mode}") + Nextflow.exit(1) + } } } diff --git a/modules/local/amber/environment.yml b/modules/local/amber/environment.yml index 7a77f054..42a3e5d5 100644 --- a/modules/local/amber/environment.yml +++ b/modules/local/amber/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-amber=4.1.1 + - bioconda::hmftools-amber=4.2 diff --git a/modules/local/amber/main.nf b/modules/local/amber/main.nf index aa2e314c..b75b047c 100644 --- a/modules/local/amber/main.nf +++ b/modules/local/amber/main.nf @@ -4,18 +4,20 @@ process AMBER { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hmftools-amber:4.1.1--hdfd78af_0' : - 'biocontainers/hmftools-amber:4.1.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hmftools-amber:4.2--hdfd78af_0' : + 'biocontainers/hmftools-amber:4.2--hdfd78af_0' }" input: tuple val(meta), path(tumor_bam), path(normal_bam), path(donor_bam), path(tumor_bai), path(normal_bai), path(donor_bai) val genome_ver path heterozygous_sites - path target_region_bed + path target_regions_bed + val tumor_min_depth output: tuple val(meta), path('amber/'), emit: amber_dir path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -25,17 +27,21 @@ process AMBER { def xmx_mod = task.ext.xmx_mod ?: 0.75 + def log_level_arg = task.ext.log_level ? 
"-log_level ${task.ext.log_level}" : '' + def reference_ids = [] - if (meta.normal_id != null) reference_ids.add(meta.normal_id) - if (meta.donor_id != null) reference_ids.add(meta.donor_id) + if (meta.normal_id != null) { reference_ids.add(meta.normal_id) } + if (meta.donor_id != null) { reference_ids.add(meta.donor_id) } def reference_arg = reference_ids.size() > 0 ? "-reference ${String.join(",", reference_ids)}" : '' def reference_bams = [] - if (normal_bam) reference_bams.add(normal_bam.toString()) - if (donor_bam) reference_bams.add(donor_bam.toString()) + if (normal_bam) { reference_bams.add(normal_bam.toString()) } + if (donor_bam) { reference_bams.add(donor_bam.toString()) } def reference_bam_arg = reference_bams.size() > 0 ? "-reference_bam ${String.join(",", reference_bams)}" : '' - def target_regions_bed_arg = target_region_bed ? "-target_regions_bed ${target_region_bed}" : '' + def target_regions_bed_arg = target_regions_bed ? "-target_regions_bed ${target_regions_bed}" : '' + + def tumor_min_depth_arg = tumor_min_depth ? "-tumor_min_depth ${tumor_min_depth}" : '' """ amber \\ @@ -45,9 +51,11 @@ process AMBER { -tumor_bam ${tumor_bam} \\ ${reference_arg} \\ ${reference_bam_arg} \\ - ${target_regions_bed_arg} \\ -ref_genome_version ${genome_ver} \\ + ${target_regions_bed_arg} \\ -loci ${heterozygous_sites} \\ + ${tumor_min_depth_arg} \\ + ${log_level_arg} \\ -threads ${task.cpus} \\ -output_dir amber/ @@ -60,6 +68,7 @@ process AMBER { stub: """ mkdir -p amber/ + touch amber/placeholder echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml diff --git a/modules/local/amber/meta.yml b/modules/local/amber/meta.yml index 476550ea..72601e98 100644 --- a/modules/local/amber/meta.yml +++ b/modules/local/amber/meta.yml @@ -14,7 +14,7 @@ input: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. 
[id: 'sample_id'] - tumor_bam: type: file description: Tumor BAM file @@ -46,22 +46,29 @@ input: type: file description: AMBER heterozygous sites file pattern: "*.{vcf.gz}" - - target_region_bed: + - target_regions_bed: type: file - description: Target region BED file (optional) + description: Target regions BED file (optional) pattern: "*.{bed}" + - tumor_min_depth: + type: string + description: Minimum depth for a site to be considered output: - meta: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. [id: 'sample_id'] - amber_dir: - type: directory + file: directory description: AMBER output directory + pattern: "versions.yml" - versions: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/bamtools/environment.yml b/modules/local/bamtools/environment.yml index f340a260..bb739b9c 100644 --- a/modules/local/bamtools/environment.yml +++ b/modules/local/bamtools/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-bam-tools=1.3 + - bioconda::hmftools-bam-tools=1.4.2 diff --git a/modules/local/bamtools/main.nf b/modules/local/bamtools/main.nf index 02e9c9dc..226f0c9a 100644 --- a/modules/local/bamtools/main.nf +++ b/modules/local/bamtools/main.nf @@ -4,17 +4,20 @@ process BAMTOOLS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/hmftools-bam-tools:1.3--hdfd78af_0' : - 'biocontainers/hmftools-bam-tools:1.3--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hmftools-bam-tools:1.4.2--hdfd78af_0' : + 'biocontainers/hmftools-bam-tools:1.4.2--hdfd78af_0' }" input: tuple val(meta), path(bam), path(bai) path genome_fasta val genome_ver + path driver_gene_panel + path ensembl_data_resources output: tuple val(meta), path("${meta.id}_bamtools/"), emit: metrics_dir path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -24,6 +27,8 @@ process BAMTOOLS { def xmx_mod = task.ext.xmx_mod ?: 0.75 + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + """ mkdir -p ${meta.id}_bamtools/ @@ -35,8 +40,10 @@ process BAMTOOLS { -bam_file ${bam} \\ -ref_genome ${genome_fasta} \\ -ref_genome_version ${genome_ver} \\ + -driver_gene_panel ${driver_gene_panel} \\ + -ensembl_data_dir ${ensembl_data_resources} \\ + ${log_level_arg} \\ -threads ${task.cpus} \\ - -log_level INFO \\ -output_dir ${meta.id}_bamtools/ cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/bamtools/meta.yml b/modules/local/bamtools/meta.yml index 69dda01b..63c9368b 100644 --- a/modules/local/bamtools/meta.yml +++ b/modules/local/bamtools/meta.yml @@ -29,6 +29,13 @@ input: - genome_ver: type: string description: Reference genome version + - driver_gene_panel: + type: file + description: Driver gene panel file + pattern: "*.{tsv}" + - ensembl_data_resources: + type: directory + description: HMF ensembl data resources directory output: - meta: type: map @@ -42,5 +49,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/bwa-mem2/mem/environment.yml b/modules/local/bwa-mem2/mem/environment.yml index 7fc9ecd7..e6e35644 100644 --- 
a/modules/local/bwa-mem2/mem/environment.yml +++ b/modules/local/bwa-mem2/mem/environment.yml @@ -4,6 +4,6 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-bwa-plus=1.0.0 + - bioconda::bwa-mem2=2.3 - bioconda::samtools=1.21 - bioconda::sambamba=1.0.1 diff --git a/modules/local/bwa-mem2/mem/main.nf b/modules/local/bwa-mem2/mem/main.nf index ae70e0b5..bd6a30f3 100644 --- a/modules/local/bwa-mem2/mem/main.nf +++ b/modules/local/bwa-mem2/mem/main.nf @@ -4,8 +4,8 @@ process BWAMEM2_ALIGN { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hmftools-bwa-plus:1.0.0--h077b44d_0' : - 'biocontainers/hmftools-bwa-plus:1.0.0--h077b44d_0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-4dde50190ae599f2bb2027cb2c8763ea00fb5084:596c0d6a494faa218562f2be03af2714d454da4f-0' : + 'biocontainers/mulled-v2-4dde50190ae599f2bb2027cb2c8763ea00fb5084:596c0d6a494faa218562f2be03af2714d454da4f-0' }" input: tuple val(meta), path(reads_fwd), path(reads_rev) @@ -15,6 +15,7 @@ process BWAMEM2_ALIGN { output: tuple val(meta), path('*.bam'), path('*.bai'), emit: bam path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -30,7 +31,7 @@ process BWAMEM2_ALIGN { """ ln -fs \$(find -L ${genome_bwamem2_index} -type f) ./ - bwa-plus mem \\ + bwa-mem2 mem \\ ${args} \\ -Y \\ -K 100000000 \\ @@ -54,9 +55,10 @@ process BWAMEM2_ALIGN { --out ${output_fn} \\ /dev/stdin + # NOTE(SW): bwa-mem2 version hardcoded as 2.3 reports the wrong version, see https://github.com/bwa-mem2/bwa-mem2/issues/276 cat <<-END_VERSIONS > versions.yml "${task.process}": - bwa-plus: \$(bwa-plus version 2>/dev/null) + bwa-mem2: 2.3 sambamba: \$(sambamba --version 2>&1 | sed -n '/^sambamba / { s/^.* //p }' | head -n1) END_VERSIONS """ diff --git a/modules/local/bwa-mem2/mem/meta.yml 
b/modules/local/bwa-mem2/mem/meta.yml index da61d816..305a01a8 100644 --- a/modules/local/bwa-mem2/mem/meta.yml +++ b/modules/local/bwa-mem2/mem/meta.yml @@ -16,7 +16,7 @@ input: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. [id: 'sample_id'] - reads_fwd: type: file description: Forward reads FASTQ file @@ -37,7 +37,7 @@ output: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. [id: 'sample_id'] - bam: type: list description: BAM and BAI file @@ -46,6 +46,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" - "@mkcmkc" diff --git a/modules/local/chord/environment.yml b/modules/local/chord/environment.yml index cd5b4cb1..6e71ad2b 100644 --- a/modules/local/chord/environment.yml +++ b/modules/local/chord/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-chord=2.1.0 + - bioconda::hmftools-chord=2.1.2 diff --git a/modules/local/chord/main.nf b/modules/local/chord/main.nf index 2f13c91f..0f6ab506 100644 --- a/modules/local/chord/main.nf +++ b/modules/local/chord/main.nf @@ -4,8 +4,8 @@ process CHORD { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/hmftools-chord:2.1.0--hdfd78af_0' : - 'biocontainers/hmftools-chord:2.1.0--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hmftools-chord:2.1.2--hdfd78af_0' : + 'biocontainers/hmftools-chord:2.1.2--hdfd78af_0' }" input: tuple val(meta), path(smlv_vcf), path(sv_vcf) @@ -16,6 +16,7 @@ process CHORD { output: tuple val(meta), path('chord/'), emit: chord_dir path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -25,6 +26,8 @@ process CHORD { def xmx_mod = task.ext.xmx_mod ?: 0.95 + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + """ ## NOTE(LN): The CHORD jar runs an embedded R script using 'com.hartwig.hmftools.common.utils.r.RExecutor' which requires absolute ## paths. Relative paths don't work because RExecutor executes from a tmp dir, and not the working dir of this nextflow process @@ -37,20 +40,20 @@ process CHORD { -sample ${meta.sample_id} \\ -snv_indel_vcf_file \$(realpath ${smlv_vcf}) \\ -sv_vcf_file \$(realpath ${sv_vcf}) \\ - -output_dir \$(realpath chord/) \\ -ref_genome ${genome_fasta} \\ - -log_level DEBUG + ${log_level_arg} \\ + -output_dir \$(realpath chord/) cat <<-END_VERSIONS > versions.yml "${task.process}": chord: \$(chord -version | sed -n '/^CHORD version/ { s/^.* //p }') END_VERSIONS - """ stub: """ mkdir -p chord/ + touch chord/${meta.sample_id}.chord.mutation_contexts.tsv touch chord/${meta.sample_id}.chord.prediction.tsv diff --git a/modules/local/chord/meta.yml b/modules/local/chord/meta.yml index 6e92da8f..49457be0 100644 --- a/modules/local/chord/meta.yml +++ b/modules/local/chord/meta.yml @@ -35,9 +35,6 @@ input: type: file description: Reference genome assembly dict file pattern: "*.{dict}" - - genome_ver: - type: string - description: Reference genome version output: - meta: type: map @@ -51,5 +48,8 @@ output: type: file description: File containing software versions 
pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/cider/main.nf b/modules/local/cider/main.nf index 7c169b57..c51d24ee 100644 --- a/modules/local/cider/main.nf +++ b/modules/local/cider/main.nf @@ -15,6 +15,7 @@ process CIDER { output: tuple val(meta), path('cider/*'), emit: cider_dir path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -24,6 +25,8 @@ process CIDER { def xmx_mod = task.ext.xmx_mod ?: 0.75 + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + """ cider \\ -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\ @@ -31,11 +34,12 @@ process CIDER { ${args} \\ -sample ${meta.sample_id} \\ -bam ${bam} \\ + -ref_genome_version ${genome_ver} \\ -blast \$(which blastn | sed 's#/bin/blastn##') \\ -blast_db ${human_blastdb} \\ - -ref_genome_version ${genome_ver} \\ - -threads ${task.cpus} \\ -write_cider_bam \\ + -threads ${task.cpus} \\ + ${log_level_arg} \\ -output_dir cider/ cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/cider/meta.yml b/modules/local/cider/meta.yml index 8c0dc0d5..08967c8e 100644 --- a/modules/local/cider/meta.yml +++ b/modules/local/cider/meta.yml @@ -15,7 +15,7 @@ input: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. [id: 'sample_id'] - bam: type: file description: BAM file @@ -35,7 +35,7 @@ output: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. 
[id: 'sample_id'] - cider_dir: type: directory description: CIDER output directory @@ -43,5 +43,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/cobalt/panel_normalisation/environment.yml b/modules/local/cobalt/panel_normalisation/environment.yml new file mode 100644 index 00000000..c7a860fb --- /dev/null +++ b/modules/local/cobalt/panel_normalisation/environment.yml @@ -0,0 +1,7 @@ +name: cobalt_panel_normalisation +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-cobalt=2.1 diff --git a/modules/local/cobalt/panel_normalisation/main.nf b/modules/local/cobalt/panel_normalisation/main.nf new file mode 100644 index 00000000..659d9490 --- /dev/null +++ b/modules/local/cobalt/panel_normalisation/main.nf @@ -0,0 +1,65 @@ +process COBALT_PANEL_NORMALISATION { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-cobalt:2.1--hdfd78af_1' : + 'biocontainers/hmftools-cobalt:2.1--hdfd78af_1' }" + + input: + tuple path('amber_dir.*'), path('cobalt_dir.*') + val genome_ver + path gc_profile + path target_regions_bed + + output: + path 'cobalt.region_normalisation.*.tsv', emit: cobalt_normalisation + path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + + """ + mkdir -p inputs/ + + for fp in \$(find -L amber_dir.* cobalt_dir.* -type f ! 
-name '*.version'); do + ln -sf ../\${fp} inputs/\${fp##*/}; + done + + ( + echo SampleId + basename -s .amber.baf.tsv.gz -a inputs/*.amber.baf.tsv.gz + ) > sample_ids.txt + + cobalt \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + com.hartwig.hmftools.cobalt.norm.NormalisationFileBuilder \\ + ${args} \\ + -sample_id_file sample_ids.txt \\ + -amber_dir inputs/ \\ + -cobalt_dir inputs/ \\ + -ref_genome_version ${genome_ver} \\ + -gc_profile ${gc_profile} \\ + -target_regions_bed ${target_regions_bed} \\ + ${log_level_arg} \\ + -output_file cobalt.region_normalisation.${genome_ver}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cobalt_panel_normalisation: \$(cobalt -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + touch cobalt.region_normalisation.${genome_ver}.tsv + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/cobalt/panel_normalisation/meta.yml b/modules/local/cobalt/panel_normalisation/meta.yml new file mode 100644 index 00000000..166085fc --- /dev/null +++ b/modules/local/cobalt/panel_normalisation/meta.yml @@ -0,0 +1,44 @@ +name: cobalt_panel_normalisation +description: Count bam lines determines the read depth ratios of the supplied tumor and reference genomes +keywords: + - cobalt + - read depth ratios + - cnv +tools: + - cobalt: + description: Count bam lines determines the read depth ratios of the supplied tumor and reference genomes. 
+ homepage: https://github.com/hartwigmedical/hmftools/tree/master/cobalt + documentation: https://github.com/hartwigmedical/hmftools/tree/master/cobalt + licence: ["GPL v3"] +input: + - amber_dirs: + type: directory + description: List of AMBER output directories + - cobalt_dirs: + type: directory + description: List of COBALT output directories + - genome_ver: + type: string + description: Reference genome version + - gc_profile: + type: file + description: GC profile file + pattern: "*.{cnp}" + - target_regions_bed: + type: file + description: Target regions BED file + pattern: "*.{bed}" +output: + - cobalt_normalisation: + type: file + description: COBALT normalisation file + pattern: "*.{tsv}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - command_files: + type: list + description: List of command files +authors: + - "@scwatts" diff --git a/modules/local/esvee/call/environment.yml b/modules/local/cobalt/run/environment.yml similarity index 55% rename from modules/local/esvee/call/environment.yml rename to modules/local/cobalt/run/environment.yml index 01355c11..80e8b458 100644 --- a/modules/local/esvee/call/environment.yml +++ b/modules/local/cobalt/run/environment.yml @@ -1,7 +1,7 @@ -name: esvee_call +name: cobalt_run channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::hmftools-esvee=1.0.3 + - bioconda::hmftools-cobalt=2.1 diff --git a/modules/local/cobalt/main.nf b/modules/local/cobalt/run/main.nf similarity index 65% rename from modules/local/cobalt/main.nf rename to modules/local/cobalt/run/main.nf index c4bbc8e6..03541aa0 100644 --- a/modules/local/cobalt/main.nf +++ b/modules/local/cobalt/run/main.nf @@ -4,18 +4,20 @@ process COBALT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/hmftools-cobalt:2.0--hdfd78af_0' : - 'biocontainers/hmftools-cobalt:2.0--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hmftools-cobalt:2.1--hdfd78af_1' : + 'biocontainers/hmftools-cobalt:2.1--hdfd78af_1' }" input: tuple val(meta), path(tumor_bam), path(normal_bam), path(tumor_bai), path(normal_bai) path gc_profile path diploid_regions path target_region_normalisation + val targeted_mode output: tuple val(meta), path('cobalt/'), emit: cobalt_dir path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -25,15 +27,18 @@ process COBALT { def xmx_mod = task.ext.xmx_mod ?: 0.75 + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + def reference_arg = meta.containsKey('normal_id') ? "-reference ${meta.normal_id}" : '' def reference_bam_arg = normal_bam ? "-reference_bam ${normal_bam}" : '' - def diploid_regions_arg = diploid_regions ? "-tumor_only_diploid_bed ${diploid_regions}" : '' - def target_region_arg = target_region_normalisation ? "-target_region ${target_region_normalisation}" : '' + def target_region_norm_file_arg = target_region_normalisation ? "-target_region_norm_file ${target_region_normalisation}" : '' + + def tumor_only_mode = !meta.containsKey('normal_id') - def run_mode = Utils.getEnumFromString(params.mode, Constants.RunMode) - def pcf_gamma_arg = run_mode === Constants.RunMode.TARGETED && !meta.containsKey('normal_id') - ? "-pcf_gamma 50" : "" + def pcf_gamma_arg = targeted_mode && tumor_only_mode ? '-pcf_gamma 50' : '' + + def diploid_regions_arg = !targeted_mode && tumor_only_mode ? 
"-tumor_only_diploid_bed ${diploid_regions}" : '' """ cobalt \\ @@ -41,24 +46,26 @@ process COBALT { ${args} \\ -tumor ${meta.tumor_id} \\ -tumor_bam ${tumor_bam} \\ + ${pcf_gamma_arg} \\ ${reference_arg} \\ ${reference_bam_arg} \\ - -threads ${task.cpus} \\ -gc_profile ${gc_profile} \\ ${diploid_regions_arg} \\ - ${target_region_arg} \\ - ${pcf_gamma_arg} \\ + ${target_region_norm_file_arg} \\ + ${log_level_arg} \\ + -threads ${task.cpus} \\ -output_dir cobalt/ cat <<-END_VERSIONS > versions.yml "${task.process}": - cobalt: \$(cobalt -version | sed -n '/^Cobalt version/ { s/^.* //p }') + cobalt_run: \$(cobalt -version | sed -n '/^Cobalt version/ { s/^.* //p }') END_VERSIONS """ stub: """ mkdir -p cobalt/ + touch cobalt/placeholder echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml diff --git a/modules/local/cobalt/meta.yml b/modules/local/cobalt/run/meta.yml similarity index 86% rename from modules/local/cobalt/meta.yml rename to modules/local/cobalt/run/meta.yml index 61812410..d9fe3b76 100644 --- a/modules/local/cobalt/meta.yml +++ b/modules/local/cobalt/run/meta.yml @@ -1,4 +1,4 @@ -name: cobalt +name: cobalt_run description: Count bam lines determines the read depth ratios of the supplied tumor and reference genomes keywords: - cobalt @@ -15,7 +15,7 @@ input: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. [id: 'sample_id'] - tumor_bam: type: file description: Tumor BAM file @@ -44,12 +44,15 @@ input: type: file description: Normalisation file (optional) pattern: "*.{tsv}" + - targeted_mode: + type: boolean + description: Flag indicating whether targeted mode is set output: - meta: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. 
[id: 'sample_id'] - cobalt_dir: type: directory description: COBALT output directory @@ -57,5 +60,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/cuppa/environment.yml b/modules/local/cuppa/environment.yml index d6280c19..68842773 100644 --- a/modules/local/cuppa/environment.yml +++ b/modules/local/cuppa/environment.yml @@ -5,3 +5,5 @@ channels: - defaults dependencies: - bioconda::hmftools-cuppa=2.3.2 + - conda-forge::r-stringr>=1.5 + - conda-forge::r-stringi>=1.8 diff --git a/modules/local/cuppa/main.nf b/modules/local/cuppa/main.nf index 3a4ab7ec..c21fd7bc 100644 --- a/modules/local/cuppa/main.nf +++ b/modules/local/cuppa/main.nf @@ -17,6 +17,7 @@ process CUPPA { output: tuple val(meta), path('cuppa/'), emit: cuppa_dir path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -27,6 +28,8 @@ process CUPPA { def xmx_mod = task.ext.xmx_mod ?: 0.75 + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + def isofox_dir_name = categories == 'ALL' ? 'isofox_dir__prepared' : isofox_dir def isofox_dir_arg = isofox_dir ? "-isofox_dir ${isofox_dir_name}" : '' def ref_alt_sj_sites_arg = isofox_dir ? 
"-ref_alt_sj_sites ${cuppa_alt_sj}" : '' @@ -57,8 +60,9 @@ process CUPPA { ${linx_dir_arg} \\ ${virusinterpreter_dir_arg} \\ ${isofox_dir_arg} \\ - ${ref_alt_sj_sites_arg} \\ -ref_genome_version ${genome_ver} \\ + ${ref_alt_sj_sites_arg} \\ + ${log_level_arg} \\ -output_dir cuppa/ # Make predictions diff --git a/modules/local/cuppa/meta.yml b/modules/local/cuppa/meta.yml index 58e6549a..281b1d6e 100644 --- a/modules/local/cuppa/meta.yml +++ b/modules/local/cuppa/meta.yml @@ -53,5 +53,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/custom/extract_tarball/main.nf b/modules/local/custom/extract_tarball/main.nf index e956b0ad..cbe743e1 100644 --- a/modules/local/custom/extract_tarball/main.nf +++ b/modules/local/custom/extract_tarball/main.nf @@ -11,6 +11,7 @@ process CUSTOM_EXTRACTTARBALL { output: path "${meta.id}/", emit: extracted_dir + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when diff --git a/modules/local/custom/lilac_extract_and_index_contig/main.nf b/modules/local/custom/lilac_extract_and_index_contig/main.nf index 986fe6cf..128140bf 100644 --- a/modules/local/custom/lilac_extract_and_index_contig/main.nf +++ b/modules/local/custom/lilac_extract_and_index_contig/main.nf @@ -14,9 +14,10 @@ process CUSTOM_EXTRACTCONTIG { val run output: - path "*extracted.fa" , emit: contig - path "*extracted.fa.*", emit: bwamem2_index + path '*extracted.fa' , emit: contig + path '*extracted.fa.*', emit: bwamem2_index path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when diff --git a/modules/local/custom/lilac_realign_reads_lilac/main.nf b/modules/local/custom/lilac_realign_reads_lilac/main.nf index e5667fb3..df9a45b8 100644 --- a/modules/local/custom/lilac_realign_reads_lilac/main.nf +++ 
b/modules/local/custom/lilac_realign_reads_lilac/main.nf @@ -13,8 +13,9 @@ process CUSTOM_REALIGNREADS { path reference_indices output: - tuple val(meta), path("*realigned.bam"), path("*realigned.bam.bai"), emit: bam + tuple val(meta), path('*realigned.bam'), path('*realigned.bam.bai'), emit: bam path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when diff --git a/modules/local/custom/lilac_slice/main.nf b/modules/local/custom/lilac_slice/main.nf index 56498143..93a7978d 100644 --- a/modules/local/custom/lilac_slice/main.nf +++ b/modules/local/custom/lilac_slice/main.nf @@ -12,8 +12,9 @@ process CUSTOM_SLICE { path bed output: - tuple val(meta), path("*sliced.bam"), path("*sliced.bam.bai"), emit: bam + tuple val(meta), path('*sliced.bam'), path('*sliced.bam.bai'), emit: bam path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when diff --git a/modules/local/custom/write_reference_data/main.nf b/modules/local/custom/write_reference_data/main.nf index 04a1151e..c8fe6185 100644 --- a/modules/local/custom/write_reference_data/main.nf +++ b/modules/local/custom/write_reference_data/main.nf @@ -8,7 +8,6 @@ process WRITE_REFERENCE_DATA { input: path fp - val workflow_version output: path fp, includeInputs: true diff --git a/modules/local/esvee/assemble/main.nf b/modules/local/esvee/assemble/main.nf deleted file mode 100644 index 7b043e80..00000000 --- a/modules/local/esvee/assemble/main.nf +++ /dev/null @@ -1,76 +0,0 @@ -process ESVEE_ASSEMBLE { - tag "${meta.id}" - label 'process_high' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/hmftools-esvee:1.0.3--hdfd78af_0' : - 'biocontainers/hmftools-esvee:1.0.3--hdfd78af_0' }" - - input: - tuple val(meta), path(tumor_prep_bam), path(tumor_prep_bai), path(normal_prep_bam), path(normal_prep_bai), path(prep_dir) - path genome_fasta - path genome_fai - path genome_dict - path genome_img - val genome_ver - path decoy_sequences_image - - output: - tuple val(meta), path('assemble/') , emit: assemble_dir - tuple val(meta), path("assemble/${meta.tumor_id}.esvee.raw.vcf.gz"), emit: raw_vcf - path 'versions.yml' , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - - def xmx_mod = task.ext.xmx_mod ?: 0.95 - - def reference_arg = meta.normal_id != null ? "-reference ${meta.normal_id}" : '' - def reference_bam_arg = meta.normal_id != null ? "-reference_bam ${normal_prep_bam}" : '' - - def decoy_genome_arg = decoy_sequences_image ? "-decoy_genome ${decoy_sequences_image}" : '' - - """ - mkdir -p assemble/ - - esvee com.hartwig.hmftools.esvee.assembly.AssemblyApplication \\ - -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\ - ${args} \\ - -tumor ${meta.tumor_id} \\ - -tumor_bam ${tumor_prep_bam} \\ - ${reference_arg} \\ - ${reference_bam_arg} \\ - -esvee_prep_dir ${prep_dir}/ \\ - -ref_genome ${genome_fasta} \\ - -ref_genome_version ${genome_ver} \\ - ${decoy_genome_arg} \\ - -write_types 'JUNC_ASSEMBLY;PHASED_ASSEMBLY;ALIGNMENT;BREAKEND;VCF' \\ - -output_dir assemble/ \\ - -threads ${task.cpus} \\ - -perf_log_time 10 \\ - -log_level DEBUG - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - esvee: \$(esvee -version | sed -n '/^Esvee version/ { s/^.* //p }') - END_VERSIONS - """ - - stub: - """ - mkdir -p assemble/ - - touch assemble/${meta.tumor_id}.esvee.raw.vcf.gz - touch assemble/${meta.tumor_id}.esvee.raw.vcf.gz.tbi - touch assemble/${meta.tumor_id}.esvee.alignment.tsv - touch assemble/${meta.tumor_id}.esvee.assembly.tsv - touch 
assemble/${meta.tumor_id}.esvee.phased_assembly.tsv - touch assemble/${meta.tumor_id}.esvee.breakend.tsv - - echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml - """ -} diff --git a/modules/local/esvee/assemble/meta.yml b/modules/local/esvee/assemble/meta.yml deleted file mode 100644 index e4dffb15..00000000 --- a/modules/local/esvee/assemble/meta.yml +++ /dev/null @@ -1,75 +0,0 @@ -name: esvee_assemble -description: Assemble SVs with ESVEE -keywords: - - assemble - - sv -tools: - - esvee: - description: Structural variant (SV) calling - homepage: https://github.com/hartwigmedical/hmftools/tree/master/esvee - documentation: https://github.com/hartwigmedical/hmftools/tree/master/esvee - licence: ["GPL >=3"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [id: 'sample_id'] - - tumor_prep_bam: - type: file - description: Tumor prep BAM file - pattern: "*.{bam}" - - tumor_prep_bai: - type: file - description: Tumor prep BAI file - pattern: "*.{bai}" - - normal_prep_bam: - type: file - description: Normal prep BAM file - pattern: "*.{bam}" - - normal_prep_bai: - type: file - description: Normal prep BAI file - pattern: "*.{bai}" - - prep_dir: - type: directory - description: ESVEE prep output directory - - genome_fasta: - type: file - description: Reference genome assembly FASTA file - pattern: "*.{fa,fasta}" - - genome_fai: - type: file - description: Reference genome assembly fai file - pattern: "*.{fai}" - - genome_dict: - type: file - description: Reference genome assembly dict file - pattern: "*.{dict}" - - genome_img: - type: file - description: Reference genome assembly img file - pattern: "*.{img}" - - decouy_sequences_image: - type: file - description: ESVEE decoy sequences images file - pattern: "*.{img}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[id: 'sample_id'] - - assembly_dir: - type: directory - description: ESVEE assemble output directory - - raw_vcf: - type: file - description: Raw VCF output file - pattern: "*.{vcf.gz}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@scwatts" diff --git a/modules/local/esvee/call/main.nf b/modules/local/esvee/call/main.nf deleted file mode 100644 index a6480a18..00000000 --- a/modules/local/esvee/call/main.nf +++ /dev/null @@ -1,81 +0,0 @@ -process ESVEE_CALL { - tag "${meta.id}" - label 'process_high' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hmftools-esvee:1.0.3--hdfd78af_0' : - 'biocontainers/hmftools-esvee:1.0.3--hdfd78af_0' }" - - input: - tuple val(meta), path(ref_depth_vcf), path(prep_dir) - path genome_fasta - val genome_ver - path pon_breakends - path pon_breakpoints - path known_fusions - path repeatmasker_annotations - - output: - tuple val(meta), path("caller/") , emit: caller_dir - tuple val(meta), path("caller/${meta.tumor_id}.esvee.unfiltered.vcf.gz"), path("caller/${meta.tumor_id}.esvee.unfiltered.vcf.gz.tbi"), emit: unfiltered_vcf - tuple val(meta), path("caller/${meta.tumor_id}.esvee.somatic.vcf.gz"), path("caller/${meta.tumor_id}.esvee.somatic.vcf.gz.tbi") , emit: somatic_vcf - tuple val(meta), path("caller/${meta.tumor_id}.esvee.germline.vcf.gz"), path("caller/${meta.tumor_id}.esvee.germline.vcf.gz.tbi") , emit: germline_vcf, optional: true - path 'versions.yml' , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - - def xmx_mod = task.ext.xmx_mod ?: 0.95 - - def reference_arg = meta.normal_id != null ? 
"-reference ${meta.normal_id}" : '' - - """ - mkdir -p caller/ - - esvee com.hartwig.hmftools.esvee.caller.CallerApplication \\ - -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\ - ${args} \\ - -sample ${meta.tumor_id} \\ - ${reference_arg} \\ - -input_vcf ${ref_depth_vcf} \\ - -esvee_prep_dir ${prep_dir}/ \\ - -ref_genome_version ${genome_ver} \\ - -known_hotspot_file ${known_fusions} \\ - -pon_sgl_file ${pon_breakends} \\ - -pon_sv_file ${pon_breakpoints} \\ - -repeat_mask_file ${repeatmasker_annotations} \\ - -output_dir caller/ \\ - -log_level DEBUG - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - esvee: \$(esvee -version | sed -n '/^Esvee version/ { s/^.* //p }') - END_VERSIONS - """ - - stub: - """ - mkdir -p caller/ - - vcf_template='##fileformat=VCFv4.1 - ##contig= - #CHROM POS ID REF ALT QUAL FILTER INFO - . . . . . . . - ' - - echo \${vcf_template} | gzip -c > caller/${meta.tumor_id}.esvee.unfiltered.vcf.gz - echo \${vcf_template} | gzip -c > caller/${meta.tumor_id}.esvee.somatic.vcf.gz - - touch caller/${meta.tumor_id}.esvee.unfiltered.vcf.gz.tbi - touch caller/${meta.tumor_id}.esvee.somatic.vcf.gz.tbi - - ${ (meta.normal_id != null) ? "touch caller/${meta.tumor_id}.esvee.germline.vcf.gz" : '' } - ${ (meta.normal_id != null) ? 
"touch caller/${meta.tumor_id}.esvee.germline.vcf.gz.tbi" : '' } - - echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml - """ -} diff --git a/modules/local/esvee/depth_annotator/environment.yml b/modules/local/esvee/depth_annotator/environment.yml deleted file mode 100644 index 2a0425a6..00000000 --- a/modules/local/esvee/depth_annotator/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: esvee_depth_annotator -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::hmftools-esvee=1.0.3 diff --git a/modules/local/esvee/depth_annotator/main.nf b/modules/local/esvee/depth_annotator/main.nf deleted file mode 100644 index 4086179a..00000000 --- a/modules/local/esvee/depth_annotator/main.nf +++ /dev/null @@ -1,71 +0,0 @@ -process ESVEE_DEPTH_ANNOTATOR { - tag "${meta.id}" - label 'process_high' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hmftools-esvee:1.0.3--hdfd78af_0' : - 'biocontainers/hmftools-esvee:1.0.3--hdfd78af_0' }" - - input: - tuple val(meta), path(tumor_bam), path(tumor_bai), path(normal_bam), path(normal_bai), path(raw_vcf) - path genome_fasta - val genome_ver - path unmap_regions - - output: - tuple val(meta), path("depth_annotation/") , emit: depth_annotation_dir - tuple val(meta), path("depth_annotation/${meta.tumor_id}.esvee.ref_depth.vcf.gz"), emit: ref_depth_vcf - path 'versions.yml' , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - - def xmx_mod = task.ext.xmx_mod ?: 0.75 - - def sample_ids = [meta.tumor_id] - def bam_files = [tumor_bam.toString()] - - if(meta.normal_id != null){ - sample_ids.add(meta.normal_id) - bam_files.add(normal_bam.toString()) - } - - def sample_ids_string = String.join(',', sample_ids) - def bam_files_string = String.join(',', bam_files) - - """ - mkdir -p depth_annotation/ 
- - esvee com.hartwig.hmftools.esvee.depth.DepthAnnotator \\ - -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\ - ${args} \\ - -sample ${sample_ids_string} \\ - -bam_file ${bam_files_string} \\ - -input_vcf ${raw_vcf} \\ - -ref_genome ${genome_fasta} \\ - -ref_genome_version ${genome_ver} \\ - -unmap_regions ${unmap_regions} \\ - -output_dir depth_annotation/ \\ - -threads ${task.cpus} \\ - -log_level DEBUG - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - esvee: \$(esvee -version | sed -n '/^Esvee version/ { s/^.* //p }') - END_VERSIONS - """ - - stub: - """ - mkdir -p depth_annotation/ - - touch depth_annotation/${meta.tumor_id}.esvee.ref_depth.vcf.gz - touch depth_annotation/${meta.tumor_id}.esvee.ref_depth.vcf.gz.tbi - - echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml - """ -} diff --git a/modules/local/esvee/depth_annotator/meta.yml b/modules/local/esvee/depth_annotator/meta.yml deleted file mode 100644 index ab349fa7..00000000 --- a/modules/local/esvee/depth_annotator/meta.yml +++ /dev/null @@ -1,68 +0,0 @@ -name: esvee_depth_annotator -description: Annotate ESVEE VCFs with depth information -keywords: - - depth - - annotation - - sv -tools: - - esvee: - description: Structural variant (SV) calling - homepage: https://github.com/hartwigmedical/hmftools/tree/master/esvee - documentation: https://github.com/hartwigmedical/hmftools/tree/master/esvee - licence: ["GPL >=3"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[id: 'sample_id'] - - tumor_bam: - type: file - description: Tumor BAM file - pattern: "*.{bam}" - - tumor_bai: - type: file - description: Tumor BAI file - pattern: "*.{bai}" - - normal_bam: - type: file - description: Normal BAM file - pattern: "*.{bam}" - - normal_bai: - type: file - description: Normal BAI file - pattern: "*.{bai}" - - raw_vcf: - type: file - description: ESVEE assemble raw VCF file - pattern: "*.{vcf.gz}" - - genome_fasta: - type: file - description: Reference genome assembly FASTA file - pattern: "*.{fa,fasta}" - - genome_ver: - type: string - description: Reference genome version - - unmap_regions: - type: file - description: Hartwig unmap regions file - pattern: "*.{tsv}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [id: 'sample_id'] - - depth_annotation_dir: - type: directory - description: ESVEE depth annotation output directory - - ref_depth_vcf: - type: file - description: Depth annotated VCF file - pattern: "*.{vcf.gz}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@scwatts" diff --git a/modules/local/cobalt/environment.yml b/modules/local/esvee/environment.yml similarity index 58% rename from modules/local/cobalt/environment.yml rename to modules/local/esvee/environment.yml index 8b4e2bc1..1e56a788 100644 --- a/modules/local/cobalt/environment.yml +++ b/modules/local/esvee/environment.yml @@ -1,7 +1,7 @@ -name: cobalt +name: esvee channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::hmftools-cobalt=2.0 + - bioconda::hmftools-esvee=1.1.2 diff --git a/modules/local/esvee/main.nf b/modules/local/esvee/main.nf new file mode 100644 index 00000000..abb63cc9 --- /dev/null +++ b/modules/local/esvee/main.nf @@ -0,0 +1,89 @@ +process ESVEE { + tag "${meta.id}" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-esvee:1.1.2--hdfd78af_0' : + 'biocontainers/hmftools-esvee:1.1.2--hdfd78af_0' }" + + input: + tuple val(meta), path(tumor_bam), path(tumor_bai), path(normal_bam), path(normal_bai) + path genome_fasta + path genome_fai + path genome_dict + path genome_img + val genome_ver + path pon_breakends + path pon_breakpoints + path decoy_sequences_image + path known_fusions + path repeatmasker_annotations + path unmap_regions + + output: + tuple val(meta), path('esvee/') , emit: esvee_dir + tuple val(meta), path("esvee/${meta.tumor_id}.esvee.unfiltered.vcf.gz"), path("esvee/${meta.tumor_id}.esvee.unfiltered.vcf.gz.tbi"), emit: unfiltered_vcf + tuple val(meta), path("esvee/${meta.tumor_id}.esvee.somatic.vcf.gz"), path("esvee/${meta.tumor_id}.esvee.somatic.vcf.gz.tbi") , emit: somatic_vcf + tuple val(meta), path("esvee/${meta.tumor_id}.esvee.germline.vcf.gz"), path("esvee/${meta.tumor_id}.esvee.germline.vcf.gz.tbi") , emit: germline_vcf, optional: true + path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + + def reference_arg = meta.normal_id ? "-reference ${meta.normal_id}" : '' + def reference_bam_arg = meta.normal_id ? 
"-reference_bam ${normal_bam}" : '' + + """ + mkdir -p esvee/ + + esvee \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + ${args} \\ + -tumor ${meta.tumor_id} \\ + -tumor_bam ${tumor_bam} \\ + ${reference_arg} \\ + ${reference_bam_arg} \\ + -esvee_prep_dir esvee/ \\ + -ref_genome ${genome_fasta} \\ + -ref_genome_version ${genome_ver} \\ + -known_hotspot_file ${known_fusions} \\ + -pon_sgl_file ${pon_breakends} \\ + -pon_sv_file ${pon_breakpoints} \\ + -repeat_mask_file ${repeatmasker_annotations} \\ + -unmap_regions ${unmap_regions} \\ + -bamtool \$(which sambamba) \\ + -write_types 'PREP_JUNCTION;PREP_BAM;FRAGMENT_LENGTH_DIST;JUNC_ASSEMBLY;PHASED_ASSEMBLY;ALIGNMENT;BREAKEND;VCF' \\ + -threads ${task.cpus} \\ + ${log_level_arg} \\ + -output_dir esvee/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + esvee: \$(java -jar \${ESVEE_JAR} -version | sed 's/^.*Esvee version: //') + END_VERSIONS + """ + + stub: + """ + mkdir -p esvee/ + + touch esvee/${meta.tumor_id}.esvee.unfiltered.vcf.gz + touch esvee/${meta.tumor_id}.esvee.unfiltered.vcf.gz.tbi + touch esvee/${meta.tumor_id}.esvee.somatic.vcf.gz + touch esvee/${meta.tumor_id}.esvee.somatic.vcf.gz.tbi + touch esvee/${meta.tumor_id}.esvee.germline.vcf.gz + touch esvee/${meta.tumor_id}.esvee.germline.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + esvee: \$(echo "1.0-beta") + END_VERSIONS + """ +} diff --git a/modules/local/esvee/call/meta.yml b/modules/local/esvee/meta.yml similarity index 60% rename from modules/local/esvee/call/meta.yml rename to modules/local/esvee/meta.yml index 38979eb8..5bdb20cb 100644 --- a/modules/local/esvee/call/meta.yml +++ b/modules/local/esvee/meta.yml @@ -1,4 +1,4 @@ -name: esvee_call +name: esvee description: Call somatic SVs with ESVEE keywords: - calling @@ -8,28 +8,50 @@ tools: description: Structural variant (SV) calling homepage: https://github.com/hartwigmedical/hmftools/tree/master/esvee documentation: 
https://github.com/hartwigmedical/hmftools/tree/master/esvee - licence: ["GPL >=3"] + licence: ["GPL v3"] input: + - meta: - meta: type: map description: | Groovy Map containing sample information e.g. [id: 'sample_id'] - - ref_depth_vcf: + - tumor_bam: type: file - description: ESVEE depth annotated VCF file - pattern: "*.{vcf.gz}" - - prep_dir: - type: directory - description: ESVEE prep output directory + description: Tumor BAM file + pattern: "*.{bam}" + - tumor_bai: + type: file + description: Tumor BAI file + pattern: "*.{bai}" + - normal_bam: + type: file + description: Normal BAM file + pattern: "*.{bam}" + - normal_bai: + type: file + description: Normal BAI file + pattern: "*.{bai}" - genome_fasta: type: file description: Reference genome assembly FASTA file pattern: "*.{fa,fasta}" + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - genome_dict: + type: file + description: Reference genome assembly dict file + pattern: "*.{dict}" + - genome_img: + type: file + description: Reference genome assembly img file + pattern: "*.{img}" - genome_ver: type: string description: Reference genome version - - pon_breakend: + - pon_breakends: type: file description: GRIDSS breakend PON file pattern: "*.{bed.gz}" @@ -37,6 +59,10 @@ input: type: file description: GRIDSS breakpoint PON file pattern: "*.{bedpe.gz}" + - decoy_sequences_image: + type: file + description: ESVEE decoy sequences images file + pattern: "*.{img}" - known_fusions: type: file description: HMF Known Fusions file @@ -44,15 +70,19 @@ input: - repeatmasker_annotations: type: file description: RepeatMasker annotations file + - unmap_regions: + type: file + description: Hartwig unmap regions file + pattern: "*.{tsv}" output: - meta: type: map description: | Groovy Map containing sample information e.g. 
[id: 'sample_id'] - - caller_dir: + - esvee_dir: type: directory - description: ESVEE call output directory + description: ESVEE output directory - unfiltered_vcf: type: list description: Unfiltered VCF file @@ -69,5 +99,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/esvee/prep/main.nf b/modules/local/esvee/prep/main.nf deleted file mode 100644 index f1c6145e..00000000 --- a/modules/local/esvee/prep/main.nf +++ /dev/null @@ -1,81 +0,0 @@ -process ESVEE_PREP { - tag "${meta.id}" - label 'process_high' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hmftools-esvee:1.0.3--hdfd78af_0' : - 'biocontainers/hmftools-esvee:1.0.3--hdfd78af_0' }" - - input: - tuple val(meta), path(tumor_bam), path(tumor_bai), path(normal_bam), path(normal_bai) - path genome_fasta - val genome_ver - path sv_blocklist - path known_fusions - - output: - tuple val(meta), path("prep/") , emit: prep_dir - tuple val(meta), path("prep/${meta.tumor_id}.*.bam"), path("prep/${meta.tumor_id}.*.bam.bai") , emit: tumor_prep_bam - tuple val(meta), path("prep/${meta.normal_id}.*.bam"), path("prep/${meta.normal_id}.*.bam.bai"), emit: normal_prep_bam, optional: true - path 'versions.yml' , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - - def xmx_mod = task.ext.xmx_mod ?: 0.75 - - def sample_ids = [meta.tumor_id] - def bam_files = [tumor_bam.toString()] - - if(meta.normal_id != null){ - sample_ids.add(meta.normal_id) - bam_files.add(normal_bam.toString()) - } - - def sample_ids_string = String.join(',', sample_ids) - def bam_files_string = String.join(',', bam_files) - - """ - mkdir -p prep/ - - SAMBAMBA_PATH=\$(which sambamba) - - esvee 
com.hartwig.hmftools.esvee.prep.PrepApplication \\ - -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\ - ${args} \\ - -sample "${sample_ids_string}" \\ - -bam_file "${bam_files_string}" \\ - -ref_genome ${genome_fasta} \\ - -ref_genome_version ${genome_ver} \\ - -blacklist_bed ${sv_blocklist} \\ - -known_fusion_bed ${known_fusions} \\ - -bamtool \$SAMBAMBA_PATH \\ - -write_types 'JUNCTIONS;BAM;FRAGMENT_LENGTH_DIST' \\ - -output_dir prep/ \\ - -threads ${task.cpus} \\ - -log_level DEBUG \\ - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - esvee: \$(esvee -version | sed -n '/^Esvee version/ { s/^.* //p }') - END_VERSIONS - """ - - stub: - """ - mkdir -p prep/ - - ${ (meta.normal_id != null) ? "touch prep/${meta.normal_id}.esvee.prep.bam" : '' } - ${ (meta.normal_id != null) ? "touch prep/${meta.normal_id}.esvee.prep.bam.bai" : '' } - touch "prep/${meta.tumor_id}.esvee.prep.bam" - touch "prep/${meta.tumor_id}.esvee.prep.bam.bai" - touch "prep/${meta.tumor_id}.esvee.prep.fragment_length.tsv" - touch "prep/${meta.tumor_id}.esvee.prep.junction.tsv" - - echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml - """ -} diff --git a/modules/local/esvee/prep/meta.yml b/modules/local/esvee/prep/meta.yml deleted file mode 100644 index 4ca71e03..00000000 --- a/modules/local/esvee/prep/meta.yml +++ /dev/null @@ -1,72 +0,0 @@ -name: esvee_prep -description: Select reads associated with SV events -keywords: - - filtering - - reads - - sv -tools: - - esvee: - description: Structural variant (SV) calling - homepage: https://github.com/hartwigmedical/hmftools/tree/master/esvee - documentation: https://github.com/hartwigmedical/hmftools/tree/master/esvee - licence: ["GPL >=3"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[id: 'sample_id'] - - tumor_bam: - type: file - description: Tumor BAM file - pattern: "*.{bam}" - - tumor_bai: - type: file - description: Tumor BAI file - pattern: "*.{bai}" - - normal_bam: - type: file - description: Normal BAM file - pattern: "*.{bam}" - - normal_bai: - type: file - description: Normal BAI file - pattern: "*.{bai}" - - genome_fasta: - type: file - description: Reference genome assembly FASTA file - pattern: "*.{fa,fasta}" - - genome_ver: - type: string - description: Reference genome version - - sv_blocklist: - type: file - description: SV Prep blocklist file - pattern: "*.{bed}" - - known_fusions: - type: file - description: Known fusions file - pattern: "*.{bedpe}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [id: 'sample_id'] - - prep_dir: - type: directory - description: ESVEE prep output directory - - tumor_prep_bam: - type: list - description: Tumor prep BAM and BAI file - pattern: "*.{bam,bam.bai}" - - normal_prep_bam: - type: list - description: Normal prep BAM and BAI file (optional) - pattern: "*.{bam,bam.bai}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@scwatts" diff --git a/modules/local/fastp/main.nf b/modules/local/fastp/main.nf index 8b197978..0ca9d241 100644 --- a/modules/local/fastp/main.nf +++ b/modules/local/fastp/main.nf @@ -17,6 +17,7 @@ process FASTP { output: tuple val(meta), path('*_R1.fastp.fastq.gz'), path('*_R2.fastp.fastq.gz'), emit: fastq path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -27,9 +28,9 @@ process FASTP { def split_by_lines_arg = max_fastq_records > 0 ? 
"--split_by_lines ${4 * max_fastq_records.toLong()}" : '' def umi_args_list = [] - if (umi_location) umi_args_list.add("--umi_loc ${umi_location}") - if (umi_length) umi_args_list.add("--umi_len ${umi_length}") - if (umi_skip >= 0) umi_args_list.add("--umi_skip ${umi_skip}") + if (umi_location) { umi_args_list.add("--umi_loc ${umi_location}") } + if (umi_length) { umi_args_list.add("--umi_len ${umi_length}") } + if (umi_skip >= 0) { umi_args_list.add("--umi_skip ${umi_skip}") } def umi_args = umi_args_list ? '--umi ' + umi_args_list.join(' ') : '' """ diff --git a/modules/local/fastp/meta.yml b/modules/local/fastp/meta.yml index 434d3cc4..a8584b8c 100644 --- a/modules/local/fastp/meta.yml +++ b/modules/local/fastp/meta.yml @@ -16,7 +16,7 @@ input: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. [id: 'sample_id'] - reads_fwd: type: file description: Forward reads FASTQ file @@ -42,7 +42,7 @@ output: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. 
[id: 'sample_id'] - fastq: type: list description: Forward and reverse FASTQ files @@ -51,6 +51,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" - "@mkcmkc" diff --git a/modules/local/gatk4/bwaindeximage/main.nf b/modules/local/gatk4/bwaindeximage/main.nf index 448963af..1231ba74 100644 --- a/modules/local/gatk4/bwaindeximage/main.nf +++ b/modules/local/gatk4/bwaindeximage/main.nf @@ -13,6 +13,7 @@ process GATK4_BWA_INDEX_IMAGE { output: path "${genome_fasta}.img", emit: img path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when diff --git a/modules/local/gatk4/bwaindeximage/meta.yml b/modules/local/gatk4/bwaindeximage/meta.yml index 56140919..b81582e7 100644 --- a/modules/local/gatk4/bwaindeximage/meta.yml +++ b/modules/local/gatk4/bwaindeximage/meta.yml @@ -33,5 +33,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/gridss/index/main.nf b/modules/local/gridss/index/main.nf index dfc32cc5..7dbffb7d 100644 --- a/modules/local/gridss/index/main.nf +++ b/modules/local/gridss/index/main.nf @@ -17,6 +17,7 @@ process GRIDSS_INDEX { output: path 'gridss_index/', emit: index path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when diff --git a/modules/local/gridss/index/meta.yml b/modules/local/gridss/index/meta.yml index 94aa572a..2e045b2b 100644 --- a/modules/local/gridss/index/meta.yml +++ b/modules/local/gridss/index/meta.yml @@ -33,5 +33,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git 
a/modules/local/isofox/panel_normalisation/environment.yml b/modules/local/isofox/panel_normalisation/environment.yml new file mode 100644 index 00000000..832224f5 --- /dev/null +++ b/modules/local/isofox/panel_normalisation/environment.yml @@ -0,0 +1,7 @@ +name: isofox_panel_normalisation +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-isofox=1.7.2 diff --git a/modules/local/isofox/panel_normalisation/main.nf b/modules/local/isofox/panel_normalisation/main.nf new file mode 100644 index 00000000..3b8fb1eb --- /dev/null +++ b/modules/local/isofox/panel_normalisation/main.nf @@ -0,0 +1,63 @@ +process ISOFOX_PANEL_NORMALISATION { + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-isofox:1.7.2--hdfd78af_1' : + 'biocontainers/hmftools-isofox:1.7.2--hdfd78af_1' }" + + input: + path 'isofox_dirs.*' + val genome_ver + path gene_ids + path gene_distribution + + output: + path 'isofox.gene_normalisation.*.csv', emit: isofox_normalisation + path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def log_level_arg = task.ext.log_level ? 
"-log_level ${task.ext.log_level}" : '' + + """ + mkdir -p inputs/ + for fp in \$(find -L isofox_dirs.* -name '*.gene_data.csv'); do ln -sf ../\${fp} inputs/; done + + ( + echo SampleId + basename -s .isf.gene_data.csv -a inputs/*.isf.gene_data.csv + ) > sample_ids.txt + + isofox \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + com.hartwig.hmftools.isofox.cohort.CohortAnalyser \\ + ${args} \\ + -sample_data_file sample_ids.txt \\ + -root_data_dir inputs/ \\ + -analyses PANEL_TPM_NORMALISATION \\ + -gene_id_file ${gene_ids} \\ + -gene_distribution_file ${gene_distribution} \\ + ${log_level_arg} \\ + -output_dir ./ + + mv isofox.panel_gene_normalisation.csv isofox.gene_normalisation.${genome_ver}.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + isofox: \$(isofox -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + touch isofox.gene_normalisation.${genome_ver}.csv + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/isofox/panel_normalisation/meta.yml b/modules/local/isofox/panel_normalisation/meta.yml new file mode 100644 index 00000000..8ad69273 --- /dev/null +++ b/modules/local/isofox/panel_normalisation/meta.yml @@ -0,0 +1,40 @@ +name: isofox_panel_normalisation +description: Characterise and count gene, transcript features +keywords: + - rna + - rnaseq +tools: + - isofox: + description: Characterises and counts gene, transcript features + homepage: https://github.com/hartwigmedical/hmftools/tree/master/isofox + documentation: https://github.com/hartwigmedical/hmftools/tree/master/isofox + licence: ["GPL v3"] +input: + - isofox_dirs: + type: directory + description: List of Isofox directories + - genome_ver: + type: string + description: Reference genome version + - gene_ids: + type: file + description: Isofox gene ID file (optional) + pattern: "*.{csv}" + - gene_distribution: + type: file + description: Isofox cohort gene expression file + pattern: "*.{csv}" +output: + - 
isofox_normalisation: + type: file + description: Isofox normalisation file + pattern: "versions.yml" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - command_files: + type: list + description: List of command files +authors: + - "@scwatts" diff --git a/modules/local/esvee/prep/environment.yml b/modules/local/isofox/run/environment.yml similarity index 55% rename from modules/local/esvee/prep/environment.yml rename to modules/local/isofox/run/environment.yml index 6750d5c4..2b59e126 100644 --- a/modules/local/esvee/prep/environment.yml +++ b/modules/local/isofox/run/environment.yml @@ -1,7 +1,7 @@ -name: esvee_prep +name: isofox_run channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::hmftools-esvee=1.0.3 + - bioconda::hmftools-isofox=1.7.2 diff --git a/modules/local/isofox/main.nf b/modules/local/isofox/run/main.nf similarity index 89% rename from modules/local/isofox/main.nf rename to modules/local/isofox/run/main.nf index 6ddfa0a7..88f7c438 100644 --- a/modules/local/isofox/main.nf +++ b/modules/local/isofox/run/main.nf @@ -4,8 +4,8 @@ process ISOFOX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hmftools-isofox:1.7.1--hdfd78af_1' : - 'biocontainers/hmftools-isofox:1.7.1--hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/hmftools-isofox:1.7.2--hdfd78af_1' : + 'biocontainers/hmftools-isofox:1.7.2--hdfd78af_1' }" input: tuple val(meta), path(bam), path(bai) @@ -24,6 +24,7 @@ process ISOFOX { output: tuple val(meta), path('isofox/'), emit: isofox_dir path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -33,6 +34,8 @@ process ISOFOX { def xmx_mod = task.ext.xmx_mod ?: 0.75 + def log_level_arg = task.ext.log_level ? 
"-log_level ${task.ext.log_level}" : '' + def functions_arg = functions ? "-functions \'${functions}\'" : '' def exp_counts_arg = exp_counts ? "-exp_counts_file ${exp_counts}" : '' @@ -48,9 +51,9 @@ process ISOFOX { -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\ ${args} \\ -sample ${meta.sample_id} \\ - -bam_file ${bam} \\ ${functions_arg} \\ -read_length ${read_length} \\ + -bam_file ${bam} \\ -ref_genome ${genome_fasta} \\ -ref_genome_version ${genome_ver} \\ -ensembl_data_dir ${ensembl_data_resources} \\ @@ -60,6 +63,7 @@ process ISOFOX { ${gene_ids_arg} \\ ${tpm_norm_arg} \\ -threads ${task.cpus} \\ + ${log_level_arg} \\ -output_dir isofox/ cat <<-END_VERSIONS > versions.yml @@ -71,6 +75,7 @@ process ISOFOX { stub: """ mkdir -p isofox/ + touch isofox/placeholder echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml diff --git a/modules/local/isofox/meta.yml b/modules/local/isofox/run/meta.yml similarity index 95% rename from modules/local/isofox/meta.yml rename to modules/local/isofox/run/meta.yml index 5ccf224e..5a8aa8ee 100644 --- a/modules/local/isofox/meta.yml +++ b/modules/local/isofox/run/meta.yml @@ -1,4 +1,4 @@ -name: isofox +name: isofox_run description: Characterise and count gene, transcript features keywords: - rna @@ -76,5 +76,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/lilac/environment.yml b/modules/local/lilac/environment.yml index 8832e639..b91fd31b 100644 --- a/modules/local/lilac/environment.yml +++ b/modules/local/lilac/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-lilac=1.6 + - bioconda::hmftools-lilac=1.7.1 diff --git a/modules/local/lilac/main.nf b/modules/local/lilac/main.nf index 49e22520..f553c7ff 100644 --- a/modules/local/lilac/main.nf +++ b/modules/local/lilac/main.nf @@ -4,8 +4,8 @@ process LILAC { 
conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hmftools-lilac:1.6--hdfd78af_1' : - 'biocontainers/hmftools-lilac:1.6--hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/hmftools-lilac:1.7.1--hdfd78af_0' : + 'biocontainers/hmftools-lilac:1.7.1--hdfd78af_0' }" input: tuple val(meta), path(normal_dna_bam), path(normal_dna_bai), path(tumor_dna_bam), path(tumor_dna_bai), path(tumor_rna_bam), path(tumor_rna_bai), path(purple_dir) @@ -13,10 +13,12 @@ process LILAC { path genome_fai val genome_ver path lilac_resources, stageAs: 'lilac_resources' + val targeted_mode output: tuple val(meta), path('lilac/'), emit: lilac_dir path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -26,6 +28,8 @@ process LILAC { def xmx_mod = task.ext.xmx_mod ?: 0.75 + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + def sample_name = getSampleName(meta, tumor_dna_bam, normal_dna_bam) def normal_bam_arg = normal_dna_bam ? "-reference_bam ${normal_dna_bam}" : '' @@ -34,6 +38,8 @@ process LILAC { def purple_dir_arg = purple_dir ? "-purple_dir ${purple_dir}" : '' + def freq_score_penalty = targeted_mode ? 
'0.0018' : '0.0009' + """ lilac \\ -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\ @@ -46,7 +52,9 @@ process LILAC { -ref_genome ${genome_fasta} \\ -ref_genome_version ${genome_ver} \\ -resource_dir ${lilac_resources} \\ + -freq_score_penalty ${freq_score_penalty} \\ -threads ${task.cpus} \\ + ${log_level_arg} \\ -output_dir lilac/ cat <<-END_VERSIONS > versions.yml @@ -58,6 +66,7 @@ process LILAC { stub: """ mkdir -p lilac/ + touch lilac/placeholder echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml diff --git a/modules/local/lilac/meta.yml b/modules/local/lilac/meta.yml index ae7eaaf6..5e8e63aa 100644 --- a/modules/local/lilac/meta.yml +++ b/modules/local/lilac/meta.yml @@ -15,7 +15,7 @@ input: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. [id: 'sample_id'] - normal_dna_bam: type: file description: Normal DNA BAM file (optional) @@ -57,12 +57,15 @@ input: - lilac_resources: type: directory description: LILAC resources directory + - targeted_mode: + type: boolean + description: Flag indicating whether targeted mode is set output: - meta: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. 
[id: 'sample_id'] - lilac_dir: type: file description: LILAC output directory @@ -70,5 +73,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/linx/germline/environment.yml b/modules/local/linx/germline/environment.yml index 0305a9c6..b5b61d59 100644 --- a/modules/local/linx/germline/environment.yml +++ b/modules/local/linx/germline/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-linx=2.0.2 + - bioconda::hmftools-linx=2.1 diff --git a/modules/local/linx/germline/main.nf b/modules/local/linx/germline/main.nf index eb0d1682..c6c65074 100644 --- a/modules/local/linx/germline/main.nf +++ b/modules/local/linx/germline/main.nf @@ -4,8 +4,8 @@ process LINX_GERMLINE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hmftools-linx:2.0.2--hdfd78af_0' : - 'biocontainers/hmftools-linx:2.0.2--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hmftools-linx:2.1--hdfd78af_0' : + 'biocontainers/hmftools-linx:2.1--hdfd78af_0' }" input: tuple val(meta), path(sv_vcf) @@ -16,6 +16,7 @@ process LINX_GERMLINE { output: tuple val(meta), path('linx_germline/'), emit: annotation_dir path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -25,6 +26,8 @@ process LINX_GERMLINE { def xmx_mod = task.ext.xmx_mod ?: 0.75 + def log_level_arg = task.ext.log_level ? 
"-log_level ${task.ext.log_level}" : '' + """ linx \\ -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\ @@ -35,6 +38,7 @@ process LINX_GERMLINE { -ref_genome_version ${genome_ver} \\ -ensembl_data_dir ${ensembl_data_resources} \\ -driver_gene_panel ${driver_gene_panel} \\ + ${log_level_arg} \\ -output_dir linx_germline/ cat <<-END_VERSIONS > versions.yml @@ -46,6 +50,7 @@ process LINX_GERMLINE { stub: """ mkdir linx_germline/ + touch linx_germline/placeholder echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml diff --git a/modules/local/linx/germline/meta.yml b/modules/local/linx/germline/meta.yml index bf3befa8..1a80a796 100644 --- a/modules/local/linx/germline/meta.yml +++ b/modules/local/linx/germline/meta.yml @@ -43,5 +43,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/linx/somatic/environment.yml b/modules/local/linx/somatic/environment.yml index 331323b5..2b047417 100644 --- a/modules/local/linx/somatic/environment.yml +++ b/modules/local/linx/somatic/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-linx=2.0.2 + - bioconda::hmftools-linx=2.1 diff --git a/modules/local/linx/somatic/main.nf b/modules/local/linx/somatic/main.nf index dcc27268..cccfc9c8 100644 --- a/modules/local/linx/somatic/main.nf +++ b/modules/local/linx/somatic/main.nf @@ -4,8 +4,8 @@ process LINX_SOMATIC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/hmftools-linx:2.0.2--hdfd78af_0' : - 'biocontainers/hmftools-linx:2.0.2--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hmftools-linx:2.1--hdfd78af_0' : + 'biocontainers/hmftools-linx:2.1--hdfd78af_0' }" input: tuple val(meta), path(purple_dir) @@ -17,6 +17,7 @@ process LINX_SOMATIC { output: tuple val(meta), path('linx_somatic/'), emit: annotation_dir path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -26,6 +27,8 @@ process LINX_SOMATIC { def xmx_mod = task.ext.xmx_mod ?: 0.75 + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + """ linx \\ -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\ @@ -39,6 +42,7 @@ process LINX_SOMATIC { -driver_gene_panel ${driver_gene_panel} \\ -write_vis_data \\ -write_neo_epitopes \\ + ${log_level_arg} \\ -output_dir linx_somatic/ cat <<-END_VERSIONS > versions.yml @@ -50,6 +54,7 @@ process LINX_SOMATIC { stub: """ mkdir linx_somatic/ + touch linx_somatic/placeholder echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml diff --git a/modules/local/linx/somatic/meta.yml b/modules/local/linx/somatic/meta.yml index d838d2c8..7d760cda 100644 --- a/modules/local/linx/somatic/meta.yml +++ b/modules/local/linx/somatic/meta.yml @@ -46,5 +46,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/linx/visualiser/environment.yml b/modules/local/linx/visualiser/environment.yml index 0dbcd0fe..e9b1bfdd 100644 --- a/modules/local/linx/visualiser/environment.yml +++ b/modules/local/linx/visualiser/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-linx=2.0.2 + - bioconda::hmftools-linx=2.1 diff --git a/modules/local/linx/visualiser/main.nf 
b/modules/local/linx/visualiser/main.nf index df10802a..0c59f47c 100644 --- a/modules/local/linx/visualiser/main.nf +++ b/modules/local/linx/visualiser/main.nf @@ -4,8 +4,8 @@ process LINX_VISUALISER { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hmftools-linx:2.0.2--hdfd78af_0' : - 'biocontainers/hmftools-linx:2.0.2--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hmftools-linx:2.1--hdfd78af_0' : + 'biocontainers/hmftools-linx:2.1--hdfd78af_0' }" input: tuple val(meta), path(linx_annotation_dir) @@ -15,6 +15,7 @@ process LINX_VISUALISER { output: tuple val(meta), path('plots/'), emit: plots path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -25,6 +26,8 @@ process LINX_VISUALISER { def xmx_mod = task.ext.xmx_mod ?: 0.75 + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + """ # NOTE(SW): the output plot directories are always required for ORANGE, which is straightfoward to handle with POSIX # fs but more involved with FusionFS since it will not write empty directories to S3. 
A placeholder file can't be @@ -54,6 +57,7 @@ process LINX_VISUALISER { -ensembl_data_dir ${ensembl_data_resources} \\ -circos \$(which circos) \\ -threads ${task.cpus} \\ + ${log_level_arg} \\ -plot_out plots/all/ \\ -data_out data/all/ @@ -78,6 +82,7 @@ process LINX_VISUALISER { -circos \$(which circos) \\ -plot_reportable \\ -threads ${task.cpus} \\ + ${log_level_arg} \\ -plot_out plots/reportable/ \\ -data_out data/reportable/ @@ -95,6 +100,7 @@ process LINX_VISUALISER { stub: """ mkdir -p plots/{all,reportable}/ + touch plots/{all,reportable}/placeholder echo -e '${task.process}:\n stub: noversions\n' > versions.yml diff --git a/modules/local/linx/visualiser/meta.yml b/modules/local/linx/visualiser/meta.yml index d48c1fac..abeee967 100644 --- a/modules/local/linx/visualiser/meta.yml +++ b/modules/local/linx/visualiser/meta.yml @@ -39,5 +39,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/linxreport/main.nf b/modules/local/linxreport/main.nf index 9e9c7caf..1cced3e7 100644 --- a/modules/local/linxreport/main.nf +++ b/modules/local/linxreport/main.nf @@ -13,6 +13,7 @@ process LINXREPORT { output: tuple val(meta), path('*_linx.html'), emit: html path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when diff --git a/modules/local/linxreport/meta.yml b/modules/local/linxreport/meta.yml index 7bf2e0ab..6e3decef 100644 --- a/modules/local/linxreport/meta.yml +++ b/modules/local/linxreport/meta.yml @@ -37,5 +37,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/neo/annotate_fusions/environment.yml b/modules/local/neo/annotate_fusions/environment.yml index 79150b43..d33f2bae 100644 
--- a/modules/local/neo/annotate_fusions/environment.yml +++ b/modules/local/neo/annotate_fusions/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-isofox=1.7.1 + - bioconda::hmftools-isofox=1.7.2 diff --git a/modules/local/neo/annotate_fusions/main.nf b/modules/local/neo/annotate_fusions/main.nf index ffb2a459..d7c1e858 100644 --- a/modules/local/neo/annotate_fusions/main.nf +++ b/modules/local/neo/annotate_fusions/main.nf @@ -4,8 +4,8 @@ process NEO_ANNOTATE_FUSIONS { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hmftools-isofox:1.7.1--hdfd78af_1' : - 'biocontainers/hmftools-isofox:1.7.1--hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/hmftools-isofox:1.7.2--hdfd78af_1' : + 'biocontainers/hmftools-isofox:1.7.2--hdfd78af_1' }" input: tuple val(meta), path(neo_finder_dir), path(bam), path(bai) @@ -18,6 +18,7 @@ process NEO_ANNOTATE_FUSIONS { output: tuple val(meta), path('*isf.neoepitope.tsv'), emit: annotated_fusions path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -27,6 +28,8 @@ process NEO_ANNOTATE_FUSIONS { def xmx_mod = task.ext.xmx_mod ?: 0.95 + def log_level_arg = task.ext.log_level ? 
"-log_level ${task.ext.log_level}" : '' + """ mkdir -p isofox/ @@ -36,12 +39,13 @@ process NEO_ANNOTATE_FUSIONS { -sample ${meta.sample_id} \\ -bam_file ${bam} \\ -functions NEO_EPITOPES \\ - -neo_dir ${neo_finder_dir} \\ -read_length ${read_length} \\ + -neo_dir ${neo_finder_dir} \\ -ref_genome ${genome_fasta} \\ -ref_genome_version ${genome_ver} \\ -ensembl_data_dir ${ensembl_data_resources} \\ -threads ${task.cpus} \\ + ${log_level_arg} \\ -output_dir ./ cat <<-END_VERSIONS > versions.yml @@ -53,6 +57,7 @@ process NEO_ANNOTATE_FUSIONS { stub: """ touch ${meta.sample_id}.isf.neoepitope.tsv + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/neo/annotate_fusions/meta.yml b/modules/local/neo/annotate_fusions/meta.yml index f9573815..c0ba191f 100644 --- a/modules/local/neo/annotate_fusions/meta.yml +++ b/modules/local/neo/annotate_fusions/meta.yml @@ -58,6 +58,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" - "@charlesshale" diff --git a/modules/local/neo/finder/environment.yml b/modules/local/neo/finder/environment.yml index 2e973cb8..2e92c519 100644 --- a/modules/local/neo/finder/environment.yml +++ b/modules/local/neo/finder/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-neo=1.2 + - bioconda::hmftools-neo=1.2.1 diff --git a/modules/local/neo/finder/main.nf b/modules/local/neo/finder/main.nf index 2e46bfd6..af9751c1 100644 --- a/modules/local/neo/finder/main.nf +++ b/modules/local/neo/finder/main.nf @@ -4,8 +4,8 @@ process NEO_FINDER { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/hmftools-neo:1.2--hdfd78af_1' : - 'biocontainers/hmftools-neo:1.2--hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/hmftools-neo:1.2.1--hdfd78af_0' : + 'biocontainers/hmftools-neo:1.2.1--hdfd78af_0' }" input: tuple val(meta), path(purple_dir), path(linx_annotation_dir) @@ -17,6 +17,7 @@ process NEO_FINDER { output: tuple val(meta), path('neo_finder/'), emit: neo_finder_dir path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -26,6 +27,8 @@ process NEO_FINDER { def xmx_mod = task.ext.xmx_mod ?: 0.95 + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + """ mkdir -p neo_finder/ @@ -38,7 +41,7 @@ process NEO_FINDER { -ref_genome ${genome_fasta} \\ -ref_genome_version ${genome_ver} \\ -ensembl_data_dir ${ensembl_data_resources} \\ - -log_debug \\ + ${log_level_arg} \\ -output_dir neo_finder/ cat <<-END_VERSIONS > versions.yml @@ -50,6 +53,7 @@ process NEO_FINDER { stub: """ mkdir -p neo_finder/ + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/neo/finder/meta.yml b/modules/local/neo/finder/meta.yml index 01dc2fcb..9171f4dd 100644 --- a/modules/local/neo/finder/meta.yml +++ b/modules/local/neo/finder/meta.yml @@ -47,6 +47,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" - "@charlesshale" diff --git a/modules/local/neo/scorer/environment.yml b/modules/local/neo/scorer/environment.yml index 284c1335..f8a309f8 100644 --- a/modules/local/neo/scorer/environment.yml +++ b/modules/local/neo/scorer/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-neo=1.2 + - bioconda::hmftools-neo=1.2.1 diff --git a/modules/local/neo/scorer/main.nf b/modules/local/neo/scorer/main.nf index 
c8367255..f3a817d0 100644 --- a/modules/local/neo/scorer/main.nf +++ b/modules/local/neo/scorer/main.nf @@ -4,8 +4,8 @@ process NEO_SCORER { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hmftools-neo:1.2--hdfd78af_1' : - 'biocontainers/hmftools-neo:1.2--hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/hmftools-neo:1.2.1--hdfd78af_0' : + 'biocontainers/hmftools-neo:1.2.1--hdfd78af_0' }" input: tuple val(meta), path(isofox_dir), path(purple_dir), path(sage_vcf), path(lilac_dir), path(neo_finder_dir), path(annotated_fusions) @@ -16,6 +16,7 @@ process NEO_SCORER { output: tuple val(meta), path('neo_scorer/'), emit: neo_scorer_dir path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -25,6 +26,8 @@ process NEO_SCORER { def xmx_mod = task.ext.xmx_mod ?: 0.95 + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + def rna_sample_arg = meta.containsKey('sample_rna_id') ? "-rna_sample ${meta.sample_rna_id}" : '' def rna_somatic_vcf_arg = meta.containsKey('sample_rna_id') ? 
"-rna_somatic_vcf ${sage_vcf}" : '' @@ -49,16 +52,16 @@ process NEO_SCORER { ${args} \\ -sample ${meta.sample_id} \\ ${cancer_type_arg} \\ + -purple_dir ${purple_dir} \\ ${rna_sample_arg} \\ \${isofox_dir_arg} \\ - -purple_dir ${purple_dir} \\ ${rna_somatic_vcf_arg} \\ -lilac_dir ${lilac_dir} \\ -neo_dir ${neo_finder_dir} \\ -ensembl_data_dir ${ensembl_data_resources} \\ -score_file_dir ${neo_resources} \\ -cancer_tpm_medians_file ${cohort_tpm_medians} \\ - -log_debug \\ + ${log_level_arg} \\ -output_dir neo_scorer/ cat <<-END_VERSIONS > versions.yml @@ -70,6 +73,7 @@ process NEO_SCORER { stub: """ mkdir -p neo_scorer/ + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ } diff --git a/modules/local/neo/scorer/meta.yml b/modules/local/neo/scorer/meta.yml index c870da4d..5ce8f853 100644 --- a/modules/local/neo/scorer/meta.yml +++ b/modules/local/neo/scorer/meta.yml @@ -57,6 +57,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" - "@charlesshale" diff --git a/modules/local/orange/environment.yml b/modules/local/orange/environment.yml index dc3f3020..45b29182 100644 --- a/modules/local/orange/environment.yml +++ b/modules/local/orange/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-orange=3.8.1 + - bioconda::hmftools-orange=4.1 diff --git a/modules/local/orange/main.nf b/modules/local/orange/main.nf index 708dd3ce..a2539487 100644 --- a/modules/local/orange/main.nf +++ b/modules/local/orange/main.nf @@ -4,8 +4,8 @@ process ORANGE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/hmftools-orange:3.8.1--hdfd78af_0' : - 'biocontainers/hmftools-orange:3.8.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hmftools-orange:4.1--hdfd78af_0' : + 'biocontainers/hmftools-orange:4.1--hdfd78af_0' }" input: tuple val(meta), @@ -42,6 +42,7 @@ process ORANGE { tuple val(meta), path('output/*.orange.pdf') , emit: pdf, optional: true tuple val(meta), path('output/*.orange.json'), emit: json, optional: true path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -51,6 +52,8 @@ process ORANGE { def xmx_mod = task.ext.xmx_mod ?: 0.95 + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + def pipeline_version_str = pipeline_version ?: 'not specified' def run_mode = Utils.getEnumFromString(params.mode, Constants.RunMode); @@ -162,6 +165,7 @@ process ORANGE { -ensembl_data_dir ${ensembl_data_resources} \\ ${isofox_gene_distribution_arg} \\ ${isofox_alt_sj_arg} \\ + ${log_level_arg} \\ -output_dir output/ cat <<-END_VERSIONS > versions.yml @@ -173,6 +177,7 @@ process ORANGE { stub: """ mkdir -p output/ + touch output/${meta.tumor_id}.orange.json touch output/${meta.tumor_id}.orange.pdf diff --git a/modules/local/orange/meta.yml b/modules/local/orange/meta.yml index c4e45850..33b038de 100644 --- a/modules/local/orange/meta.yml +++ b/modules/local/orange/meta.yml @@ -13,7 +13,7 @@ input: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. 
[id: 'sample_id'] - bamtools_somatic_dir: type: directory description: BamTools somatic output directory @@ -124,5 +124,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/pave/germline/environment.yml b/modules/local/pave/germline/environment.yml index 946ac0e8..6bd33a54 100644 --- a/modules/local/pave/germline/environment.yml +++ b/modules/local/pave/germline/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-pave=1.7.1 + - bioconda::hmftools-pave=1.8 diff --git a/modules/local/pave/germline/main.nf b/modules/local/pave/germline/main.nf index e8baf26d..054922f5 100644 --- a/modules/local/pave/germline/main.nf +++ b/modules/local/pave/germline/main.nf @@ -1,15 +1,11 @@ -// NOTE(SW): use of tumor sample name here is consistent with Pipeline5 -// - https://github.com/hartwigmedical/pipeline5/blob/v5.33/cluster/src/main/java/com/hartwig/pipeline/tertiary/pave/PaveGermline.java#L36-L41 -// - https://github.com/hartwigmedical/pipeline5/blob/v5.33/cluster/src/main/java/com/hartwig/pipeline/tertiary/pave/PaveArguments.java#L31-L43 - process PAVE_GERMLINE { tag "${meta.id}" label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/hmftools-pave:1.7.1--hdfd78af_0' : - 'biocontainers/hmftools-pave:1.7.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hmftools-pave:1.8--hdfd78af_1' : + 'biocontainers/hmftools-pave:1.8--hdfd78af_1' }" input: tuple val(meta), path(sage_vcf), path(sage_tbi) @@ -22,12 +18,12 @@ process PAVE_GERMLINE { path segment_mappability path driver_gene_panel path ensembl_data_resources - path gnomad_resource output: - tuple val(meta), path("*.vcf.gz") , emit: vcf - tuple val(meta), path("*.vcf.gz.tbi"), emit: index + tuple val(meta), path('*.vcf.gz') , emit: vcf + tuple val(meta), path('*.vcf.gz.tbi'), emit: index path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -35,23 +31,15 @@ process PAVE_GERMLINE { script: def args = task.ext.args ?: '' - def xmx_mod = task.ext.xmx_mod ?: 0.75 - - def gnomad_args - if (genome_ver.toString() == '37') { - gnomad_args = "-gnomad_freq_file ${gnomad_resource}" - } else if (genome_ver.toString() == '38') { - gnomad_args = "-gnomad_freq_dir ${gnomad_resource}" - } else { - error "got bad genome version: ${genome_ver}" - } + def log_level_arg = task.ext.log_level ? 
"-log_level ${task.ext.log_level}" : '' """ pave \\ - -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ ${args} \\ -sample ${meta.sample_id} \\ - -vcf_file ${sage_vcf} \\ + -input_vcf ${sage_vcf} \\ + -output_vcf ${meta.sample_id}.pave.germline.vcf.gz \\ -ref_genome ${genome_fasta} \\ -ref_genome_version ${genome_ver} \\ -clinvar_vcf ${clinvar_annotations} \\ @@ -60,21 +48,20 @@ process PAVE_GERMLINE { -ensembl_data_dir ${ensembl_data_resources} \\ -blacklist_bed ${sage_blocklist_regions} \\ -blacklist_vcf ${sage_blocklist_sites} \\ - ${gnomad_args} \\ -gnomad_no_filter \\ - -read_pass_only \\ -threads ${task.cpus} \\ + ${log_level_arg} \\ -output_dir ./ cat <<-END_VERSIONS > versions.yml "${task.process}": - pave: \$(pave -version | sed -n '/^Pave version / { s/^.* //p }') + pave: \$(pave -version | sed 's/^.* //') END_VERSIONS """ stub: """ - touch ${meta.sample_id}.sage.pave_germline.vcf.gz{,.tbi} + touch ${meta.sample_id}.pave.germline.vcf.gz{,.tbi} echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ diff --git a/modules/local/pave/germline/meta.yml b/modules/local/pave/germline/meta.yml index 5d209cec..2981f457 100644 --- a/modules/local/pave/germline/meta.yml +++ b/modules/local/pave/germline/meta.yml @@ -60,8 +60,6 @@ input: - ensembl_data_resources: type: directory description: HMF ensembl data resources directory - - gnomad_resource: - description: gnomAD resource output: - meta: type: map @@ -80,5 +78,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/esvee/assemble/environment.yml b/modules/local/pave/pon_creation/environment.yml similarity index 54% rename from modules/local/esvee/assemble/environment.yml rename to modules/local/pave/pon_creation/environment.yml index 96a1ad40..c92d09fa 100644 --- 
a/modules/local/esvee/assemble/environment.yml +++ b/modules/local/pave/pon_creation/environment.yml @@ -1,7 +1,7 @@ -name: esvee_assemble +name: pave_pon_creation channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::hmftools-esvee=1.0.3 + - bioconda::hmftools-pave=1.8 diff --git a/modules/local/pave/pon_creation/main.nf b/modules/local/pave/pon_creation/main.nf new file mode 100644 index 00000000..def1768b --- /dev/null +++ b/modules/local/pave/pon_creation/main.nf @@ -0,0 +1,54 @@ +process PAVE_PON_PANEL_CREATION { + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-pave:1.8--hdfd78af_1' : + 'biocontainers/hmftools-pave:1.8--hdfd78af_1' }" + + input: + tuple path(sage_vcf), path(sage_tbi) + val genome_ver + + output: + path 'pave.somatic_artefacts.*.tsv', emit: pave_artefacts + path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def log_level_arg = task.ext.log_level ? 
"-log_level ${task.ext.log_level}" : '' + + """ + ( + echo SampleId + basename -s .sage.somatic.vcf.gz -a *.sage.somatic.vcf.gz + ) > sample_ids.txt + + pave \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + com.hartwig.hmftools.pave.pon_gen.PonBuilder \\ + ${args} \\ + -sample_id_file sample_ids.txt \\ + -vcf_path '*.sage.somatic.vcf.gz' \\ + -ref_genome_version ${genome_ver} \\ + ${log_level_arg} \\ + -output_pon_file pave.somatic_artefacts.${genome_ver}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pave: \$(pave -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + touch pave.somatic_artefacts.${genome_ver}.tsv + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/pave/pon_creation/meta.yml b/modules/local/pave/pon_creation/meta.yml new file mode 100644 index 00000000..c3cd28c6 --- /dev/null +++ b/modules/local/pave/pon_creation/meta.yml @@ -0,0 +1,41 @@ +name: pave_pon_creation +description: Annotate small variant VCF with gene, transcript coding and protein effects +keywords: + - pave + - annotation + - gene + - transcript + - protein + - vcf +tools: + - pave: + description: Annotates small variant VCF with gene, transcript coding and protein effects. 
+ homepage: https://github.com/hartwigmedical/hmftools/tree/master/pave + documentation: https://github.com/hartwigmedical/hmftools/tree/master/pave + licence: ["GPL v3"] +input: + - sage_vcf: + type: file + description: SAGE VCF file + pattern: "*.{vcf.gz}" + - sage_tbi: + type: file + description: SAGE VCF index file + pattern: "*.{tbi}" + - genome_ver: + type: string + description: Reference genome version +output: + - pave_artefacts: + type: file + description: PAVE artefacts file + pattern: "*.{tsv}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - command_files: + type: list + description: List of command files +authors: + - "@scwatts" diff --git a/modules/local/pave/somatic/environment.yml b/modules/local/pave/somatic/environment.yml index 7fa3bb0b..736cc619 100644 --- a/modules/local/pave/somatic/environment.yml +++ b/modules/local/pave/somatic/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-pave=1.7.1 + - bioconda::hmftools-pave=1.8 diff --git a/modules/local/pave/somatic/main.nf b/modules/local/pave/somatic/main.nf index 7298e39a..46c59a33 100644 --- a/modules/local/pave/somatic/main.nf +++ b/modules/local/pave/somatic/main.nf @@ -4,16 +4,16 @@ process PAVE_SOMATIC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/hmftools-pave:1.7.1--hdfd78af_0' : - 'biocontainers/hmftools-pave:1.7.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hmftools-pave:1.8--hdfd78af_1' : + 'biocontainers/hmftools-pave:1.8--hdfd78af_1' }" input: tuple val(meta), path(sage_vcf), path(sage_tbi) path genome_fasta val genome_ver path genome_fai - path sage_pon path pon_artefacts + path sage_pon path clinvar_annotations path segment_mappability path driver_gene_panel @@ -21,9 +21,10 @@ process PAVE_SOMATIC { path gnomad_resource output: - tuple val(meta), path("*.vcf.gz") , emit: vcf - tuple val(meta), path("*.vcf.gz.tbi"), emit: index + tuple val(meta), path('*.vcf.gz') , emit: vcf + tuple val(meta), path('*.vcf.gz.tbi'), emit: index path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -33,13 +34,12 @@ process PAVE_SOMATIC { def xmx_mod = task.ext.xmx_mod ?: 0.75 - def pon_filters + def log_level_arg = task.ext.log_level ? 
"-log_level ${task.ext.log_level}" : '' + def gnomad_args if (genome_ver.toString() == '37') { - pon_filters = 'HOTSPOT:10:5;PANEL:6:5;UNKNOWN:6:0' gnomad_args = "-gnomad_freq_file ${gnomad_resource}" } else if (genome_ver.toString() == '38') { - pon_filters = 'HOTSPOT:6:5;PANEL:3:3;UNKNOWN:3:0' gnomad_args = "-gnomad_freq_dir ${gnomad_resource}" } else { error "got bad genome version: ${genome_ver}" @@ -53,19 +53,19 @@ process PAVE_SOMATIC { -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\ ${args} \\ -sample ${meta.sample_id} \\ - -vcf_file ${sage_vcf} \\ + -input_vcf ${sage_vcf} \\ + -output_vcf ${meta.sample_id}.pave.somatic.vcf.gz \\ -ref_genome ${genome_fasta} \\ -ref_genome_version ${genome_ver} \\ - -pon_file ${sage_pon} \\ - -pon_filters "${pon_filters}" \\ ${pon_artefact_arg} \\ + -pon_file ${sage_pon} \\ + ${gnomad_args} \\ -clinvar_vcf ${clinvar_annotations} \\ -driver_gene_panel ${driver_gene_panel} \\ -mappability_bed ${segment_mappability} \\ -ensembl_data_dir ${ensembl_data_resources} \\ - ${gnomad_args} \\ - -read_pass_only \\ -threads ${task.cpus} \\ + ${log_level_arg} \\ -output_dir ./ cat <<-END_VERSIONS > versions.yml @@ -76,7 +76,7 @@ process PAVE_SOMATIC { stub: """ - touch ${meta.sample_id}.sage.pave_somatic.vcf.gz{,.tbi} + touch ${meta.sample_id}.pave.somatic.vcf.gz{,.tbi} echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ diff --git a/modules/local/pave/somatic/meta.yml b/modules/local/pave/somatic/meta.yml index af814d51..8734eb2f 100644 --- a/modules/local/pave/somatic/meta.yml +++ b/modules/local/pave/somatic/meta.yml @@ -38,13 +38,13 @@ input: type: file description: Reference genome assembly fai file pattern: "*.{fai}" + - pon_artefacts: + type: file + description: Taregeted sequencing PON artefacts file (optional) - sage_pon: type: file description: SAGE PON file pattern: "*.{tsv.gz}" - - pon_artefacts: - type: file - description: Taregeted sequencing PON artefacts file (optional) - clinvar_annotations: type: file 
description: ClinVar annotations VCF file @@ -80,5 +80,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/peach/main.nf b/modules/local/peach/main.nf index 6497803f..4bcb0047 100644 --- a/modules/local/peach/main.nf +++ b/modules/local/peach/main.nf @@ -16,6 +16,7 @@ process PEACH { output: tuple val(meta), path('peach/'), emit: peach_dir path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -25,6 +26,8 @@ process PEACH { def xmx_mod = task.ext.xmx_mod ?: 0.75 + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + """ peach \\ -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\ @@ -34,8 +37,10 @@ process PEACH { -haplotypes_file ${haplotypes} \\ -function_file ${haplotype_functions} \\ -drugs_file ${drug_info} \\ + ${log_level_arg} \\ -output_dir peach/ + cat <<-END_VERSIONS > versions.yml "${task.process}": peach: \$(peach -version | sed -n '/Peach version/ { s/^.* //p }') diff --git a/modules/local/peach/meta.yml b/modules/local/peach/meta.yml index 8bb98e2f..8367bf48 100644 --- a/modules/local/peach/meta.yml +++ b/modules/local/peach/meta.yml @@ -15,7 +15,7 @@ input: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. [id: 'sample_id'] - germline_vcf: type: file description: PURPLE germline small variant VCF file @@ -37,7 +37,7 @@ output: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. 
[id: 'sample_id'] - peach_dir: type: directory description: PEACH output directory @@ -45,5 +45,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/purple/environment.yml b/modules/local/purple/environment.yml index fb18f1e6..cf79e7a6 100644 --- a/modules/local/purple/environment.yml +++ b/modules/local/purple/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-purple=4.1 + - bioconda::hmftools-purple=4.2 diff --git a/modules/local/purple/main.nf b/modules/local/purple/main.nf index 79460730..1ee95b5b 100644 --- a/modules/local/purple/main.nf +++ b/modules/local/purple/main.nf @@ -4,8 +4,8 @@ process PURPLE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hmftools-purple:4.1--hdfd78af_0' : - 'biocontainers/hmftools-purple:4.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hmftools-purple:4.2--hdfd78af_0' : + 'biocontainers/hmftools-purple:4.2--hdfd78af_0' }" input: tuple val(meta), path(amber_dir), path(cobalt_dir), path(sv_tumor_vcf), path(sv_tumor_tbi), path(sv_normal_vcf), path(sv_normal_tbi), path(smlv_tumor_vcf), path(smlv_normal_vcf) @@ -26,6 +26,7 @@ process PURPLE { output: tuple val(meta), path('purple/'), emit: purple_dir path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -35,6 +36,8 @@ process PURPLE { def xmx_mod = task.ext.xmx_mod ?: 0.75 + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + def reference_arg = meta.containsKey('normal_id') ? "-reference ${meta.normal_id}" : '' def sv_tumor_vcf_arg = sv_tumor_vcf ? 
"-somatic_sv_vcf ${sv_tumor_vcf}" : '' @@ -75,6 +78,7 @@ process PURPLE { -gc_profile ${gc_profile} \\ -circos \$(which circos) \\ -threads ${task.cpus} \\ + ${log_level_arg} \\ -output_dir purple/ cat <<-END_VERSIONS > versions.yml @@ -86,6 +90,7 @@ process PURPLE { stub: """ mkdir purple/ + touch purple/${meta.tumor_id}.purple.cnv.gene.tsv touch purple/${meta.tumor_id}.purple.cnv.somatic.tsv touch purple/${meta.tumor_id}.purple.driver.catalog.germline.tsv diff --git a/modules/local/purple/meta.yml b/modules/local/purple/meta.yml index 075721e1..c8602339 100644 --- a/modules/local/purple/meta.yml +++ b/modules/local/purple/meta.yml @@ -17,7 +17,7 @@ input: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. [id: 'sample_id'] - amber_dir: type: directory description: AMBER output directory @@ -103,7 +103,7 @@ output: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. 
[id: 'sample_id'] - purple_dir: type: directory description: PURPLE output directory @@ -111,5 +111,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/redux/environment.yml b/modules/local/redux/environment.yml index 81143c4f..4a631aa6 100644 --- a/modules/local/redux/environment.yml +++ b/modules/local/redux/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-redux=1.1.2 + - bioconda::hmftools-redux=1.2 diff --git a/modules/local/redux/main.nf b/modules/local/redux/main.nf index 12123ccb..cc342e2d 100644 --- a/modules/local/redux/main.nf +++ b/modules/local/redux/main.nf @@ -4,8 +4,8 @@ process REDUX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hmftools-redux:1.1.2--hdfd78af_0' : - 'biocontainers/hmftools-redux:1.1.2--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hmftools-redux:1.2--hdfd78af_0' : + 'biocontainers/hmftools-redux:1.2--hdfd78af_0' }" input: tuple val(meta), path(bams), path(bais) @@ -24,6 +24,7 @@ process REDUX { tuple val(meta), path('*.jitter_params.tsv') , emit: jitter_tsv tuple val(meta), path('*.ms_table.tsv.gz') , emit: ms_tsv path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -33,6 +34,8 @@ process REDUX { def xmx_mod = task.ext.xmx_mod ?: 0.95 + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + def form_consensus_arg = umi_enable ? 
'' : '-form_consensus' def umi_args_list = [] @@ -45,19 +48,19 @@ process REDUX { -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\ ${args} \\ -sample ${meta.sample_id} \\ + ${form_consensus_arg} \\ + ${umi_args} \\ -input_bam ${bams.join(',')} \\ - -output_dir ./ \\ -output_bam ./${meta.sample_id}.redux.bam \\ -ref_genome ${genome_fasta} \\ -ref_genome_version ${genome_ver} \\ - -unmap_regions ${unmap_regions} \\ -ref_genome_msi_file ${msi_jitter_sites} \\ + -unmap_regions ${unmap_regions} \\ -bamtool \$(which samtools) \\ - ${form_consensus_arg} \\ - ${umi_args} \\ -write_stats \\ -threads ${task.cpus} \\ - -log_level DEBUG + ${log_level_arg} \\ + -output_dir ./ cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/redux/meta.yml b/modules/local/redux/meta.yml index 062634cc..44f137e9 100644 --- a/modules/local/redux/meta.yml +++ b/modules/local/redux/meta.yml @@ -14,7 +14,7 @@ input: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. [id: 'sample_id'] - bams: type: list description: List BAM files @@ -55,7 +55,7 @@ output: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. 
[id: 'sample_id'] - bam: type: list description: BAM and BAI file @@ -76,6 +76,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" - "@mkcmkc" diff --git a/modules/local/sage/append/environment.yml b/modules/local/sage/append/environment.yml index afd7f6ad..59e9d98a 100644 --- a/modules/local/sage/append/environment.yml +++ b/modules/local/sage/append/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-sage=4.0 + - bioconda::hmftools-sage=4.1 diff --git a/modules/local/sage/append/main.nf b/modules/local/sage/append/main.nf index 5062c564..f0a9c799 100644 --- a/modules/local/sage/append/main.nf +++ b/modules/local/sage/append/main.nf @@ -4,19 +4,21 @@ process SAGE_APPEND { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hmftools-sage:4.0--hdfd78af_0' : - 'biocontainers/hmftools-sage:4.0--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hmftools-sage:4.1--hdfd78af_0' : + 'biocontainers/hmftools-sage:4.1--hdfd78af_0' }" input: - tuple val(meta), path(vcf), path(bam), path(bai) + tuple val(meta), path(vcf), path(bams), path(bais), path(redux_tsvs) path genome_fasta val genome_ver path genome_fai path genome_dict + val targeted_mode output: - tuple val(meta), path('*.append.vcf.gz'), emit: vcf - path 'versions.yml' , emit: versions + tuple val(meta), path('sage_append'), emit: sage_append_dir + path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -26,24 +28,30 @@ process SAGE_APPEND { def xmx_mod = task.ext.xmx_mod ?: 0.75 - def run_mode = Utils.getEnumFromString(params.mode, Constants.RunMode) - def high_depth_mode_arg = (run_mode === Constants.RunMode.TARGETED) ? 
'-high_depth_mode' : '' + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + + def skip_msi_jitter_arg = !redux_tsvs ? '-skip_msi_jitter' : '' + def high_depth_mode_arg = targeted_mode ? '-high_depth_mode' : '' """ + mkdir -p sage_append/ + sage \\ -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\ com.hartwig.hmftools.sage.append.SageAppendApplication \\ ${args} \\ -input_vcf ${vcf} \\ - -reference ${meta.tumor_rna_id} \\ - -reference_bam ${bam} \\ + -max_read_depth 100000 \\ + -reference ${meta.reference_ids.join(',')} \\ + -reference_bam ${bams.join(',')} \\ -ref_genome ${genome_fasta} \\ -ref_genome_version ${genome_ver} \\ - -skip_msi_jitter \\ -write_frag_lengths \\ ${high_depth_mode_arg} \\ + ${skip_msi_jitter_arg} \\ -threads ${task.cpus} \\ - -output_vcf ${meta.dna_id}.sage.append.vcf.gz + ${log_level_arg} \\ + -output_vcf sage_append/${meta.output_file_id}.sage.append.vcf.gz cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -53,7 +61,12 @@ process SAGE_APPEND { stub: """ - touch "${meta.dna_id}.sage.append.vcf.gz" + mkdir -p sage_append/ + + touch sage_append/${meta.output_file_id}.frag_lengths.tsv.gz + touch sage_append/${meta.output_file_id}.sage.append.vcf.gz + touch sage_append/${meta.output_file_id}.sage.append.vcf.gz.tbi + touch sage_append/${meta.output_file_id}_query.sage.bqr.tsv echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml """ diff --git a/modules/local/sage/append/meta.yml b/modules/local/sage/append/meta.yml index 970d2539..b5a41e42 100644 --- a/modules/local/sage/append/meta.yml +++ b/modules/local/sage/append/meta.yml @@ -15,19 +15,20 @@ input: type: map description: | Groovy Map containing sample informatio - e.g. [id: 'sample_id', append_id: 'sample_id_append'] + e.g. 
[id: 'sample_id'] - vcf: type: file description: VCF file pattern: "*.{vcf.gz}" - - bam: - type: file - description: BAM file - pattern: "*.{bam}" - - bai: - type: file - description: BAI file - pattern: "*.{bai}" + - bams: + type: list + description: BAM files + - bais: + type: list + description: BAI files + - redux_tsvs: + type: list + description: REDUX MS files - genome_fasta: type: file description: Reference genome assembly FASTA file @@ -43,19 +44,24 @@ input: type: file description: Reference genome assembly dict file pattern: "*.{dict}" + - targeted_mode: + type: boolean + description: Flag indicating whether targeted mode is set output: - meta: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', append_id: 'sample_id_append'] - - vcf: - type: file - description: VCF file - pattern: "*.{vcf.gz}" + e.g. [id: 'sample_id'] + - sage_append_dir: + type: directory + description: SAGE append output directory - versions: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/sage/germline/environment.yml b/modules/local/sage/germline/environment.yml index 8cac1e92..3f3782e1 100644 --- a/modules/local/sage/germline/environment.yml +++ b/modules/local/sage/germline/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-sage=4.0 + - bioconda::hmftools-sage=4.1 diff --git a/modules/local/sage/germline/main.nf b/modules/local/sage/germline/main.nf index 2c618061..7e35ec75 100644 --- a/modules/local/sage/germline/main.nf +++ b/modules/local/sage/germline/main.nf @@ -1,11 +1,11 @@ process SAGE_GERMLINE { tag "${meta.id}" - label 'process_high' + label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/hmftools-sage:4.0--hdfd78af_0' : - 'biocontainers/hmftools-sage:4.0--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hmftools-sage:4.1--hdfd78af_0' : + 'biocontainers/hmftools-sage:4.1--hdfd78af_0' }" input: tuple val(meta), path(tumor_bam), path(normal_bam), path(tumor_bai), path(normal_bai), path(redux_tsvs) @@ -14,18 +14,16 @@ process SAGE_GERMLINE { path genome_fai path genome_dict path sage_known_hotspots_germline - path sage_actionable_panel - path sage_coverage_panel path sage_highconf_regions + path driver_gene_panel path ensembl_data_resources + val targeted_mode output: tuple val(meta), path('germline/*.sage.germline.vcf.gz'), path('germline/*.sage.germline.vcf.gz.tbi'), emit: vcf tuple val(meta), path('germline/') , emit: sage_dir path 'versions.yml' , emit: versions - - def run_mode = Utils.getEnumFromString(params.mode, Constants.RunMode) - def high_depth_mode_arg = (run_mode === Constants.RunMode.TARGETED) ? '-high_depth_mode' : '' + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -33,43 +31,46 @@ process SAGE_GERMLINE { script: def args = task.ext.args ?: '' - def xmx_mod = task.ext.xmx_mod ?: 0.95 + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + + def high_depth_mode_arg = targeted_mode ? 
'-high_depth_mode' : '' """ mkdir -p germline/ sage \\ - -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ ${args} \\ -tumor ${meta.normal_id} \\ -tumor_bam ${normal_bam} \\ -reference ${meta.tumor_id} \\ -reference_bam ${tumor_bam} \\ -jitter_param_dir ./ \\ + -ref_sample_count 0 \\ -ref_genome ${genome_fasta} \\ -ref_genome_version ${genome_ver} \\ -hotspots ${sage_known_hotspots_germline} \\ - -panel_bed ${sage_actionable_panel} \\ - -coverage_bed ${sage_coverage_panel} \\ + -driver_gene_panel ${driver_gene_panel} \\ -high_confidence_bed ${sage_highconf_regions} \\ -ensembl_data_dir ${ensembl_data_resources} \\ -germline \\ -panel_only \\ - -ref_sample_count 0 \\ ${high_depth_mode_arg} \\ -bqr_write_plot \\ -threads ${task.cpus} \\ + ${log_level_arg} \\ -output_vcf germline/${meta.tumor_id}.sage.germline.vcf.gz cat <<-END_VERSIONS > versions.yml "${task.process}": - sage: \$(sage -version | sed -n '/^Sage version / { s/^.* //p }') + sage: \$(sage -version | sed 's/^.* //') END_VERSIONS """ stub: """ mkdir -p germline/ + touch germline/${meta.tumor_id}.sage.germline.vcf.gz touch germline/${meta.tumor_id}.sage.germline.vcf.gz.tbi touch germline/${meta.tumor_id}.sage.bqr.png diff --git a/modules/local/sage/germline/meta.yml b/modules/local/sage/germline/meta.yml index 942d3a6a..5fb156d4 100644 --- a/modules/local/sage/germline/meta.yml +++ b/modules/local/sage/germline/meta.yml @@ -16,7 +16,7 @@ input: type: map description: | Groovy Map containing sample informatio - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. 
[id: 'sample_id'] - tumor_bam: type: file description: Tumor BAM file @@ -55,18 +55,14 @@ input: type: file description: SAGE germline known hotspots file pattern: "*.{vcf.gz}" - - sage_actionable_panel: - type: file - description: SAGE actionable panel file - pattern: "*.{bed.gz}" - - sage_coverage_panel: - type: file - description: SAGE coverage gene panel file - pattern: "*.{bed.gz}" - sage_highconf_regions: type: file description: SAGE high confidence regions file pattern: "*.{bed.gz}" + - driver_gene_panel: + type: file + description: Driver gene panel file + pattern: "*.{tsv}" - ensembl_data_resources: type: directory description: HMF ensembl data resources directory @@ -75,7 +71,7 @@ output: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. [id: 'sample_id'] - vcf: type: file description: VCF file @@ -87,5 +83,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/sage/somatic/environment.yml b/modules/local/sage/somatic/environment.yml index 3e75984b..1dbb5f2f 100644 --- a/modules/local/sage/somatic/environment.yml +++ b/modules/local/sage/somatic/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-sage=4.0 + - bioconda::hmftools-sage=4.1 diff --git a/modules/local/sage/somatic/main.nf b/modules/local/sage/somatic/main.nf index 07667eae..84fc55de 100644 --- a/modules/local/sage/somatic/main.nf +++ b/modules/local/sage/somatic/main.nf @@ -6,8 +6,8 @@ process SAGE_SOMATIC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/hmftools-sage:4.0--hdfd78af_0' : - 'biocontainers/hmftools-sage:4.0--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hmftools-sage:4.1--hdfd78af_0' : + 'biocontainers/hmftools-sage:4.1--hdfd78af_0' }" input: tuple val(meta), path(tumor_bam), path(normal_bam), path(donor_bam), path(tumor_bai), path(normal_bai), path(donor_bai), path(redux_tsvs) @@ -15,16 +15,19 @@ process SAGE_SOMATIC { val genome_ver path genome_fai path genome_dict + path sage_pon path sage_known_hotspots_somatic - path sage_actionable_panel - path sage_coverage_panel path sage_highconf_regions + path driver_gene_panel path ensembl_data_resources + path gnomad_resource + val targeted_mode output: tuple val(meta), path('somatic/*.sage.somatic.vcf.gz'), path('somatic/*.sage.somatic.vcf.gz.tbi'), emit: vcf tuple val(meta), path('somatic/') , emit: sage_dir path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -32,28 +35,53 @@ process SAGE_SOMATIC { script: def args = task.ext.args ?: '' - def xmx_mod = task.ext.xmx_mod ?: 0.95 + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + // Sample IDs def reference_ids = [] - if (meta.normal_id != null) reference_ids.add(meta.normal_id) - if (meta.donor_id != null) reference_ids.add(meta.donor_id) + if (meta.normal_id != null) { reference_ids.add(meta.normal_id) } + if (meta.donor_id != null) { reference_ids.add(meta.donor_id) } def reference_arg = reference_ids.size() > 0 ? "-reference ${String.join(',', reference_ids)}" : '' + def ref_sample_count_arg = reference_ids.size() > 0 ? 
"-ref_sample_count ${reference_ids.size()}" : '' + // BAMs def reference_bams = [] - if (normal_bam) reference_bams.add(normal_bam.toString()) - if (donor_bam) reference_bams.add(donor_bam.toString()) + if (normal_bam) { reference_bams.add(normal_bam.toString()) } + if (donor_bam) { reference_bams.add(donor_bam.toString()) } def reference_bam_arg = reference_bams.size() > 0 ? "-reference_bam ${String.join(',', reference_bams)}" : '' - def ref_sample_count_arg = "-ref_sample_count ${reference_ids.size()}" - - def run_mode = Utils.getEnumFromString(params.mode, Constants.RunMode) - def high_depth_mode_arg = (run_mode === Constants.RunMode.TARGETED) ? '-high_depth_mode' : '' + // Tumor in normal contamination (TINC): only for WGS tumor/normal samples + def run_tinc_arg = '' + def write_fit_variants_arg = '' + def gnomad_arg = '' + def pon_file_arg = '' + + if (!targeted_mode && tumor_bam && normal_bam) { + run_tinc_arg = '-run_tinc' + pon_file_arg = "-pon_file ${sage_pon}" + write_fit_variants_arg = '-write_fit_variants' + + if (genome_ver.toString() == '37') { + gnomad_arg = "-gnomad_freq_file ${gnomad_resource}" + } else if (genome_ver.toString() == '38') { + gnomad_arg = "-gnomad_freq_dir ${gnomad_resource}" + } else { + error "got bad genome version: ${genome_ver}" + } + } + + // NOTE(SW): use of ternary inexplicitly causes a 'variable already defined in scope error' + if (targeted_mode) { + high_depth_mode_arg = '-high_depth_mode' + } else { + high_depth_mode_arg = '' + } """ mkdir -p somatic/ sage \\ - -Xmx${Math.round(task.memory.bytes * xmx_mod)} \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ ${args} \\ ${reference_arg} \\ ${reference_bam_arg} \\ @@ -64,18 +92,22 @@ process SAGE_SOMATIC { -ref_genome ${genome_fasta} \\ -ref_genome_version ${genome_ver} \\ -hotspots ${sage_known_hotspots_somatic} \\ - -panel_bed ${sage_actionable_panel} \\ - -coverage_bed ${sage_coverage_panel} \\ + -driver_gene_panel ${driver_gene_panel} \\ -high_confidence_bed 
${sage_highconf_regions} \\ -ensembl_data_dir ${ensembl_data_resources} \\ + ${pon_file_arg} \\ + ${gnomad_arg} \\ + ${run_tinc_arg} \\ ${high_depth_mode_arg} \\ -bqr_write_plot \\ + ${write_fit_variants_arg} \\ -threads ${task.cpus} \\ + ${log_level_arg} \\ -output_vcf somatic/${meta.tumor_id}.sage.somatic.vcf.gz cat <<-END_VERSIONS > versions.yml "${task.process}": - sage: \$(sage -version | sed -n '/^Sage version / { s/^.* //p }') + sage: \$(sage -version | sed 's/^.* //') END_VERSIONS """ diff --git a/modules/local/sage/somatic/meta.yml b/modules/local/sage/somatic/meta.yml index a70d6129..9fd096b1 100644 --- a/modules/local/sage/somatic/meta.yml +++ b/modules/local/sage/somatic/meta.yml @@ -16,7 +16,7 @@ input: type: map description: | Groovy Map containing sample informatio - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. [id: 'sample_id'] - tumor_bam: type: file description: Tumor BAM file @@ -59,31 +59,34 @@ input: type: file description: Reference genome assembly dict file pattern: "*.{dict}" + - sage_pon: + type: file + description: SAGE PON file (optional) + pattern: "*.{tsv.gz}" - sage_known_hotspots_somatic: type: file description: SAGE somatic known hotspots file pattern: "*.{vcf.gz}" - - sage_actionable_panel: - type: file - description: SAGE actionable gene panel file - pattern: "*.{bed.gz}" - - sage_coverage_panel: - type: file - description: SAGE coverage gene panel file - pattern: "*.{bed.gz}" - sage_highconf_regions: type: file description: SAGE high confidence regions file pattern: "*.{bed.gz}" + - driver_gene_panel: + type: file + description: Driver gene panel file + pattern: "*.{tsv}" - ensembl_data_resources: type: directory description: HMF ensembl data resources directory + - targeted_mode: + type: boolean + description: Flag indicating whether targeted mode is set output: - meta: type: map description: | Groovy Map containing sample information - e.g. 
[id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. [id: 'sample_id'] - vcf: type: file description: SAGE VCF file @@ -95,5 +98,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/sambamba/merge/main.nf b/modules/local/sambamba/merge/main.nf index d67b9743..31b29013 100644 --- a/modules/local/sambamba/merge/main.nf +++ b/modules/local/sambamba/merge/main.nf @@ -13,6 +13,7 @@ process SAMBAMBA_MERGE { output: tuple val(meta), path('*bam'), emit: bam path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when diff --git a/modules/local/sambamba/merge/meta.yml b/modules/local/sambamba/merge/meta.yml index c4424055..849ef025 100644 --- a/modules/local/sambamba/merge/meta.yml +++ b/modules/local/sambamba/merge/meta.yml @@ -15,7 +15,7 @@ input: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. [id: 'sample_id'] - bams: type: list description: List BAM files @@ -24,7 +24,7 @@ output: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. 
[id: 'sample_id'] - bam: type: file description: BAM file @@ -33,6 +33,9 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" - "@mkcmkc" diff --git a/modules/local/sigs/main.nf b/modules/local/sigs/main.nf index 96a210f9..fd95383d 100644 --- a/modules/local/sigs/main.nf +++ b/modules/local/sigs/main.nf @@ -14,6 +14,7 @@ process SIGS { output: tuple val(meta), path('sigs/'), emit: sigs_dir path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -23,6 +24,8 @@ process SIGS { def xmx_mod = task.ext.xmx_mod ?: 0.75 + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + """ mkdir -p sigs/ @@ -32,6 +35,7 @@ process SIGS { -sample ${meta.sample_id} \\ -somatic_vcf_file ${smlv_vcf} \\ -signatures_file ${signatures} \\ + ${log_level_arg} \\ -output_dir sigs/ cat <<-END_VERSIONS > versions.yml @@ -43,6 +47,7 @@ process SIGS { stub: """ mkdir -p sigs/ + touch sigs/placeholder echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml diff --git a/modules/local/sigs/meta.yml b/modules/local/sigs/meta.yml index 70dd85b6..767f5b2b 100644 --- a/modules/local/sigs/meta.yml +++ b/modules/local/sigs/meta.yml @@ -14,7 +14,7 @@ input: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name'] + e.g. [id: 'sample_id'] - smlv_vcf: type: file description: Small somatic variant VCF file @@ -28,7 +28,7 @@ output: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. 
[id: 'sample_id'] - sigs_dir: type: directory description: Sigs output directory @@ -36,5 +36,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/star/align/main.nf b/modules/local/star/align/main.nf index 66d7c392..68c3906e 100644 --- a/modules/local/star/align/main.nf +++ b/modules/local/star/align/main.nf @@ -14,6 +14,7 @@ process STAR_ALIGN { output: tuple val(meta), path('*bam'), emit: bam path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when diff --git a/modules/local/star/align/meta.yml b/modules/local/star/align/meta.yml index 19bb83c5..4f7283a7 100644 --- a/modules/local/star/align/meta.yml +++ b/modules/local/star/align/meta.yml @@ -16,7 +16,7 @@ input: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. [id: 'sample_id'] - reads_fwd: type: file description: Forward reads FASTQ file @@ -33,7 +33,7 @@ output: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. 
[id: 'sample_id'] - bam: type: file description: BAM file @@ -42,5 +42,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/teal/pipeline/environment.yml b/modules/local/teal/pipeline/environment.yml index c9466094..9810b8a1 100644 --- a/modules/local/teal/pipeline/environment.yml +++ b/modules/local/teal/pipeline/environment.yml @@ -1,4 +1,4 @@ -name: teal +name: teal_pipeline channels: - conda-forge - bioconda diff --git a/modules/local/teal/pipeline/main.nf b/modules/local/teal/pipeline/main.nf index 5712455e..4aeea3eb 100644 --- a/modules/local/teal/pipeline/main.nf +++ b/modules/local/teal/pipeline/main.nf @@ -9,14 +9,20 @@ process TEAL_PIPELINE { input: tuple val(meta), - path(tumor_teal_bam), path(tumor_teal_bai), - path(normal_teal_bam), path(normal_teal_bai), - path(tumor_metrics_dir), path(normal_metrics_dir), path(cobalt_dir), path(purple_dir) + path(tumor_teal_bam), + path(tumor_teal_bai), + path(normal_teal_bam), + path(normal_teal_bai), + path(tumor_metrics_dir), + path(normal_metrics_dir), + path(cobalt_dir), + path(purple_dir) val genome_ver output: tuple val(meta), path('teal/*.tsv*'), emit: teal_tsvs path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -35,8 +41,13 @@ process TEAL_PIPELINE { def reference_bam_arg = normal_teal_bam ? "-reference_bam ${normal_teal_bam}" : '' def reference_wgs_metrics_arg = normal_metrics_dir ? "-reference_wgs_metrics ${normal_metrics_dir}/${meta.normal_id}.bam_metric.summary.tsv" : '' - if (tumor_arg && ! purple_arg) error "TEAL requires PURPLE inputs when analysing tumor data" - if (! tumor_arg && ! 
reference_arg) error "TEAL at least tumor or normal data for analyses" + if (tumor_arg && !purple_arg) { + error 'TEAL requires PURPLE inputs when analysing tumor data' + } + + if (!tumor_arg && !reference_arg) { + error 'TEAL at least tumor or normal data for analyses' + } """ teal \\ diff --git a/modules/local/teal/pipeline/meta.yml b/modules/local/teal/pipeline/meta.yml index 81e5171c..57bced64 100644 --- a/modules/local/teal/pipeline/meta.yml +++ b/modules/local/teal/pipeline/meta.yml @@ -16,22 +16,22 @@ input: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] - - tumor_bam: + e.g. [id: 'sample_id'] + - tumor_teal_bam: type: file - description: Tumor BAM file (optional) + description: Tumor TEAL BAM file (optional) pattern: "*.{bam}" - - normal_bam: + - tumor_teal_bai: type: file - description: Normal BAM file (optional) - pattern: "*.{bam}" - - tumor_bai: - type: file - description: Tumor BAI file (optional) + description: Tumor TEAL BAI file (optional) pattern: "*.{bai}" - - normal_bai: + - normal_teal_bam: type: file - description: Normal BAI file (optional) + description: Normal TEAL BAM file (optional) + pattern: "*.{bam}" + - normal_teal_bai: + type: file + description: Normal TEAL BAI file (optional) pattern: "*.{bai}" - tumor_wgs_metrics_dir: type: directory @@ -53,13 +53,17 @@ output: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] - - teal_dir: - type: directory + e.g. 
[id: 'sample_id'] + - teal_tsvs: + type: list description: TEAL output directory + pattern: "*.{tsv,tsv.*}" - versions: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/teal/prep/environment.yml b/modules/local/teal/prep/environment.yml index c9466094..4a826d86 100644 --- a/modules/local/teal/prep/environment.yml +++ b/modules/local/teal/prep/environment.yml @@ -1,4 +1,4 @@ -name: teal +name: teal_prep channels: - conda-forge - bioconda diff --git a/modules/local/teal/prep/main.nf b/modules/local/teal/prep/main.nf index 0845571e..47910b58 100644 --- a/modules/local/teal/prep/main.nf +++ b/modules/local/teal/prep/main.nf @@ -12,9 +12,10 @@ process TEAL_PREP { val genome_ver output: - tuple val(meta), path("teal_bam/${meta.tumor_id}.teal.telbam{.bam,.bam.bai}") , emit: tumor_bam - tuple val(meta), path("teal_bam/${meta.normal_id}.teal.telbam{.bam,.bam.bai}"), emit: normal_bam, optional: true - path 'versions.yml', emit: versions + tuple val(meta), path("teal_bam/${meta.tumor_id}.teal.telbam{.bam,.bam.bai}") , emit: tumor_teal_bam + tuple val(meta), path("teal_bam/${meta.normal_id}.teal.telbam{.bam,.bam.bai}"), emit: normal_teal_bam, optional: true + path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -24,21 +25,19 @@ process TEAL_PREP { def xmx_mod = task.ext.xmx_mod ?: 0.95 - def tumor_arg = "" - def tumor_bam_arg = "" - def tumor_bam_index_command = "" - - if(tumor_bam) { + def tumor_arg = '' + def tumor_bam_arg = '' + def tumor_bam_index_command = '' + if (tumor_bam) { tumor_arg = "-tumor ${meta.tumor_id}" tumor_bam_arg = "-tumor_bam ${tumor_bam}" tumor_bam_index_command = "samtools index teal_bam/${meta.tumor_id}.teal.telbam.bam" } - def reference_arg = "" - def reference_bam_arg = "" - def reference_bam_index_command = "" - - if(normal_bam) 
{ + def reference_arg = '' + def reference_bam_arg = '' + def reference_bam_index_command = '' + if (normal_bam) { reference_arg = "-reference ${meta.normal_id}" reference_bam_arg = "-reference_bam ${normal_bam}" reference_bam_index_command = "samtools index teal_bam/${meta.normal_id}.teal.telbam.bam" diff --git a/modules/local/teal/prep/meta.yml b/modules/local/teal/prep/meta.yml index 81e5171c..f0cbf94f 100644 --- a/modules/local/teal/prep/meta.yml +++ b/modules/local/teal/prep/meta.yml @@ -16,50 +16,43 @@ input: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] + e.g. [id: 'sample_id'] - tumor_bam: type: file description: Tumor BAM file (optional) pattern: "*.{bam}" - - normal_bam: - type: file - description: Normal BAM file (optional) - pattern: "*.{bam}" - tumor_bai: type: file description: Tumor BAI file (optional) pattern: "*.{bai}" + - normal_bam: + type: file + description: Normal BAM file (optional) + pattern: "*.{bam}" - normal_bai: type: file description: Normal BAI file (optional) pattern: "*.{bai}" - - tumor_wgs_metrics_dir: - type: directory - description: Tumor WGS metrics directory (optional) - - normal_wgs_metrics: - type: directory - description: Normal WGS metrics directory (optional) - - cobalt_dir: - type: directory - description: COBALT output directory - - purple_dir: - type: directory - description: PURPLE output directory (optional) - - genome_ver: - type: string - description: Reference genome version output: - meta: type: map description: | Groovy Map containing sample information - e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name'] - - teal_dir: - type: directory - description: TEAL output directory + e.g. 
[id: 'sample_id'] + - tumor_teal_bam: + type: list + description: Tumor TEAL BAM and BAI file + pattern: "*.{bam,bai}" + - normal_teal_bam: + type: list + description: Normal TEAL BAM and BAI file + pattern: "*.{bam,bai}" - versions: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/virusbreakend/main.nf b/modules/local/virusbreakend/main.nf index 973c02cd..b78272ba 100644 --- a/modules/local/virusbreakend/main.nf +++ b/modules/local/virusbreakend/main.nf @@ -17,9 +17,10 @@ process VIRUSBREAKEND { path gridss_config output: - tuple val(meta), path("*.summary.tsv"), emit: tsv - path "*.virusbreakend.vcf" , emit: vcf + tuple val(meta), path('*.summary.tsv'), emit: tsv + path '*.virusbreakend.vcf' , emit: vcf path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when diff --git a/modules/local/virusbreakend/meta.yml b/modules/local/virusbreakend/meta.yml index a61ff589..35224d48 100644 --- a/modules/local/virusbreakend/meta.yml +++ b/modules/local/virusbreakend/meta.yml @@ -59,5 +59,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/virusinterpreter/environment.yml b/modules/local/virusinterpreter/environment.yml index 8bc81120..36f605a8 100644 --- a/modules/local/virusinterpreter/environment.yml +++ b/modules/local/virusinterpreter/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::hmftools-virus-interpreter=1.7 + - bioconda::hmftools-virus-interpreter=1.7.1 diff --git a/modules/local/virusinterpreter/main.nf b/modules/local/virusinterpreter/main.nf index c15b9b1e..64c6d159 100644 --- a/modules/local/virusinterpreter/main.nf +++ 
b/modules/local/virusinterpreter/main.nf @@ -4,8 +4,8 @@ process VIRUSINTERPRETER { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/hmftools-virus-interpreter:1.7--hdfd78af_0' : - 'biocontainers/hmftools-virus-interpreter:1.7--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/hmftools-virus-interpreter:1.7.1--hdfd78af_0' : + 'biocontainers/hmftools-virus-interpreter:1.7.1--hdfd78af_0' }" input: tuple val(meta), path(virus_tsv), path(purple_dir), path(bamtools_somatic_dir) @@ -16,6 +16,7 @@ process VIRUSINTERPRETER { output: tuple val(meta), path('virusinterpreter/'), emit: virusinterpreter_dir path 'versions.yml' , emit: versions + path '.command.*' , emit: command_files when: task.ext.when == null || task.ext.when @@ -25,6 +26,8 @@ process VIRUSINTERPRETER { def xmx_mod = task.ext.xmx_mod ?: 0.95 + def log_level_arg = task.ext.log_level ? 
"-log_level ${task.ext.log_level}" : '' + """ mkdir -p virusinterpreter/ @@ -38,6 +41,7 @@ process VIRUSINTERPRETER { -taxonomy_db_tsv ${taxonomy_db} \\ -virus_reporting_db_tsv ${reporting_db} \\ -virus_blacklisting_db_tsv ${blocklist_db} \\ + ${log_level_arg} \\ -output_dir virusinterpreter/ cat <<-END_VERSIONS > versions.yml @@ -49,6 +53,7 @@ process VIRUSINTERPRETER { stub: """ mkdir -p virusinterpreter/ + touch virusinterpreter/${meta.sample_id}.virus.annotated.tsv echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml diff --git a/modules/local/virusinterpreter/meta.yml b/modules/local/virusinterpreter/meta.yml index 6cf0ffb2..dd6f0938 100644 --- a/modules/local/virusinterpreter/meta.yml +++ b/modules/local/virusinterpreter/meta.yml @@ -35,7 +35,7 @@ input: type: file description: Virus Interpreter reporting database file pattern: "*.{tsv}" - - blacklist_db: + - blocklist_db: type: file description: Virus Interpreter blocklist database file pattern: "*.{tsv}" @@ -52,5 +52,8 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - command_files: + type: list + description: List of command files authors: - "@scwatts" diff --git a/modules/local/isofox/environment.yml b/modules/local/wisp/environment.yml similarity index 57% rename from modules/local/isofox/environment.yml rename to modules/local/wisp/environment.yml index d4251c57..1d35d2ac 100644 --- a/modules/local/isofox/environment.yml +++ b/modules/local/wisp/environment.yml @@ -1,7 +1,7 @@ -name: isofox +name: wisp channels: - conda-forge - bioconda - defaults dependencies: - - bioconda::hmftools-isofox=1.7.1 + - bioconda::hmftools-wisp=1.2 diff --git a/modules/local/wisp/main.nf b/modules/local/wisp/main.nf new file mode 100644 index 00000000..447c73af --- /dev/null +++ b/modules/local/wisp/main.nf @@ -0,0 +1,105 @@ +process WISP { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 
'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/hmftools-wisp:1.2--hdfd78af_0' : + 'biocontainers/hmftools-wisp:1.2--hdfd78af_0' }" + + input: + tuple val(meta), + path(primary_purple_dir), + path('primary_amber_dir'), + path('sample_amber_dir'), + path(cobalt_dir), + path(sage_append_dir) + path genome_fasta + path genome_fai + val targeted_mode + + output: + path 'wisp/' , emit: wisp_dir + path 'versions.yml', emit: versions + path '.command.*' , emit: command_files + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + def log_level_arg = task.ext.log_level ? "-log_level ${task.ext.log_level}" : '' + + def purity_estimate_mode = Utils.getEnumFromString(params.purity_estimate_mode, Constants.RunMode) + + def purity_methods + def amber_dir_arg + def cobalt_dir_arg + def gc_ratio_min_arg + def write_types_arg + + if (targeted_mode) { + purity_methods = 'SOMATIC_VARIANT' + amber_dir_arg = '' + cobalt_dir_arg = '' + gc_ratio_min_arg = '-gc_ratio_min 0.4' + write_types_arg = "-write_types 'SOMATIC_DATA;SOMATIC_PLOT'" + } else { + purity_methods = "'SOMATIC_VARIANT;AMBER_LOH;COPY_NUMBER'" + amber_dir_arg = '-amber_dir amber_dir__prepared/' + cobalt_dir_arg = "-cobalt_dir ${cobalt_dir}" + gc_ratio_min_arg = '' + write_types_arg = '-write_types ALL' + } + + """ + # Put AMBER outputs from all samples into the same dir + if [[ -n "${amber_dir_arg}" ]]; then + mkdir -p amber_dir__prepared/; + for fp in ${primary_amber_dir}/*.amber.*; do ln -sf ../\$fp amber_dir__prepared/; done + for fp in ${sample_amber_dir}/*.amber.*; do ln -sf ../\$fp amber_dir__prepared/; done + fi; + + # Run WISP + mkdir -p wisp/ + + wisp \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + com.hartwig.hmftools.wisp.purity.PurityEstimator \\ + ${args} \\ + -patient_id ${meta.subject_id} \\ + -tumor_id ${meta.primary_id} \\ + -samples ${meta.longitudinal_id} \\ + -purity_methods ${purity_methods} 
\\ + -somatic_vcf ${sage_append_dir}/${meta.longitudinal_id}.sage.append.vcf.gz \\ + -purple_dir ${primary_purple_dir} \\ + ${amber_dir_arg} \\ + ${cobalt_dir_arg} \\ + -ref_genome ${genome_fasta} \\ + ${gc_ratio_min_arg} \\ + ${write_types_arg} \\ + ${log_level_arg} \\ + -output_dir wisp/ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + wisp: \$(wisp -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + mkdir -p wisp/ + + touch wisp/${meta.subject_id}_${meta.longitudinal_id}.wisp.cn_plot_calcs.tsv + touch wisp/${meta.subject_id}_${meta.longitudinal_id}.wisp.cn_segments.tsv + touch wisp/${meta.subject_id}_${meta.longitudinal_id}.wisp.somatic_peak.tsv + touch wisp/${meta.subject_id}_${meta.longitudinal_id}.wisp.somatic_variants.tsv + touch wisp/${meta.subject_id}_${meta.longitudinal_id}.wisp.summary.tsv + touch wisp/${meta.longitudinal_id}.cn_gc_ratio_fit.png + touch wisp/${meta.longitudinal_id}.somatic_vaf.png + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/wisp/meta.yml b/modules/local/wisp/meta.yml new file mode 100644 index 00000000..105d7944 --- /dev/null +++ b/modules/local/wisp/meta.yml @@ -0,0 +1,62 @@ +name: wisp +description: Estimates TF in a given sample using biomarkers of an existing sample from the same patient +keywords: + - tumor_fraction + - purity + - ccfdna +tools: + - wisp: + description: Estimates TF in a given sample using biomarkers of an existing sample from the same patient. + homepage: https://github.com/hartwigmedical/hmftools/tree/master/wisp + documentation: https://github.com/hartwigmedical/hmftools/tree/master/wisp + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[id: 'sample_id'] + - primary_purple_dir: + type: directory + description: PURPLE directory of the primary sample + - primary_amber_dir: + type: directory + description: AMBER directory of the primary sample + - sample_amber_dir: + type: directory + description: AMBER directory of the query sample + - cobalt_dir: + type: directory + description: COBALT directory + - sage_append_dir: + type: directory + description: SAGE append directory + - genome_fasta: + type: file + description: Reference genome assembly FASTA file + pattern: "*.{fa,fasta}" + - genome_fai: + type: file + description: Reference genome assembly fai file + pattern: "*.{fai}" + - targeted_mode: + type: boolean + description: Flag indicating whether targeted mode is set +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [id: 'sample_id'] + - wisp_dir: + type: directory + description: WISP output direcotry + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - command_files: + type: list + description: List of command files +authors: + - "@scwatts" diff --git a/modules/nf-core/gatk4/markduplicates/gatk4-markduplicates.diff b/modules/nf-core/gatk4/markduplicates/gatk4-markduplicates.diff index a2eac783..6bb68a97 100644 --- a/modules/nf-core/gatk4/markduplicates/gatk4-markduplicates.diff +++ b/modules/nf-core/gatk4/markduplicates/gatk4-markduplicates.diff @@ -1,4 +1,4 @@ -Changes in module 'nf-core/gatk4/markduplicates' +Changes in component 'nf-core/gatk4/markduplicates' 'modules/nf-core/gatk4/markduplicates/meta.yml' is unchanged Changes in 'gatk4/markduplicates/main.nf': --- modules/nf-core/gatk4/markduplicates/main.nf @@ -17,7 +17,14 @@ Changes in 'gatk4/markduplicates/main.nf': input: tuple val(meta), path(bam) -@@ -25,7 +25,7 @@ +@@ -19,13 +19,14 @@ + tuple val(meta), path("*.bai"), emit: bai, optional: true + tuple val(meta), path("*.metrics"), emit: metrics + path "versions.yml", emit: versions ++ 
path '.command.{sh,log}', emit: command_files + + when: + task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' @@ -26,7 +33,7 @@ Changes in 'gatk4/markduplicates/main.nf': def input_list = bam.collect{"--INPUT $it"}.join(' ') def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" -@@ -38,20 +38,29 @@ +@@ -38,20 +39,29 @@ """ gatk --java-options "-Xmx${avail_mem}M" MarkDuplicates \\ $input_list \\ diff --git a/modules/nf-core/gatk4/markduplicates/main.nf b/modules/nf-core/gatk4/markduplicates/main.nf index 35e3a59e..91b8c3b3 100644 --- a/modules/nf-core/gatk4/markduplicates/main.nf +++ b/modules/nf-core/gatk4/markduplicates/main.nf @@ -19,6 +19,7 @@ process GATK4_MARKDUPLICATES { tuple val(meta), path("*.bai"), emit: bai, optional: true tuple val(meta), path("*.metrics"), emit: metrics path "versions.yml", emit: versions + path '.command.{sh,log}', emit: command_files when: task.ext.when == null || task.ext.when diff --git a/nextflow.config b/nextflow.config index 6faf1e38..32615124 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,6 +14,7 @@ params { // Workflow mode mode = null + purity_estimate_mode = null // Force options force_genome = false @@ -21,13 +22,15 @@ params { // Read processing and alignment options max_fastq_records = 10000000 - fastp_umi = false - redux_umi = false + fastp_umi_enabled = false + redux_umi_enabled = false // Process configuration - processes_manual = false - processes_include = null - processes_exclude = null + processes_manual = '' + processes_include = null + processes_exclude = null + + hmftools_log_level = 'DEBUG' // Reference genome information; iGenomes is effectively disabled but retained for linting genome = null @@ -43,8 +46,13 @@ params { isofox_read_length = null isofox_functions = 'TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS;RETAINED_INTRONS' + // NOTE(SW): used only for panel resource creation + driver_gene_panel = null + target_regions_bed = null + gridss_config = null + 
ref_data_types = null prepare_reference_only = false create_stub_placeholders = false @@ -271,7 +279,7 @@ manifest { affiliation: '', email: '', github: '', - contribution: ['author'], + contribution: ['author', 'maintainer'], orcid: '' ], [ @@ -287,14 +295,14 @@ manifest { description = """A comprehensive cancer DNA/RNA analysis and reporting pipeline""" mainScript = 'main.nf' defaultBranch = 'master' - nextflowVersion = '!>=24.04.2' - version = '2.1.0' + nextflowVersion = '!>=24.10.5' + version = '2.2.0' doi = '' } // Nextflow plugins plugins { - id 'nf-schema@2.3.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.4.2' // Validation of pipeline parameters and creation of an input channel from a sample sheet } validation { @@ -326,6 +334,7 @@ validation { 'fastp_umi_location', 'fastp_umi_skip', 'redux_umi_duplex_delim', + 'prepare_reference_only', ] lenientMode = true diff --git a/nextflow_schema.json b/nextflow_schema.json index 82eed696..e58e06c5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": ["outdir"], "properties": { "input": { "type": "string", @@ -48,6 +48,12 @@ "type": "string", "description": "Workflow run mode.", "fa_icon": "fas fa-diagram-project", + "pattern": "^(wgts|targeted|purity_estimate|panel_resource_creation|prepare_reference)" + }, + "purity_estimate_mode": { + "type": "string", + "description": "Purity estimate workflow run mode.", + "fa_icon": "fas fa-diagram-project", "pattern": "^(wgts|targeted)" }, "panel": { @@ -68,24 +74,23 @@ "fa_icon": "fas fa-palette" }, "processes_manual": { - "type": "boolean", - "description": "Run only processes manually provided in processes_include.", + "type": "string", + "description": "Manually run processes provided as a comma 
separated list.", "fa_icon": "fas fa-diagram-project" }, "processes_exclude": { "type": "string", - "description": "Pipeline processes to exclude.", + "description": "Exclude processes provided as a comma separated list.", "fa_icon": "fas fa-diagram-project" }, "processes_include": { "type": "string", - "description": "Pipeline processes to include.", + "description": "Include processes that are excluded by default, provided as a comma separated list.", "fa_icon": "fas fa-diagram-project" }, - "prepare_reference_only": { - "type": "boolean", - "description": "Prepare and write reference output only.", - "default": false, + "ref_data_types": { + "type": "string", + "description": "Which reference data types to download and extract.", "fa_icon": "fas fa-diagram-project" }, "create_stub_placeholders": { @@ -100,7 +105,7 @@ "default": 10000000, "fa_icon": "fas fa-cog" }, - "fastp_umi": { + "fastp_umi_enabled": { "type": "boolean", "description": "Enable fastp UMI processing.", "default": false, @@ -123,7 +128,7 @@ "default": -1, "fa_icon": "fas fa-cog" }, - "redux_umi": { + "redux_umi_enabled": { "type": "boolean", "description": "Enable REDUX UMI processing.", "default": false, @@ -169,6 +174,23 @@ "description": "Semicolon-separated list of Isofox functions to run", "default": "TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS;RETAINED_INTRONS", "fa_icon": "fas fa-cog" + }, + "driver_gene_panel": { + "type": "string", + "description": "User defined driver gene panel used in panel resource creation, or for overriding the default file located in ref_data_hmf_data_path", + "fa_icon": "fas fa-cog" + }, + "target_regions_bed": { + "type": "string", + "description": "User defined target regions BED used in panel resource creation.", + "fa_icon": "fas fa-cog" + }, + "hmftools_log_level": { + "type": "string", + "enum": ["ALL", "TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL"], + "description": "Log level filter for WiGiTS modules", + "default": "DEBUG", + "fa_icon": "fas 
fa-cog" } } }, @@ -277,6 +299,12 @@ "pattern": "^\\S+\\.bed$", "description": "Path to HLA slice BED file.", "fa_icon": "far fa-file-code" + }, + "prepare_reference_only": { + "type": "boolean", + "default": false, + "fa_icon": "fas fa-cog", + "hidden": true } } }, diff --git a/nf-test.config b/nf-test.config index 889df760..3a1fff59 100644 --- a/nf-test.config +++ b/nf-test.config @@ -9,7 +9,7 @@ config { configFile "tests/nextflow.config" // ignore tests coming from the nf-core/modules repo - ignore 'modules/nf-core/**/*', 'subworkflows/nf-core/**/*' + ignore 'modules/nf-core/**/tests/*', 'subworkflows/nf-core/**/tests/*' // run all test with defined profile(s) from the main nextflow.config profile "test" diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 97cbed4e..13465d0b 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -22,8 +22,8 @@ "@id": "./", "@type": "Dataset", "creativeWorkStatus": "InProgress", - "datePublished": "2025-06-03T11:02:02+00:00", - "description": "

\n \n \n \"nf-core/oncoanalyser\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/oncoanalyser/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/oncoanalyser/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/oncoanalyser/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/oncoanalyser/actions/workflows/linting.yml)\n[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/oncoanalyser/results)\n[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.15189386-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.15189386)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.04.2-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/oncoanalyser)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23oncoanalyser-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/oncoanalyser)\n[![Follow on 
Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)\n[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)\n[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/oncoanalyser** is a Nextflow pipeline for the comprehensive analysis of cancer genomes and transcriptomes\nusing the [WiGiTS](https://github.com/hartwigmedical/hmftools) toolkit from the Hartwig Medical Foundation. The pipeline\nsupports a wide range of experimental setups:\n\n- FASTQ, BAM, or CRAM input files\n- WGS (whole genome sequencing), WTS (whole transcriptome sequencing), and targeted / panel sequencing (built-in support\n for the [TSO500\n panel](https://sapac.illumina.com/products/by-type/clinical-research-products/trusight-oncology-500.html) with other\n panels and exome requiring [panel reference data\n generation](https://github.com/hartwigmedical/hmftools/blob/master/pipeline/README_TARGETED.md))\n- Paired tumor / normal and tumor-only sample setups, donor sample support for further normal subtraction (e.g. for\n patients with bone marrow transplants or other contaminants in the tumor)\n- UMI (unique molecular identifier) processing supported for DNA sequencing data\n- Most GRCh37 and GRCh38 reference genome builds\n\n## Pipeline overview\n\n

\n\nThe pipeline mainly uses tools from [WiGiTS](https://github.com/hartwigmedical/hmftools), as well as some external\ntools. Due to the limitations of panel data, certain tools (indicated with `*` below) do not run in `targeted` mode.\n\n- Read alignment: [BWA-MEM2](https://github.com/bwa-mem2/bwa-mem2) (DNA), [STAR](https://github.com/alexdobin/STAR) (RNA)\n- Read post-processing: [REDUX](https://github.com/hartwigmedical/hmftools/tree/master/redux) (DNA), [Picard MarkDuplicates](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard) (RNA)\n- SNV, MNV, INDEL calling: [SAGE](https://github.com/hartwigmedical/hmftools/tree/master/sage), [PAVE](https://github.com/hartwigmedical/hmftools/tree/master/pave)\n- SV calling: [ESVEE](https://github.com/hartwigmedical/hmftools/tree/master/esvee)\n- CNV calling: [AMBER](https://github.com/hartwigmedical/hmftools/tree/master/amber), [COBALT](https://github.com/hartwigmedical/hmftools/tree/master/cobalt), [PURPLE](https://github.com/hartwigmedical/hmftools/tree/master/purple)\n- SV and driver event interpretation: [LINX](https://github.com/hartwigmedical/hmftools/tree/master/linx)\n- RNA transcript analysis: [ISOFOX](https://github.com/hartwigmedical/hmftools/tree/master/isofox)\n- Oncoviral detection: [VIRUSbreakend](https://github.com/PapenfussLab/gridss)\\*, [VirusInterpreter](https://github.com/hartwigmedical/hmftools/tree/master/virus-interpreter)\\*\n- Telomere characterisation: [TEAL](https://github.com/hartwigmedical/hmftools/tree/master/teal)\\*\n- Immune analysis: [LILAC](https://github.com/hartwigmedical/hmftools/tree/master/lilac), [CIDER](https://github.com/hartwigmedical/hmftools/tree/master/cider), [NEO](https://github.com/hartwigmedical/hmftools/tree/master/neo)\\*\n- Mutational signature fitting: [SIGS](https://github.com/hartwigmedical/hmftools/tree/master/sigs)\\*\n- HRD prediction: [CHORD](https://github.com/hartwigmedical/hmftools/tree/master/chord)\\*\n- Tissue of origin 
prediction: [CUPPA](https://github.com/hartwigmedical/hmftools/tree/master/cuppa)\\*\n- Pharmacogenomics: [PEACH](https://github.com/hartwigmedical/hmftools/tree/master/peach)\n- Summary report: [ORANGE](https://github.com/hartwigmedical/hmftools/tree/master/orange), [linxreport](https://github.com/umccr/linxreport)\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCreate a samplesheet with your inputs (WGS/WTS BAMs in this example):\n\n```csv\ngroup_id,subject_id,sample_id,sample_type,sequence_type,filetype,filepath\nPATIENT1_WGTS,PATIENT1,PATIENT1-N,normal,dna,bam,/path/to/PATIENT1-N.dna.bam\nPATIENT1_WGTS,PATIENT1,PATIENT1-T,tumor,dna,bam,/path/to/PATIENT1-T.dna.bam\nPATIENT1_WGTS,PATIENT1,PATIENT1-T-RNA,tumor,rna,bam,/path/to/PATIENT1-T.rna.bam\n```\n\nLaunch `oncoanalyser`:\n\n```bash\nnextflow run nf-core/oncoanalyser \\\n -profile \\\n -revision 2.1.0 \\\n --mode \\\n --genome \\\n --input samplesheet.csv \\\n --outdir output/\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/oncoanalyser/usage) and the [parameter documentation](https://nf-co.re/oncoanalyser/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/oncoanalyser/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/oncoanalyser/output).\n\n## Version information\n\n### Extended support\n\nAs `oncoanalyser` is used in clinical settings and subject to accreditation standards in some instances, there is a need\nfor long-term stability and reliability for feature releases in order to meet operational requirements. This is\naccomplished through long-term support of several nominated feature releases, which all receive bug fixes and security\nfixes during the period of extended support.\n\nEach release that is given extended support is allocated a separate long-lived git branch with the 'stable' prefix, e.g.\n`stable/1.2.x`, `stable/1.5.x`. 
Feature development otherwise occurs on the `dev` branch with stable releases pushed to\n`master`.\n\nVersions nominated to have current long-term support:\n\n- TBD\n\n## Known issues\n\nPlease refer to [this page](https://github.com/nf-core/oncoanalyser/issues/177) for details regarding any known issues.\n\n## Credits\n\nThe `oncoanalyser` pipeline was written and is maintained by Stephen Watts ([@scwatts](https://github.com/scwatts)) from\nthe [Genomics Platform\nGroup](https://mdhs.unimelb.edu.au/centre-for-cancer-research/our-research/genomics-platform-group) at the [University\nof Melbourne Centre for Cancer Research](https://mdhs.unimelb.edu.au/centre-for-cancer-research).\n\nWe thank the following organisations and people for their extensive assistance in the development of this pipeline,\nlisted in alphabetical order:\n\n- [Hartwig Medical Foundation\n Australia](https://www.hartwigmedicalfoundation.nl/en/partnerships/hartwig-medical-foundation-australia/)\n- Oliver Hofmann\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#oncoanalyser`\nchannel](https://nfcore.slack.com/channels/oncoanalyser) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nYou can cite the `oncoanalyser` Zenodo record for a specific version using the following DOI:\n[10.5281/zenodo.15189386](https://doi.org/10.5281/zenodo.15189386)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md)\nfile.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia,\n> Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 
Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "datePublished": "2025-08-12T00:13:00+00:00", + "description": "

\n \n \n \"nf-core/oncoanalyser\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/oncoanalyser/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/oncoanalyser/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/oncoanalyser/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/oncoanalyser/actions/workflows/linting.yml)\n[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/oncoanalyser/results)\n[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.15189386-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.15189386)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A524.10.5-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.3.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.3.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/oncoanalyser)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23oncoanalyser-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/oncoanalyser)\n[![Follow on 
Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)\n[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)\n[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/oncoanalyser** is a Nextflow pipeline for the comprehensive analysis of cancer DNA and RNA sequencing data\nusing the [WiGiTS](https://github.com/hartwigmedical/hmftools) toolkit from the Hartwig Medical Foundation. The pipeline\nsupports a wide range of experimental setups:\n\n- FASTQ, BAM, and / or CRAM input files\n- WGS (whole genome sequencing), WTS (whole transcriptome sequencing), and targeted / panel sequencing1\n- Paired tumor / normal and tumor-only samples, and support for donor samples for further normal subtraction\n- Purity estimate for longitudinal samples using genomic features of the primary sample from the same patient2\n- UMI (unique molecular identifier) processing supported for DNA sequencing data\n- Most GRCh37 and GRCh38 reference genome builds\n\n1 built-in support for the [TSO500\npanel](https://www.illumina.com/products/by-type/clinical-research-products/trusight-oncology-500.html) with other\npanels and exomes requiring [creation of custom panel reference\ndata](https://nf-co.re/oncoanalyser/usage#custom-panels)\n
\n2 for example a primary WGS tissue biopsy and longitudinal low-pass WGS ccfDNA sample taken from the\nsame patient\n\n## Pipeline overview\n\n

\n\nThe pipeline mainly uses tools from [WiGiTS](https://github.com/hartwigmedical/hmftools), as well as some other external\ntools. There are [several workflows available](https://nf-co.re/oncoanalyser/usage#introduction) in `oncoanalyser` and\nthe tool information below primarily relates to the `wgts` and `targeted` analysis modes.\n\n> [!NOTE]\n> Due to the limitations of panel data, certain tools (indicated with `*` below) do not run in `targeted` mode.\n\n- Read alignment: [BWA-MEM2](https://github.com/bwa-mem2/bwa-mem2) (DNA), [STAR](https://github.com/alexdobin/STAR) (RNA)\n- Read post-processing: [REDUX](https://github.com/hartwigmedical/hmftools/tree/master/redux) (DNA), [Picard MarkDuplicates](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard) (RNA)\n- SNV, MNV, INDEL calling: [SAGE](https://github.com/hartwigmedical/hmftools/tree/master/sage), [PAVE](https://github.com/hartwigmedical/hmftools/tree/master/pave)\n- SV calling: [ESVEE](https://github.com/hartwigmedical/hmftools/tree/master/esvee)\n- CNV calling: [AMBER](https://github.com/hartwigmedical/hmftools/tree/master/amber), [COBALT](https://github.com/hartwigmedical/hmftools/tree/master/cobalt), [PURPLE](https://github.com/hartwigmedical/hmftools/tree/master/purple)\n- SV and driver event interpretation: [LINX](https://github.com/hartwigmedical/hmftools/tree/master/linx)\n- RNA transcript analysis: [ISOFOX](https://github.com/hartwigmedical/hmftools/tree/master/isofox)\n- Oncoviral detection: [VIRUSbreakend](https://github.com/PapenfussLab/gridss)\\*, [VirusInterpreter](https://github.com/hartwigmedical/hmftools/tree/master/virus-interpreter)\\*\n- Telomere characterisation: [TEAL](https://github.com/hartwigmedical/hmftools/tree/master/teal)\\*\n- Immune analysis: [LILAC](https://github.com/hartwigmedical/hmftools/tree/master/lilac), [CIDER](https://github.com/hartwigmedical/hmftools/tree/master/cider), 
[NEO](https://github.com/hartwigmedical/hmftools/tree/master/neo)\\*\n- Mutational signature fitting: [SIGS](https://github.com/hartwigmedical/hmftools/tree/master/sigs)\\*\n- HRD prediction: [CHORD](https://github.com/hartwigmedical/hmftools/tree/master/chord)\\*\n- Tissue of origin prediction: [CUPPA](https://github.com/hartwigmedical/hmftools/tree/master/cuppa)\\*\n- Pharmacogenomics: [PEACH](https://github.com/hartwigmedical/hmftools/tree/master/peach)\n- Summary report: [ORANGE](https://github.com/hartwigmedical/hmftools/tree/master/orange), [linxreport](https://github.com/umccr/linxreport)\n\nFor the `purity_estimate` mode, several of the above tools are run with adjusted configuration in addition to the following.\n\n- Tumor fraction estimation: [WISP](https://github.com/hartwigmedical/hmftools/tree/master/wisp)\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nCreate a samplesheet with your inputs (WGS/WTS BAMs in this example):\n\n```csv\ngroup_id,subject_id,sample_id,sample_type,sequence_type,filetype,filepath\nPATIENT1_WGTS,PATIENT1,PATIENT1-N,normal,dna,bam,/path/to/PATIENT1-N.dna.bam\nPATIENT1_WGTS,PATIENT1,PATIENT1-T,tumor,dna,bam,/path/to/PATIENT1-T.dna.bam\nPATIENT1_WGTS,PATIENT1,PATIENT1-T-RNA,tumor,rna,bam,/path/to/PATIENT1-T.rna.bam\n```\n\nLaunch `oncoanalyser`:\n\n```bash\nnextflow run nf-core/oncoanalyser \\\n -profile \\\n -revision 2.2.0 \\\n --mode \\\n --genome \\\n --input samplesheet.csv \\\n --outdir output/\n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/oncoanalyser/usage) and the [parameter documentation](https://nf-co.re/oncoanalyser/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/oncoanalyser/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/oncoanalyser/output).\n\n## Version information\n\n### Extended support\n\nAs `oncoanalyser` is used in clinical settings and subject to accreditation standards in some instances, there is a need\nfor long-term stability and reliability for feature releases in order to meet operational requirements. This is\naccomplished through long-term support of several nominated feature releases, which all receive bug fixes and security\nfixes during the period of extended support.\n\nEach release that is given extended support is allocated a separate long-lived git branch with the 'stable' prefix, e.g.\n`stable/1.2.x`, `stable/1.5.x`. 
Feature development otherwise occurs on the `dev` branch with stable releases pushed to\n`master`.\n\nVersions nominated to have current long-term support:\n\n- TBD\n\n## Known issues\n\nPlease refer to [this page](https://github.com/nf-core/oncoanalyser/issues/177) for details regarding any known issues.\n\n## Credits\n\nThe `oncoanalyser` pipeline was written and is maintained by Stephen Watts ([@scwatts](https://github.com/scwatts)) from\nthe [Genomics Platform\nGroup](https://mdhs.unimelb.edu.au/centre-for-cancer-research/our-research/genomics-platform-group) at the [University\nof Melbourne Centre for Cancer Research](https://mdhs.unimelb.edu.au/centre-for-cancer-research).\n\nWe thank the following organisations and people for their extensive assistance in the development of this pipeline,\nlisted in alphabetical order:\n\n- [Hartwig Medical Foundation\n Australia](https://www.hartwigmedicalfoundation.nl/en/partnerships/hartwig-medical-foundation-australia/)\n- Oliver Hofmann\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#oncoanalyser`\nchannel](https://nfcore.slack.com/channels/oncoanalyser) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nYou can cite the `oncoanalyser` Zenodo record for a specific version using the following DOI:\n[10.5281/zenodo.15189386](https://doi.org/10.5281/zenodo.15189386)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md)\nfile.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia,\n> Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 
Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -99,7 +99,7 @@ }, "mentions": [ { - "@id": "#32a9e1da-9051-46b0-b25a-1b1befa7dd48" + "@id": "#073c208c-03e4-4359-94b2-84c6e8b53d2a" } ], "name": "nf-core/oncoanalyser" @@ -132,7 +132,7 @@ } ], "dateCreated": "", - "dateModified": "2025-06-03T11:02:02Z", + "dateModified": "2025-08-12T10:13:00Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": [ "nf-core", @@ -171,7 +171,7 @@ "https://nf-co.re/oncoanalyser/dev/" ], "version": [ - "2.1.0dev" + "2.2.0" ] }, { @@ -184,14 +184,14 @@ "url": { "@id": "https://www.nextflow.io/" }, - "version": "!>=24.04.2" + "version": "!>=24.10.5" }, { - "@id": "#32a9e1da-9051-46b0-b25a-1b1befa7dd48", + "@id": "#073c208c-03e4-4359-94b2-84c6e8b53d2a", "@type": "TestSuite", "instance": [ { - "@id": "#23ed753d-f441-4fe9-9cd9-f831c5e33ad0" + "@id": "#a8d79edc-7336-457c-bd2a-d62e3bbdcd96" } ], "mainEntity": { @@ -200,7 +200,7 @@ "name": "Test suite for nf-core/oncoanalyser" }, { - "@id": "#23ed753d-f441-4fe9-9cd9-f831c5e33ad0", + "@id": "#a8d79edc-7336-457c-bd2a-d62e3bbdcd96", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/oncoanalyser", "resource": "repos/nf-core/oncoanalyser/actions/workflows/nf-test.yml", diff --git a/subworkflows/local/amber_profiling/main.nf b/subworkflows/local/amber_profiling/main.nf index 47d5048d..719eac49 100644 --- a/subworkflows/local/amber_profiling/main.nf +++ b/subworkflows/local/amber_profiling/main.nf @@ -18,7 +18,8 @@ workflow AMBER_PROFILING { // Reference data genome_version // channel: [mandatory] genome version heterozygous_sites // channel: [optional] /path/to/heterozygous_sites - target_region_bed // channel: [optional] /path/to/target_region_bed + target_regions_bed // channel: [optional] /path/to/target_regions_bed + tumor_min_depth // integer: [optional] -tumor_min_depth argument value main: // 
Channel for version.yml files @@ -48,7 +49,14 @@ workflow AMBER_PROFILING { } .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, donor_bam, donor_bai -> def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.AMBER_DIR) - runnable: tumor_bam && !has_existing + + + // TODO(SW): must improve handling through separation of sample information in meta; currently unable to provide ccfDNA AMBER directory in samplesheet + def longitudinal_sample = Utils.getTumorDnaSample(meta).containsKey('longitudinal_sample_id') + + runnable: tumor_bam && (!has_existing || longitudinal_sample) + + skip: true return meta } @@ -80,7 +88,8 @@ workflow AMBER_PROFILING { ch_amber_inputs, genome_version, heterozygous_sites, - target_region_bed, + target_regions_bed, + tumor_min_depth, ) ch_versions = ch_versions.mix(AMBER.out.versions) diff --git a/subworkflows/local/bamtools_metrics/main.nf b/subworkflows/local/bamtools_metrics/main.nf index b450d36f..3f2d7887 100644 --- a/subworkflows/local/bamtools_metrics/main.nf +++ b/subworkflows/local/bamtools_metrics/main.nf @@ -10,13 +10,15 @@ include { BAMTOOLS } from '../../../modules/local/bamtools/main' workflow BAMTOOLS_METRICS { take: // Sample data - ch_inputs // channel: [mandatory] [ meta ] - ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] - ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] + ch_inputs // channel: [mandatory] [ meta ] + ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] + ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] // Reference data - genome_fasta // channel: [mandatory] /path/to/genome_fasta - genome_version // channel: [mandatory] genome version + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_version // channel: [mandatory] genome version + driver_gene_panel // channel: [mandatory] /path/to/driver_gene_panel + ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ main: // Channel for version.yml files @@ -82,6 +84,8 @@ 
workflow BAMTOOLS_METRICS { ch_bamtools_inputs, genome_fasta, genome_version, + driver_gene_panel, + ensembl_data_resources, ) ch_versions = ch_versions.mix(BAMTOOLS.out.versions) diff --git a/subworkflows/local/cider_calling/main.nf b/subworkflows/local/cider_calling/main.nf index 9a5ec004..c032ffca 100644 --- a/subworkflows/local/cider_calling/main.nf +++ b/subworkflows/local/cider_calling/main.nf @@ -84,5 +84,5 @@ workflow CIDER_CALLING { ch_versions = ch_versions.mix(CIDER.out.versions) emit: - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/cobalt_normalisation/main.nf b/subworkflows/local/cobalt_normalisation/main.nf new file mode 100644 index 00000000..a608b292 --- /dev/null +++ b/subworkflows/local/cobalt_normalisation/main.nf @@ -0,0 +1,54 @@ +// +// COBALT normalisation prepares the panel-specific target region normalisation resource +// + +import Constants +import Utils + +include { COBALT_PANEL_NORMALISATION } from '../../../modules/local/cobalt/panel_normalisation/main' + +workflow COBALT_NORMALISATION { + take: + // Sample data + ch_amber // channel: [mandatory] [ meta, amber_dir ] + ch_cobalt // channel: [mandatory] [ meta, cobalt_dir ] + + // Reference data + genome_version // channel: [mandatory] genome version + gc_profile // channel: [mandatory] /path/to/gc_profile + target_region_bed // channel: [mandatory] /path/to/target_region_bed + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Create process input channel + // channel: [ [amber_dir, ...], [cobalt_dir, ...] 
] + ch_cobalt_inputs = WorkflowOncoanalyser.groupByMeta( + ch_amber, + ch_cobalt, + ) + .map { meta, amber_dir, cobalt_dir -> + return [ + Utils.selectCurrentOrExisting(amber_dir, meta, Constants.INPUT.AMBER_DIR), + Utils.selectCurrentOrExisting(cobalt_dir, meta, Constants.INPUT.COBALT_DIR), + ] + } + .collect(flat: false) + .map { d -> d.transpose() } + + + // Run process + COBALT_PANEL_NORMALISATION( + ch_cobalt_inputs, + genome_version, + gc_profile, + target_region_bed, + ) + + ch_versions = ch_versions.mix(COBALT_PANEL_NORMALISATION.out.versions) + + emit: + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/cobalt_profiling/main.nf b/subworkflows/local/cobalt_profiling/main.nf index fb5a3b77..9d04745c 100644 --- a/subworkflows/local/cobalt_profiling/main.nf +++ b/subworkflows/local/cobalt_profiling/main.nf @@ -5,7 +5,7 @@ import Constants import Utils -include { COBALT } from '../../../modules/local/cobalt/main' +include { COBALT } from '../../../modules/local/cobalt/run/main' workflow COBALT_PROFILING { take: @@ -18,6 +18,7 @@ workflow COBALT_PROFILING { gc_profile // channel: [mandatory] /path/to/gc_profile diploid_bed // channel: [optional] /path/to/diploid_bed target_region_normalisation // channel: [optional] /path/to/target_region_normalisation + targeted_mode // boolean: [mandatory] Running COBALT with targeted mode args? 
main: // Channel for version.yml files @@ -84,6 +85,7 @@ workflow COBALT_PROFILING { gc_profile, ch_cobalt_inputs.diploid_bed, target_region_normalisation, + targeted_mode, ) ch_versions = ch_versions.mix(COBALT.out.versions) diff --git a/subworkflows/local/esvee_calling/main.nf b/subworkflows/local/esvee_calling/main.nf index dc709bee..e2bd943b 100644 --- a/subworkflows/local/esvee_calling/main.nf +++ b/subworkflows/local/esvee_calling/main.nf @@ -7,42 +7,30 @@ import Utils import java.nio.channels.Channel -include { ESVEE_PREP } from '../../../modules/local/esvee/prep/main' -include { ESVEE_ASSEMBLE } from '../../../modules/local/esvee/assemble/main' -include { ESVEE_DEPTH_ANNOTATOR } from '../../../modules/local/esvee/depth_annotator/main' -include { ESVEE_CALL } from '../../../modules/local/esvee/call/main' +include { ESVEE } from '../../../modules/local/esvee/main' workflow ESVEE_CALLING { take: - // Sample data ch_inputs // channel: [mandatory] [ meta ] ch_tumor_bam // channel: [mandatory] [ meta, bam, bai ] ch_normal_bam // channel: [mandatory] [ meta, bam, bai ] - - // Reference data genome_fasta // channel: [mandatory] /path/to/genome_fasta genome_version // channel: [mandatory] genome version genome_fai // channel: [mandatory] /path/to/genome_fai genome_dict // channel: [mandatory] /path/to/genome_dict genome_img // channel: [optional] /path/to/genome_img - sv_prep_blocklist // channel: [mandatory] /path/to/sv_prep_blocklist known_fusions // channel: [mandatory] /path/to/known_fusions pon_breakends // channel: [mandatory] /path/to/pon_sgl pon_breakpoints // channel: [mandatory] /path/to/pon_sv + decoy_sequences_image // channel: [mandatory] /path/to/decoy_sequences_image repeatmasker_annotations // channel: [mandatory] /path/to/repeatmasker_annotations - decoy_sequences // channel: [mandatory] /path/to/deocy_sequences - unmap_regions // channel: [mandatory] /path/to/deocy_sequences - + unmap_regions // channel: [mandatory] /path/to/unmap_regions main: // 
Channel for version.yml files - // channel: [ versions.yml ] ch_versions = Channel.empty() // Select input sources and sort - // channel: runnable_tn: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai ] - // channel: runnable_to: [ meta, tumor_bam, tumor_bai ] - // channel: skip: [ meta ] ch_inputs_sorted = WorkflowOncoanalyser.groupByMeta( ch_tumor_bam, ch_normal_bam, @@ -57,7 +45,6 @@ workflow ESVEE_CALLING { ] } .branch { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.ESVEE_VCF_TUMOR) runnable_tn: tumor_bam && normal_bam && !has_existing @@ -67,12 +54,8 @@ workflow ESVEE_CALLING { return meta } - // - // MODULE: Esvee prep - // // Create process input channel - // channel: [ meta_esvee, tumor_bam, tumor_bai, normal_bam, normal_bai ] - ch_bam_inputs = Channel.empty() + ch_esvee_inputs = Channel.empty() .mix( ch_inputs_sorted.runnable_tn, ch_inputs_sorted.runnable_to.map { [*it, [], []] }, @@ -92,112 +75,42 @@ workflow ESVEE_CALLING { return [meta_esvee, tumor_bam, tumor_bai, normal_bam, normal_bai] } - // Run process - ESVEE_PREP( - ch_bam_inputs, - genome_fasta, - genome_version, - sv_prep_blocklist, - known_fusions, - ) - - ch_versions = ch_versions.mix(ESVEE_PREP.out.versions) - - // Set output normal prep BAM channel inclusive of placeholders - // channel: [ meta_esvee, normal_prep_bam, normal_prep_bai ] - ch_esvee_prep_normal_bam = Channel.empty() - .mix( - ESVEE_PREP.out.normal_prep_bam, - ch_bam_inputs.filter { ! 
it[0].containsKey('normal_id') }.map { [it[0], [], []] }, - ) - - // - // MODULE: ESVEE assemble reads - // - // Create process input channel - // channel: [ meta_esvee, tumor_prep_bam, tumor_prep_bai, normal_prep_bam, normal_prep_bai, prep_dir ] - ch_assemble_inputs = WorkflowOncoanalyser.groupByMeta( - ESVEE_PREP.out.tumor_prep_bam, - ch_esvee_prep_normal_bam, - ESVEE_PREP.out.prep_dir, - ) - - // Run process - ESVEE_ASSEMBLE( - ch_assemble_inputs, + // Run ESVEE process + ESVEE( + ch_esvee_inputs, genome_fasta, genome_fai, genome_dict, genome_img, genome_version, - decoy_sequences, - ) - - ch_versions = ch_versions.mix(ESVEE_ASSEMBLE.out.versions) - - - // MODULE: ESVEE annotated reference sample depth - // - // Create process input channel - // channel: [ meta_esvee, tumor_bam, tumor_bai, normal_bam, normal_bai, assemble_dir ] - ch_depth_annotator_inputs = WorkflowOncoanalyser.groupByMeta( - ch_bam_inputs, - ESVEE_ASSEMBLE.out.raw_vcf, - ) - - // Run process - ESVEE_DEPTH_ANNOTATOR( - ch_depth_annotator_inputs, - genome_fasta, - genome_version, - unmap_regions, - ) - - ch_versions = ch_versions.mix(ESVEE_DEPTH_ANNOTATOR.out.versions) - - - // - // MODULE: ESVEE call somatic structural variants - // - // Create process input channel - // channel: [meta_esvee, ref_depth_vcf, prep_dir] - ch_call_inputs = WorkflowOncoanalyser.groupByMeta( - ESVEE_DEPTH_ANNOTATOR.out.ref_depth_vcf, - ESVEE_PREP.out.prep_dir, - ) - - ESVEE_CALL( - ch_call_inputs, - genome_fasta, - genome_version, pon_breakends, pon_breakpoints, + decoy_sequences_image, known_fusions, repeatmasker_annotations, + unmap_regions ) - ch_versions = ch_versions.mix(ESVEE_CALL.out.versions) - + ch_versions = ch_versions.mix(ESVEE.out.versions) // Set outputs, restoring original meta - // channel: [ meta, esvee_vcf ] ch_somatic_out = Channel.empty() .mix( - WorkflowOncoanalyser.restoreMeta(ESVEE_CALL.out.somatic_vcf, ch_inputs), - ch_inputs_sorted.skip.map { meta -> [meta, [], []] }, + 
WorkflowOncoanalyser.restoreMeta(ESVEE.out.somatic_vcf, ch_inputs), + ch_inputs_sorted.skip.map { meta -> [meta, [], []] } ) ch_germline_out = Channel.empty() .mix( - WorkflowOncoanalyser.restoreMeta(ESVEE_CALL.out.germline_vcf, ch_inputs), + WorkflowOncoanalyser.restoreMeta(ESVEE.out.germline_vcf, ch_inputs), ch_inputs_sorted.runnable_to.map { meta, tumor_bam, tumor_bai -> [meta, [], []] }, ch_inputs_sorted.skip.map { meta -> [meta, [], []] }, ) ch_unfiltered_out = Channel.empty() .mix( - WorkflowOncoanalyser.restoreMeta(ESVEE_CALL.out.unfiltered_vcf, ch_inputs), - ch_inputs_sorted.skip.map { meta -> [meta, [], []] }, + WorkflowOncoanalyser.restoreMeta(ESVEE.out.unfiltered_vcf, ch_inputs), + ch_inputs_sorted.skip.map { meta -> [meta, [], []] } ) emit: @@ -205,5 +118,5 @@ workflow ESVEE_CALLING { germline_vcf = ch_germline_out // channel: [ meta, vcf, tbi ] unfiltered_vcf = ch_unfiltered_out // channel: [ meta, vcf, tbi ] - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/isofox_normalisation/main.nf b/subworkflows/local/isofox_normalisation/main.nf new file mode 100644 index 00000000..08d87bf1 --- /dev/null +++ b/subworkflows/local/isofox_normalisation/main.nf @@ -0,0 +1,45 @@ +// +// ISOFOX normalisation prepares panel-specific TPM normalisation resource +// + +import Constants +import Utils + +include { ISOFOX_PANEL_NORMALISATION } from '../../../modules/local/isofox/panel_normalisation/main' + +workflow ISOFOX_NORMALISATION { + take: + // Sample data + ch_isofox // channel: [mandatory] [ meta, isofox_dir ] + + // Reference data + genome_version // channel: [mandatory] genome version + isofox_gene_ids // channel: [mandatory] /path/to/gene_ids + isofox_gene_distribution // channel: [mandatory] /path/to/isofox_gene_distribution + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Create process input channel + // 
channel: [ [isofox_dir, ...] ] + ch_isofox_inputs = ch_isofox + .map { meta, isofox_dir -> + return Utils.selectCurrentOrExisting(isofox_dir, meta, Constants.INPUT.ISOFOX_DIR) + } + .collect() + + // Run process + ISOFOX_PANEL_NORMALISATION( + ch_isofox_inputs, + genome_version, + isofox_gene_ids, + isofox_gene_distribution, + ) + + ch_versions = ch_versions.mix(ISOFOX_PANEL_NORMALISATION.out.versions) + + emit: + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/isofox_quantification/main.nf b/subworkflows/local/isofox_quantification/main.nf index 51d2641a..111c2078 100644 --- a/subworkflows/local/isofox_quantification/main.nf +++ b/subworkflows/local/isofox_quantification/main.nf @@ -5,7 +5,7 @@ import Constants import Utils -include { ISOFOX } from '../../../modules/local/isofox/main' +include { ISOFOX } from '../../../modules/local/isofox/run/main' workflow ISOFOX_QUANTIFICATION { take: diff --git a/subworkflows/local/lilac_calling/main.nf b/subworkflows/local/lilac_calling/main.nf index 36ac2756..cd6c9059 100644 --- a/subworkflows/local/lilac_calling/main.nf +++ b/subworkflows/local/lilac_calling/main.nf @@ -25,6 +25,7 @@ workflow LILAC_CALLING { genome_fai // channel: [mandatory] /path/to/genome_fai lilac_resource_dir // channel: [mandatory] /path/to/lilac_resource_dir/ hla_slice_bed // channel: [mandatory] /path/to/hla_slice_bed + targeted_mode // boolean: [mandatory] Running in targeted/panel mode? 
main: // Channel for version.yml files @@ -206,6 +207,7 @@ workflow LILAC_CALLING { genome_fai, genome_version, lilac_resource_dir, + targeted_mode ) ch_versions = ch_versions.mix(LILAC.out.versions) diff --git a/subworkflows/local/neo_prediction/main.nf b/subworkflows/local/neo_prediction/main.nf index 0b23e462..99ff41da 100644 --- a/subworkflows/local/neo_prediction/main.nf +++ b/subworkflows/local/neo_prediction/main.nf @@ -16,7 +16,7 @@ workflow NEO_PREDICTION { ch_tumor_rna_bam // channel: [mandatory] [ meta, bam, bai ] ch_isofox // channel: [mandatory] [ meta, isofox_dir ] ch_purple // channel: [mandatory] [ meta, purple_dir ] - ch_sage_somatic_append // channel: [mandatory] [ meta, sage_append_vcf ] + ch_sage_somatic_append // channel: [mandatory] [ meta, sage_append_dir ] ch_lilac // channel: [mandatory] [ meta, lilac_dir ] ch_linx // channel: [mandatory] [ meta, linx_annotation_dir ] @@ -179,18 +179,22 @@ workflow NEO_PREDICTION { def meta_scorer = [ key: meta.group_id, id: meta.group_id, - sample_id: Utils.getTumorDnaSampleName(meta), + sample_id: Utils.getTumorDnaSampleName(meta, primary: true), cancer_type: meta[Constants.InfoField.CANCER_TYPE], ] + def sage_somatic_append_vcf = [] if (Utils.hasTumorRna(meta)) { meta_scorer.sample_rna_id = Utils.getTumorRnaSampleName(meta) + + def sage_somatic_append_selected = Utils.selectCurrentOrExisting(sage_somatic_append, meta, Constants.INPUT.SAGE_APPEND_DIR_TUMOR) + sage_somatic_append_vcf = file(sage_somatic_append_selected).resolve("${meta_scorer.sample_id}.sage.append.vcf.gz") } def inputs = [ Utils.selectCurrentOrExisting(isofox_dir, meta, Constants.INPUT.ISOFOX_DIR), Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), - Utils.selectCurrentOrExisting(sage_somatic_append, meta, Constants.INPUT.SAGE_APPEND_VCF_TUMOR), + sage_somatic_append_vcf, Utils.selectCurrentOrExisting(lilac_dir, meta, Constants.INPUT.LILAC_DIR), neo_finder_dir, annotated_fusions, diff --git 
a/subworkflows/local/orange_reporting/main.nf b/subworkflows/local/orange_reporting/main.nf index 46d77793..67573048 100644 --- a/subworkflows/local/orange_reporting/main.nf +++ b/subworkflows/local/orange_reporting/main.nf @@ -15,8 +15,8 @@ workflow ORANGE_REPORTING { ch_bamtools_germline // channel: [mandatory] [ meta, metrics_dir ] ch_sage_somatic // channel: [mandatory] [ meta, sage_dir ] ch_sage_germline // channel: [mandatory] [ meta, sage_dir ] - ch_sage_somatic_append // channel: [mandatory] [ meta, sage_append_vcf ] - ch_sage_germline_append // channel: [mandatory] [ meta, sage_append_vcf ] + ch_sage_somatic_append // channel: [mandatory] [ meta, sage_append_dir ] + ch_sage_germline_append // channel: [mandatory] [ meta, sage_append_dir ] ch_purple // channel: [mandatory] [ meta, purple_dir ] ch_linx_somatic_annotation // channel: [mandatory] [ meta, linx_annotation_dir ] ch_linx_somatic_plot // channel: [mandatory] [ meta, linx_visualiser_dir ] @@ -68,8 +68,9 @@ workflow ORANGE_REPORTING { 16, // isofox_dir ] - rna_sage_germline_append_index = 7 // sage_germline_append - cuppa_dir_index = 14 // cuppa_dir + sage_somatic_append_index = 4 // sage_somatic_append + sage_germline_append_index = 5 // sage_germline_append + cuppa_dir_index = 14 // cuppa_dir // Select input sources // channel: [ meta, tbt_metrics_dir, nbt_metrics_dir, tsage_dir, nsage_dir, tsage_append, nsage_append, purple_dir, tlinx_anno_dir, tlinx_plot_dir, nlinx_anno_dir, virusinterpreter_dir, chord_dir, sigs_dir, lilac_dir, cuppa_dir, ch_peach, isofox_dir ] @@ -106,8 +107,8 @@ workflow ORANGE_REPORTING { Utils.selectCurrentOrExisting(inputs[1], meta, Constants.INPUT.BAMTOOLS_DIR_NORMAL), Utils.selectCurrentOrExisting(inputs[2], meta, Constants.INPUT.SAGE_DIR_TUMOR), Utils.selectCurrentOrExisting(inputs[3], meta, Constants.INPUT.SAGE_DIR_NORMAL), - Utils.selectCurrentOrExisting(inputs[4], meta, Constants.INPUT.SAGE_APPEND_VCF_TUMOR), - Utils.selectCurrentOrExisting(inputs[5], meta, 
Constants.INPUT.SAGE_APPEND_VCF_NORMAL), + Utils.selectCurrentOrExisting(inputs[4], meta, Constants.INPUT.SAGE_APPEND_DIR_TUMOR), + Utils.selectCurrentOrExisting(inputs[5], meta, Constants.INPUT.SAGE_APPEND_DIR_NORMAL), Utils.selectCurrentOrExisting(inputs[6], meta, Constants.INPUT.PURPLE_DIR), Utils.selectCurrentOrExisting(inputs[7], meta, Constants.INPUT.LINX_ANNO_DIR_TUMOR), Utils.selectCurrentOrExisting(inputs[8], meta, Constants.INPUT.LINX_PLOT_DIR_TUMOR), @@ -195,7 +196,7 @@ workflow ORANGE_REPORTING { // SAGE append germline is only required when normal DNA is present def rna_tumor_input_indexes_ready if (has_dna_normal) { - rna_tumor_input_indexes_ready = [*rna_tumor_input_indexes, rna_sage_germline_append_index] + rna_tumor_input_indexes_ready = [*rna_tumor_input_indexes, sage_germline_append_index] } else { rna_tumor_input_indexes_ready = rna_tumor_input_indexes.clone() } @@ -214,11 +215,26 @@ workflow ORANGE_REPORTING { // will generate RNA only outputs and no visualisation, which triggers missing file error in ORANGE if (inputs_selected[cuppa_dir_index]) { def cuppa_vis_data_fp = inputs_selected[cuppa_dir_index].resolve("${meta_orange.tumor_id}.cuppa.vis_data.tsv") - if (! 
cuppa_vis_data_fp.exists()) { + if (!cuppa_vis_data_fp.exists()) { inputs_selected[cuppa_dir_index] = [] } } + // Set SAGE append VCF input + if (has_rna_tumor) { + // Somatic + def sage_somatic_append = inputs_selected[sage_somatic_append_index] + if (sage_somatic_append) { + inputs_selected[sage_somatic_append_index] = file(sage_somatic_append).resolve("${meta_orange.tumor_id}.sage.append.vcf.gz") + } + + // Germline + def sage_germline_append = inputs_selected[sage_germline_append_index] + if (sage_germline_append) { + inputs_selected[sage_germline_append_index] = file(sage_germline_append).resolve("${meta_orange.normal_dna_id}.sage.append.vcf.gz") + } + } + assert inputs_selected.size() == input_expected_size sample_data: [meta_orange, *inputs_selected] @@ -239,7 +255,7 @@ workflow ORANGE_REPORTING { ensembl_data_resources, ch_orange_inputs.isofox_alt_sj, ch_orange_inputs.isofox_gene_distribution, - '2.1.0 [oncoanalyser]', + '2.2.0 [oncoanalyser]', ) ch_versions = ch_versions.mix(ORANGE.out.versions) diff --git a/subworkflows/local/pave_annotation/main.nf b/subworkflows/local/pave_annotation/main.nf index 93469709..27a0b567 100644 --- a/subworkflows/local/pave_annotation/main.nf +++ b/subworkflows/local/pave_annotation/main.nf @@ -19,8 +19,8 @@ workflow PAVE_ANNOTATION { genome_fasta // channel: [mandatory] /path/to/genome_fasta genome_version // channel: [mandatory] genome version genome_fai // channel: [mandatory] /path/to/genome_fai - sage_pon // channel: [mandatory] /path/to/sage_pon pon_artefacts // channel: [optional] /path/to/pon_artefacts + sage_pon // channel: [mandatory] /path/to/sage_pon sage_blocklist_regions // channel: [mandatory] /path/to/sage_blocklist_regions sage_blocklist_sites // channel: [mandatory] /path/to/sage_blocklist_sites clinvar_annotations // channel: [mandatory] /path/to/clinvar_annotations @@ -82,7 +82,6 @@ workflow PAVE_ANNOTATION { segment_mappability, driver_gene_panel, ensembl_data_resources, - gnomad_resource, ) ch_versions 
= ch_versions.mix(PAVE_GERMLINE.out.versions) @@ -130,8 +129,8 @@ workflow PAVE_ANNOTATION { genome_fasta, genome_version, genome_fai, - sage_pon, pon_artefacts, + sage_pon, clinvar_annotations, segment_mappability, driver_gene_panel, diff --git a/subworkflows/local/pave_pon_creation/main.nf b/subworkflows/local/pave_pon_creation/main.nf new file mode 100644 index 00000000..4315586c --- /dev/null +++ b/subworkflows/local/pave_pon_creation/main.nf @@ -0,0 +1,46 @@ +// +// PAVE PON creation prepares the panel-specific small variant artefact resource +// + +import Constants +import Utils + +include { PAVE_PON_PANEL_CREATION } from '../../../modules/local/pave/pon_creation/main' + + +workflow PAVE_PON_CREATION { + take: + // Sample data + ch_sage_somatic_vcf // channel: [mandatory] [ meta, sage_somatic_vcf, sage_somatic_tbi ] + + // Reference data + genome_version // channel: [mandatory] genome version + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Create process input channel + // channel: [ [sage_vcf, ...], [sage_tbi, ...] 
] + ch_pave_inputs = ch_sage_somatic_vcf + .map { meta, sage_vcf, sage_tbi -> + return [ + Utils.selectCurrentOrExisting(sage_vcf, meta, Constants.INPUT.SAGE_VCF_TUMOR), + Utils.selectCurrentOrExisting(sage_tbi, meta, Constants.INPUT.SAGE_VCF_TBI_TUMOR), + ] + } + .collect(flat: false) + .map { d -> d.transpose() } + + // Run process + PAVE_PON_PANEL_CREATION( + ch_pave_inputs, + genome_version, + ) + + ch_versions = ch_versions.mix(PAVE_PON_PANEL_CREATION.out.versions) + + emit: + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/prepare_reference/main.nf b/subworkflows/local/prepare_reference/main.nf index eb145199..541e7f42 100644 --- a/subworkflows/local/prepare_reference/main.nf +++ b/subworkflows/local/prepare_reference/main.nf @@ -8,20 +8,30 @@ include { BWAMEM2_INDEX } from '../../../modules/nf-core/bwamem2/index/m include { BWA_INDEX } from '../../../modules/nf-core/bwa/index/main' include { SAMTOOLS_DICT } from '../../../modules/nf-core/samtools/dict/main' include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx/main' +include { GATK4_BWA_INDEX_IMAGE } from '../../../modules/local/gatk4/bwaindeximage/main' include { STAR_GENOMEGENERATE } from '../../../modules/nf-core/star/genomegenerate/main' - -include { CUSTOM_EXTRACTTARBALL as DECOMP_BWAMEM2_INDEX } from '../../../modules/local/custom/extract_tarball/main' -include { CUSTOM_EXTRACTTARBALL as DECOMP_GRIDSS_INDEX } from '../../../modules/local/custom/extract_tarball/main' -include { CUSTOM_EXTRACTTARBALL as DECOMP_HMF_DATA } from '../../../modules/local/custom/extract_tarball/main' -include { CUSTOM_EXTRACTTARBALL as DECOMP_PANEL_DATA } from '../../../modules/local/custom/extract_tarball/main' -include { CUSTOM_EXTRACTTARBALL as DECOMP_STAR_INDEX } from '../../../modules/local/custom/extract_tarball/main' -include { GATK4_BWA_INDEX_IMAGE } from '../../../modules/local/gatk4/bwaindeximage/main' -include { GRIDSS_INDEX } from 
'../../../modules/local/gridss/index/main' -include { WRITE_REFERENCE_DATA } from '../../../modules/local/custom/write_reference_data/main' +include { GRIDSS_INDEX } from '../../../modules/local/gridss/index/main' + +include { CUSTOM_EXTRACTTARBALL as DECOMP_BWAMEM2_INDEX } from '../../../modules/local/custom/extract_tarball/main' +include { CUSTOM_EXTRACTTARBALL as DECOMP_GRIDSS_INDEX } from '../../../modules/local/custom/extract_tarball/main' +include { CUSTOM_EXTRACTTARBALL as DECOMP_HMF_DATA } from '../../../modules/local/custom/extract_tarball/main' +include { CUSTOM_EXTRACTTARBALL as DECOMP_PANEL_DATA } from '../../../modules/local/custom/extract_tarball/main' +include { CUSTOM_EXTRACTTARBALL as DECOMP_STAR_INDEX } from '../../../modules/local/custom/extract_tarball/main' + +include { WRITE_REFERENCE_DATA as WRITE_FASTA } from '../../../modules/local/custom/write_reference_data/main' +include { WRITE_REFERENCE_DATA as WRITE_FAI } from '../../../modules/local/custom/write_reference_data/main' +include { WRITE_REFERENCE_DATA as WRITE_DICT } from '../../../modules/local/custom/write_reference_data/main' +include { WRITE_REFERENCE_DATA as WRITE_IMG } from '../../../modules/local/custom/write_reference_data/main' +include { WRITE_REFERENCE_DATA as WRITE_BWA_INDEX } from '../../../modules/local/custom/write_reference_data/main' +include { WRITE_REFERENCE_DATA as WRITE_GRIDSS_INDEX } from '../../../modules/local/custom/write_reference_data/main' +include { WRITE_REFERENCE_DATA as WRITE_STAR_INDEX } from '../../../modules/local/custom/write_reference_data/main' +include { WRITE_REFERENCE_DATA as WRITE_HMF_DATA } from '../../../modules/local/custom/write_reference_data/main' +include { WRITE_REFERENCE_DATA as WRITE_PANEL_DATA } from '../../../modules/local/custom/write_reference_data/main' workflow PREPARE_REFERENCE { take: - run_config // channel: [mandatory] run configuration + prep_config // channel: [mandatory] configuration indicating which reference data is 
required + run_config main: // Channel for version.yml files @@ -29,41 +39,54 @@ workflow PREPARE_REFERENCE { ch_versions = Channel.empty() // - // Set some variables for brevity + // Set .fasta and main genome indexes, create if required // - ch_genome_fasta = Channel.fromPath(params.ref_data_genome_fasta) ch_genome_version = Channel.value(params.genome_version) - run_virusinterpreter = run_config.mode !== Constants.RunMode.TARGETED && run_config.stages.virusinterpreter - // - // Set .fai and .dict indexes, create if required - // - ch_genome_fai = getRefFileChannel('ref_data_genome_fai') - if (!params.ref_data_genome_fai) { - SAMTOOLS_FAIDX(ch_genome_fasta) - ch_genome_fai = SAMTOOLS_FAIDX.out.fai - ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + ch_genome_fasta = Channel.empty() + if (prep_config.require_fasta) { + ch_genome_fasta = Channel.fromPath(params.ref_data_genome_fasta) } - ch_genome_dict = getRefFileChannel('ref_data_genome_dict') - if (!params.ref_data_genome_dict) { - SAMTOOLS_DICT(ch_genome_fasta) - ch_genome_dict = SAMTOOLS_DICT.out.dict - ch_versions = ch_versions.mix(SAMTOOLS_DICT.out.versions) + ch_genome_fai = Channel.empty() + if (prep_config.require_fai) { + + ch_genome_fai = getRefFileChannel('ref_data_genome_fai') + if (!params.ref_data_genome_fai) { + SAMTOOLS_FAIDX(ch_genome_fasta) + ch_genome_fai = SAMTOOLS_FAIDX.out.fai + ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions) + } } - ch_genome_img = getRefFileChannel('ref_data_genome_img') - if (!params.ref_data_genome_img) { - GATK4_BWA_INDEX_IMAGE(ch_genome_fasta) - ch_genome_img = GATK4_BWA_INDEX_IMAGE.out.img - ch_versions = ch_versions.mix(GATK4_BWA_INDEX_IMAGE.out.versions) + ch_genome_dict = Channel.empty() + if (prep_config.require_dict) { + + ch_genome_dict = getRefFileChannel('ref_data_genome_dict') + if (!params.ref_data_genome_dict) { + SAMTOOLS_DICT(ch_genome_fasta) + ch_genome_dict = SAMTOOLS_DICT.out.dict + ch_versions = 
ch_versions.mix(SAMTOOLS_DICT.out.versions) + } + } + + ch_genome_img = Channel.empty() + if (prep_config.require_img) { + + ch_genome_img = getRefFileChannel('ref_data_genome_img') + if (!params.ref_data_genome_img) { + GATK4_BWA_INDEX_IMAGE(ch_genome_fasta) + ch_genome_img = GATK4_BWA_INDEX_IMAGE.out.img + ch_versions = ch_versions.mix(GATK4_BWA_INDEX_IMAGE.out.versions) + } } // // Set bwa-mem2 index, unpack or create if required // ch_genome_bwamem2_index = Channel.empty() - if (run_config.has_dna_fastq && run_config.stages.alignment) { + if (prep_config.require_bwamem2_index) { + if (!params.ref_data_genome_bwamem2_index) { BWAMEM2_INDEX( @@ -92,7 +115,8 @@ workflow PREPARE_REFERENCE { // Set GRIDSS index, unpack or create if required // ch_genome_gridss_index = Channel.empty() - if (run_config.has_dna && (run_config.stages.gridss || run_virusinterpreter)) { + if (prep_config.require_gridss_index) { + if (!params.ref_data_genome_gridss_index) { BWA_INDEX( @@ -129,7 +153,8 @@ workflow PREPARE_REFERENCE { // Set STAR index , unpack or create if required // ch_genome_star_index = Channel.empty() - if (run_config.has_rna_fastq && run_config.stages.alignment) { + if (prep_config.require_star_index) { + if (!params.ref_data_genome_star_index) { STAR_GENOMEGENERATE( @@ -158,25 +183,46 @@ workflow PREPARE_REFERENCE { // Set HMF reference data, unpack if required // ch_hmf_data = Channel.empty() - hmf_data_paths = params.hmf_data_paths[params.genome_version.toString()] - if (params.ref_data_hmf_data_path.endsWith('tar.gz')) { + if (prep_config.require_hmftools_data) { - ch_hmf_data_inputs = Channel.fromPath(params.ref_data_hmf_data_path) - .map { [[id: "${it.name.replaceAll('\\.tar\\.gz$', '')}"], it] } + hmf_data_paths = params.hmf_data_paths[params.genome_version.toString()] - DECOMP_HMF_DATA(ch_hmf_data_inputs) + if (params.ref_data_hmf_data_path.endsWith('tar.gz')) { - ch_hmf_data = DECOMP_HMF_DATA.out.extracted_dir - .collect() - .map { dir_list -> - assert 
dir_list.size() == 1 - def dirpath = dir_list[0].toUriString() - return createDataMap(hmf_data_paths, dirpath) - } + ch_hmf_data_inputs = Channel.fromPath(params.ref_data_hmf_data_path) + .map { [[id: "${it.name.replaceAll('\\.tar\\.gz$', '')}"], it] } + + DECOMP_HMF_DATA(ch_hmf_data_inputs) + + ch_hmf_data = DECOMP_HMF_DATA.out.extracted_dir + .collect() + .map { dir_list -> + assert dir_list.size() == 1 + def dirpath = dir_list[0].toUriString() + return createDataMap(hmf_data_paths, dirpath) + } + + } else { + + ch_hmf_data = Channel.value(createDataMap(hmf_data_paths, params.ref_data_hmf_data_path)) - } else { + } - ch_hmf_data = Channel.value(createDataMap(hmf_data_paths, params.ref_data_hmf_data_path)) + if (params.driver_gene_panel) { + + def run_mode = Utils.getEnumFromString(params.mode, Constants.RunMode) + + if (run_mode !== Constants.RunMode.PANEL_RESOURCE_CREATION) { + log.info "Using custom driver gene panel: ${params.driver_gene_panel}" + } + + def custom_driver_panel = file(params.driver_gene_panel, checkIfExists: true) + ch_hmf_data = ch_hmf_data + .map { d -> + d.driver_gene_panel = custom_driver_panel + return d + } + } } @@ -184,7 +230,7 @@ workflow PREPARE_REFERENCE { // Set panel reference data, unpack if required // ch_panel_data = Channel.empty() - if (run_config.mode === Constants.RunMode.TARGETED) { + if (prep_config.require_panel_data) { panel_data_paths_versions = params.panel_data_paths[params.panel] panel_data_paths = panel_data_paths_versions[params.genome_version.toString()] @@ -214,34 +260,22 @@ workflow PREPARE_REFERENCE { // // Write prepared reference data if requested // - if (params.prepare_reference_only) { + if (prep_config.prepare_ref_data_only || params.prepare_reference_only) { - // Create channel of data files to stage (if not already local) and write - ch_refdata = Channel.empty() - .mix( - ch_genome_fasta, - ch_genome_fai, - ch_genome_dict, - ch_genome_img, - ch_genome_bwamem2_index, - ch_genome_gridss_index, - 
ch_genome_star_index, - // Also include base paths for hmf_data and panel_data - Channel.empty() - .mix( - ch_hmf_data, - ch_panel_data, - ) - .map { getDataBaseDirectory(it) } - ) + WRITE_FASTA(ch_genome_fasta) + WRITE_FAI(ch_genome_fai) + WRITE_DICT(ch_genome_dict) + WRITE_IMG(ch_genome_img) + WRITE_BWA_INDEX(ch_genome_bwamem2_index) + WRITE_GRIDSS_INDEX(ch_genome_gridss_index) + WRITE_STAR_INDEX(ch_genome_star_index) - WRITE_REFERENCE_DATA( - ch_refdata, - workflow.manifest.version, - ) + WRITE_HMF_DATA(ch_hmf_data.map { getDataBaseDirectory(it) }) + WRITE_PANEL_DATA(ch_panel_data.map { getDataBaseDirectory(it) }) - // Clear all stages to prevent running any analysis + // Clear all stages to prevent running any analysis when driving by samplesheet run_config.stages = [:] + } emit: diff --git a/subworkflows/local/read_alignment_dna/main.nf b/subworkflows/local/read_alignment_dna/main.nf index 9c1b54cc..05c2daee 100644 --- a/subworkflows/local/read_alignment_dna/main.nf +++ b/subworkflows/local/read_alignment_dna/main.nf @@ -66,10 +66,12 @@ workflow READ_ALIGNMENT_DNA { .collect { key, fps -> def (library_id, lane) = key + def sample_id = meta_sample.getOrDefault('longitudinal_sample_id', meta_sample['sample_id']) + def meta_fastq = [ key: meta.group_id, - id: "${meta.group_id}_${meta_sample.sample_id}", - sample_id: meta_sample.sample_id, + id: "${meta.group_id}_${sample_id}", + sample_id: sample_id, library_id: library_id, lane: lane, sample_type: sample_type, diff --git a/subworkflows/local/redux_processing/main.nf b/subworkflows/local/redux_processing/main.nf index 9f9ce0c7..73122664 100644 --- a/subworkflows/local/redux_processing/main.nf +++ b/subworkflows/local/redux_processing/main.nf @@ -90,10 +90,12 @@ workflow REDUX_PROCESSING { ) .map { meta, meta_sample, sample_type, bams, bais -> + def sample_id = meta_sample.getOrDefault('longitudinal_sample_id', meta_sample['sample_id']) + def meta_redux = [ key: meta.group_id, - id: 
"${meta.group_id}_${meta_sample.sample_id}", - sample_id: meta_sample.sample_id, + id: "${meta.group_id}_${sample_id}", + sample_id: sample_id, sample_type: sample_type, ] diff --git a/subworkflows/local/sage_append/main.nf b/subworkflows/local/sage_append/main.nf index 6a8fbe46..ac5222fa 100644 --- a/subworkflows/local/sage_append/main.nf +++ b/subworkflows/local/sage_append/main.nf @@ -1,19 +1,21 @@ // -// SAGE append adds WTS data to an existing SAGE VCF +// SAGE append adds additional sample data to an existing SAGE VCF // import Constants import Utils -include { SAGE_APPEND as SAGE_APPEND_SOMATIC } from '../../../modules/local/sage/append/main' +include { SAGE_APPEND as SAGE_APPEND_SOMATIC } from '../../../modules/local/sage/append/main' include { SAGE_APPEND as SAGE_APPEND_GERMLINE } from '../../../modules/local/sage/append/main' workflow SAGE_APPEND { take: // Sample data - ch_inputs // channel: [mandatory] [ meta ] - ch_tumor_rna_bam // channel: [mandatory] [ meta, bam, bai ] - ch_purple_dir // channel: [mandatory] [ meta, purple_dir ] + ch_inputs // channel: [mandatory] [ meta ] + ch_purple_dir // channel: [mandatory] [ meta, purple_dir ] + ch_tumor_dna_bam // channel: [mandatory] [ meta, bam, bai ] + ch_tumor_dna_tsv // channel: [mandatory] [ meta, dup_freq_tsv, jitter_tsv, ms_tsv ] + ch_tumor_rna_bam // channel: [mandatory] [ meta, bam, bai ] // Reference data genome_fasta // channel: [mandatory] /path/to/genome_fasta @@ -22,30 +24,48 @@ workflow SAGE_APPEND { genome_dict // channel: [mandatory] /path/to/genome_dict // Params - run_germline // boolean: [mandatory] Run germline flag + enable_germline // boolean: [mandatory] Enable germline + targeted_mode // boolean: [mandatory] Running with SAGE arguments for targeted/panel mode? 
main: // Channel for version.yml files // channel: [ versions.yml ] ch_versions = Channel.empty() + def run_mode = Utils.getEnumFromString(params.mode, Constants.RunMode) + def purity_estimate_mode = run_mode === Constants.RunMode.PURITY_ESTIMATE + // Select input sources and sort - // channel: runnable: [ meta, tumor_bam, tumor_bai, purple_dir ] + // channel: runnable: [ meta, tumor_dna_bam, tumor_dna_bai, [tumor_dna_redux_tsv, ...], tumor_rna_bam, tumor_rna_bai, purple_dir ] // channel: skip: [ meta ] ch_inputs_sorted = WorkflowOncoanalyser.groupByMeta( + ch_tumor_dna_bam, + ch_tumor_dna_tsv, ch_tumor_rna_bam, ch_purple_dir, ) - .map { meta, tumor_bam, tumor_bai, purple_dir -> + .map { meta, tumor_dna_bam, tumor_dna_bai, tumor_dna_dup_freq_tsv, tumor_dna_jitter_tsv, tumor_dna_ms_tsv, tumor_rna_bam, tumor_rna_bai, purple_dir -> + + def tumor_dna_redux_tsv_list = [ + tumor_dna_jitter_tsv ?: Utils.getInput(meta, Constants.INPUT.REDUX_JITTER_TSV_TUMOR), + tumor_dna_ms_tsv ?: Utils.getInput(meta, Constants.INPUT.REDUX_MS_TSV_TUMOR), + ] + + tumor_dna_redux_tsv_list = tumor_dna_redux_tsv_list.findAll { it != [] } + return [ meta, - Utils.selectCurrentOrExisting(tumor_bam, meta, Constants.INPUT.BAM_RNA_TUMOR), - Utils.selectCurrentOrExisting(tumor_bai, meta, Constants.INPUT.BAI_RNA_TUMOR), + Utils.selectCurrentOrExisting(tumor_dna_bam, meta, Constants.INPUT.BAM_REDUX_DNA_TUMOR), + tumor_dna_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_DNA_TUMOR), + tumor_dna_redux_tsv_list, + Utils.selectCurrentOrExisting(tumor_rna_bam, meta, Constants.INPUT.BAM_RNA_TUMOR), + tumor_rna_bai ?: Utils.getInput(meta, Constants.INPUT.BAI_RNA_TUMOR), Utils.selectCurrentOrExisting(purple_dir, meta, Constants.INPUT.PURPLE_DIR), ] } - .branch { meta, tumor_bam, tumor_bai, purple_dir -> - runnable: tumor_bam && purple_dir + .branch { meta, tumor_dna_bam, tumor_dna_bai, tumor_dna_redux_tsv, tumor_rna_bam, tumor_rna_bai, purple_dir -> + def has_bam = tumor_dna_bam || tumor_rna_bam + runnable: 
has_bam && purple_dir skip: true return meta } @@ -54,40 +74,49 @@ workflow SAGE_APPEND { // MODULE: SAGE append germline // // Select inputs that are eligible to run - // channel: runnable: [ meta, tumor_bam, tumor_bai, purple_dir ] + // channel: runnable: [ meta, tumor_dna_bam, tumor_dna_bai, [tumor_dna_redux_tsv, ...], tumor_rna_bam, tumor_rna_bai, purple_dir ] // channel: skip: [ meta ] ch_inputs_germline_sorted = ch_inputs_sorted.runnable - .branch { meta, tumor_bam, tumor_bai, purple_dir -> + .branch { meta, tumor_dna_bam, tumor_dna_bai, tumor_dna_redux_tsv, tumor_rna_bam, tumor_rna_bai, purple_dir -> - def tumor_dna_id = Utils.getTumorDnaSampleName(meta) + // NOTE(SW): explicit in expectation to always obtain the primary tumor DNA sample ID here + def tumor_dna_id = Utils.getTumorDnaSampleName(meta, primary: true) - def has_normal_dna = Utils.hasNormalDna(meta) def has_tumor_rna = Utils.hasTumorRna(meta) + def has_normal_dna = Utils.hasNormalDna(meta) def has_smlv_germline = file(purple_dir).resolve("${tumor_dna_id}.purple.germline.vcf.gz") - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_APPEND_VCF_NORMAL) - runnable: has_normal_dna && has_tumor_rna && has_smlv_germline && !has_existing && run_germline + def should_append_rna_variants = has_tumor_rna && has_normal_dna && has_smlv_germline + + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_APPEND_DIR_NORMAL) + + runnable: should_append_rna_variants && !has_existing && enable_germline skip: true return meta } // Create process input channel - // channel: [ meta_append, purple_smlv_vcf, tumor_bam, tumor_bai ] + // channel: [ meta_append, purple_smlv_vcf, [bam, ...], [bai, ...], [tumor_dna_redux_tsv, ...] 
] ch_sage_append_germline_inputs = ch_inputs_germline_sorted.runnable - .map { meta, tumor_bam, tumor_bai, purple_dir -> + .map { meta, tumor_dna_bam, tumor_dna_bai, tumor_dna_redux_tsv, tumor_rna_bam, tumor_rna_bai, purple_dir -> - def tumor_dna_id = Utils.getTumorDnaSampleName(meta) + // NOTE(SW): explicit in expectation to always obtain the primary tumor DNA sample ID here + def tumor_dna_id = Utils.getTumorDnaSampleName(meta, primary: true) + def output_file_id = Utils.getNormalDnaSampleName(meta) def meta_append = [ key: meta.group_id, id: meta.group_id, - tumor_rna_id: Utils.getTumorRnaSampleName(meta), - dna_id: Utils.getNormalDnaSampleName(meta), + output_file_id: output_file_id, + reference_ids: [Utils.getTumorRnaSampleName(meta)], ] + def bams = [tumor_rna_bam] + def bais = [tumor_rna_bai] + def purple_smlv_vcf = file(purple_dir).resolve("${tumor_dna_id}.purple.germline.vcf.gz") - return [meta_append, purple_smlv_vcf, tumor_bam, tumor_bai] + return [meta_append, purple_smlv_vcf, bams, bais, []] } // Run process @@ -97,6 +126,7 @@ workflow SAGE_APPEND { genome_version, genome_fai, genome_dict, + targeted_mode, ) ch_versions = ch_versions.mix(SAGE_APPEND_GERMLINE.out.versions) @@ -105,39 +135,62 @@ workflow SAGE_APPEND { // MODULE: SAGE append somatic // // Select inputs that are eligible to run - // channel: runnable: [ meta, tumor_bam, tumor_bai, purple_dir ] + // channel: runnable: [ meta, tumor_dna_bam, tumor_dna_bai, [tumor_dna_redux_tsv, ...], tumor_rna_bam, tumor_rna_bai, purple_dir ] // channel: skip: [ meta ] ch_inputs_somatic_sorted = ch_inputs_sorted.runnable - .branch { meta, tumor_bam, tumor_bai, purple_dir -> - def tumor_dna_id = Utils.getTumorDnaSampleName(meta) + .branch { meta, tumor_dna_bam, tumor_dna_bai, tumor_dna_redux_tsv, tumor_rna_bam, tumor_rna_bai, purple_dir -> + + def tumor_dna_id = Utils.getTumorDnaSampleName(meta, primary: true) - def has_tumor_dna = Utils.hasTumorDna(meta) def has_tumor_rna = Utils.hasTumorRna(meta) + def 
has_tumor_dna = Utils.hasTumorDna(meta) def has_smlv_somatic = file(purple_dir).resolve("${tumor_dna_id}.purple.somatic.vcf.gz") - def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_APPEND_VCF_TUMOR) - runnable: has_tumor_dna && has_tumor_rna && has_smlv_somatic && !has_existing + def should_append_rna_variants = !purity_estimate_mode && has_tumor_rna && has_tumor_dna && has_smlv_somatic + def should_append_longitudinal_variants = purity_estimate_mode && has_tumor_dna && has_smlv_somatic + + def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_APPEND_DIR_TUMOR) + + runnable: (should_append_rna_variants || should_append_longitudinal_variants) && !has_existing skip: true return meta } // Create process input channel - // channel: [ meta_append, purple_smlv_vcf, tumor_bam, tumor_bai ] + // channel: [ meta_append, purple_smlv_vcf, [bam, ...], [bai, ...], [tumor_dna_redux_tsv, ...] ] ch_sage_append_somatic_inputs = ch_inputs_somatic_sorted.runnable - .map { meta, tumor_bam, tumor_bai, purple_dir -> + .map { meta, tumor_dna_bam, tumor_dna_bai, tumor_dna_redux_tsv, tumor_rna_bam, tumor_rna_bai, purple_dir -> - def tumor_dna_id = Utils.getTumorDnaSampleName(meta) + def tumor_dna_id = Utils.getTumorDnaSampleName(meta, primary: true) + def output_file_id = purity_estimate_mode ? 
Utils.getTumorDnaSampleName(meta, primary: false) : tumor_dna_id def meta_append = [ key: meta.group_id, id: meta.group_id, - tumor_rna_id: Utils.getTumorRnaSampleName(meta), - dna_id: Utils.getTumorDnaSampleName(meta), + output_file_id: output_file_id, + reference_ids: [], ] + def bams = [] + def bais = [] + def redux_tsvs = [] + + if (!purity_estimate_mode && tumor_rna_bam) { + meta_append.reference_ids.add(Utils.getTumorRnaSampleName(meta)) + bams.add(tumor_rna_bam) + bais.add(tumor_rna_bai) + } + + if (purity_estimate_mode && tumor_dna_bam) { + meta_append.reference_ids.add(Utils.getTumorDnaSampleName(meta)) + bams.add(tumor_dna_bam) + bais.add(tumor_dna_bai) + redux_tsvs = tumor_dna_redux_tsv + } + def purple_smlv_vcf = file(purple_dir).resolve("${tumor_dna_id}.purple.somatic.vcf.gz") - return [meta_append, purple_smlv_vcf, tumor_bam, tumor_bai] + return [meta_append, purple_smlv_vcf, bams, bais, redux_tsvs] } // Run process @@ -147,29 +200,30 @@ workflow SAGE_APPEND { genome_version, genome_fai, genome_dict, + targeted_mode, ) ch_versions = ch_versions.mix(SAGE_APPEND_SOMATIC.out.versions) // Set outputs, restoring original meta - // channel: [ meta, sage_append_vcf ] - ch_somatic_vcf = Channel.empty() + // channel: [ meta, sage_append_dir ] + ch_somatic_dir = Channel.empty() .mix( - WorkflowOncoanalyser.restoreMeta(SAGE_APPEND_SOMATIC.out.vcf, ch_inputs), + WorkflowOncoanalyser.restoreMeta(SAGE_APPEND_SOMATIC.out.sage_append_dir, ch_inputs), ch_inputs_somatic_sorted.skip.map { meta -> [meta, []] }, ch_inputs_sorted.skip.map { meta -> [meta, []] }, ) - ch_germline_vcf = Channel.empty() + ch_germline_dir = Channel.empty() .mix( - WorkflowOncoanalyser.restoreMeta(SAGE_APPEND_GERMLINE.out.vcf, ch_inputs), + WorkflowOncoanalyser.restoreMeta(SAGE_APPEND_GERMLINE.out.sage_append_dir, ch_inputs), ch_inputs_germline_sorted.skip.map { meta -> [meta, []] }, ch_inputs_sorted.skip.map { meta -> [meta, []] }, ) emit: - somatic_vcf = ch_somatic_vcf // channel: [ meta, 
sage_append_vcf ] - germline_vcf = ch_germline_vcf // channel: [ meta, sage_append_vcf ] + somatic_dir = ch_somatic_dir // channel: [ meta, sage_append_dir ] + germline_dir = ch_germline_dir // channel: [ meta, sage_append_dir ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/sage_calling/main.nf b/subworkflows/local/sage_calling/main.nf index 62ea4fad..8fc9a9ae 100644 --- a/subworkflows/local/sage_calling/main.nf +++ b/subworkflows/local/sage_calling/main.nf @@ -26,14 +26,16 @@ workflow SAGE_CALLING { genome_version // channel: [mandatory] genome version genome_fai // channel: [mandatory] /path/to/genome_fai genome_dict // channel: [mandatory] /path/to/genome_dict + sage_pon // channel: [mandatory] /path/to/sage_pon sage_known_hotspots_somatic // channel: [mandatory] /path/to/sage_known_hotspots_somatic sage_known_hotspots_germline // channel: [optional] /path/to/sage_known_hotspots_germline - sage_actionable_panel // channel: [mandatory] /path/to/sage_actionable_panel - sage_coverage_panel // channel: [mandatory] /path/to/sage_coverage_panel sage_highconf_regions // channel: [mandatory] /path/to/sage_highconf_regions segment_mappability // channel: [mandatory] /path/to/segment_mappability driver_gene_panel // channel: [mandatory] /path/to/driver_gene_panel ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ + gnomad_resource // channel: [mandatory] /path/to/gnomad_resource + enable_germline // boolean: [mandatory] Enable germline mode + targeted_mode // boolean: [mandatory] Running in targeted/panel mode? 
main: // Channel for version.yml files @@ -61,17 +63,14 @@ workflow SAGE_CALLING { donor_dup_freq_tsv, donor_jitter_tsv, donor_ms_tsv -> def redux_tsv_list = [ - tumor_dup_freq_tsv ?: Utils.getInput(meta, Constants.INPUT.REDUX_DUP_FREQ_TSV_TUMOR), - tumor_jitter_tsv ?: Utils.getInput(meta, Constants.INPUT.REDUX_JITTER_TSV_TUMOR), - tumor_ms_tsv ?: Utils.getInput(meta, Constants.INPUT.REDUX_MS_TSV_TUMOR), + tumor_jitter_tsv ?: Utils.getInput(meta, Constants.INPUT.REDUX_JITTER_TSV_TUMOR), + tumor_ms_tsv ?: Utils.getInput(meta, Constants.INPUT.REDUX_MS_TSV_TUMOR), - normal_dup_freq_tsv ?: Utils.getInput(meta, Constants.INPUT.REDUX_DUP_FREQ_TSV_NORMAL), - normal_jitter_tsv ?: Utils.getInput(meta, Constants.INPUT.REDUX_JITTER_TSV_NORMAL), - normal_ms_tsv ?: Utils.getInput(meta, Constants.INPUT.REDUX_MS_TSV_NORMAL), + normal_jitter_tsv ?: Utils.getInput(meta, Constants.INPUT.REDUX_JITTER_TSV_NORMAL), + normal_ms_tsv ?: Utils.getInput(meta, Constants.INPUT.REDUX_MS_TSV_NORMAL), - donor_dup_freq_tsv ?: Utils.getInput(meta, Constants.INPUT.REDUX_DUP_FREQ_TSV_DONOR), - donor_jitter_tsv ?: Utils.getInput(meta, Constants.INPUT.REDUX_JITTER_TSV_DONOR), - donor_ms_tsv ?: Utils.getInput(meta, Constants.INPUT.REDUX_MS_TSV_DONOR), + donor_jitter_tsv ?: Utils.getInput(meta, Constants.INPUT.REDUX_JITTER_TSV_DONOR), + donor_ms_tsv ?: Utils.getInput(meta, Constants.INPUT.REDUX_MS_TSV_DONOR), ] redux_tsv_list = redux_tsv_list.findAll{ it != [] } @@ -108,7 +107,7 @@ workflow SAGE_CALLING { def has_tumor_normal = tumor_bam && normal_bam def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_VCF_NORMAL) - runnable: has_tumor_normal && !has_existing + runnable: has_tumor_normal && !has_existing && enable_germline skip: true return meta } @@ -136,10 +135,10 @@ workflow SAGE_CALLING { genome_fai, genome_dict, sage_known_hotspots_germline, - sage_actionable_panel, - sage_coverage_panel, sage_highconf_regions, + driver_gene_panel, ensembl_data_resources, + targeted_mode, ) 
ch_versions = ch_versions.mix(SAGE_GERMLINE.out.versions) @@ -191,11 +190,13 @@ workflow SAGE_CALLING { genome_version, genome_fai, genome_dict, + sage_pon, sage_known_hotspots_somatic, - sage_actionable_panel, - sage_coverage_panel, sage_highconf_regions, + driver_gene_panel, ensembl_data_resources, + gnomad_resource, + targeted_mode, ) ch_versions = ch_versions.mix(SAGE_SOMATIC.out.versions) diff --git a/subworkflows/local/teal_characterisation/main.nf b/subworkflows/local/teal_characterisation/main.nf index d7124af1..7b9a30c3 100644 --- a/subworkflows/local/teal_characterisation/main.nf +++ b/subworkflows/local/teal_characterisation/main.nf @@ -28,13 +28,12 @@ workflow TEAL_CHARACTERISATION { ch_versions = Channel.empty() // - // MODULE: Teal prep + // MODULE: TEAL prep // - // Make preliminary BAM to be used as input for the main Teal pipeline. This prep step takes 90% of Teal's runtime, - // so it is split from the main pipeline so that we don't have to wait for Purple to finish before we start - // running Teal + // Make preliminary BAM to be used as input for the main TEAL pipeline. 
This prep step takes 90% of TEAL's runtime, + // so it is split from the main pipeline so that we don't have to wait for PURPLE to finish before we start + // running TEAL // - // Select input sources and sort // channel: runnable: [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai ] // channel: skip: [ meta ] @@ -83,7 +82,7 @@ workflow TEAL_CHARACTERISATION { // Flatten TEAL_PREP output // channel: [ meta, teal_bam, teal_bai ] - ch_tumor_teal_bam = WorkflowOncoanalyser.restoreMeta(TEAL_PREP.out.tumor_bam, ch_inputs) + ch_tumor_teal_bam = WorkflowOncoanalyser.restoreMeta(TEAL_PREP.out.tumor_teal_bam, ch_inputs) .map { meta, bam_bai -> [meta, *bam_bai] } ch_normal_teal_bam_placeholder = WorkflowOncoanalyser.restoreMeta( @@ -93,14 +92,13 @@ workflow TEAL_CHARACTERISATION { ch_inputs ) - ch_normal_teal_bam = WorkflowOncoanalyser.restoreMeta(TEAL_PREP.out.normal_bam, ch_inputs) + ch_normal_teal_bam = WorkflowOncoanalyser.restoreMeta(TEAL_PREP.out.normal_teal_bam, ch_inputs) .map { meta, bam_bai -> [meta, *bam_bai] } .mix(ch_normal_teal_bam_placeholder) // - // MODULE: Teal pipeline + // MODULE: TEAL pipeline // - // Select input sources and sort // channel: runnable: [ meta, tumor_teal_bam, tumor_teal_bai, normal_teal_bam, normal_teal_bai, tumor_metrics_dir, normal_metrics_dir, cobalt_dir, purple_dir ] // channel: skip: [ meta ] @@ -113,11 +111,12 @@ workflow TEAL_CHARACTERISATION { ch_purple_dir, ) .map { meta, tumor_teal_bam, tumor_teal_bai, normal_teal_bam, normal_teal_bai, tumor_metrics_dir, normal_metrics_dir, cobalt_dir, purple_dir -> - return [ meta, - tumor_teal_bam, tumor_teal_bai, - normal_teal_bam, normal_teal_bai, + tumor_teal_bam, + tumor_teal_bai, + normal_teal_bam, + normal_teal_bai, Utils.selectCurrentOrExisting(tumor_metrics_dir, meta, Constants.INPUT.BAMTOOLS_DIR_TUMOR), Utils.selectCurrentOrExisting(normal_metrics_dir, meta, Constants.INPUT.BAMTOOLS_DIR_NORMAL), Utils.selectCurrentOrExisting(cobalt_dir, meta, Constants.INPUT.COBALT_DIR), @@ -149,7 
+148,7 @@ workflow TEAL_CHARACTERISATION { return [ meta_teal, tumor_teal_bam, tumor_teal_bai, normal_teal_bam, normal_teal_bai, tumor_metrics_dir, normal_metrics_dir, cobalt_dir, purple_dir ] } - + // Run process TEAL_PIPELINE( ch_teal_pipeline_inputs, genome_version, @@ -157,7 +156,6 @@ workflow TEAL_CHARACTERISATION { ch_versions = ch_versions.mix(TEAL_PIPELINE.out.versions) - emit: - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/wisp_analysis/main.nf b/subworkflows/local/wisp_analysis/main.nf new file mode 100644 index 00000000..3c3c77b5 --- /dev/null +++ b/subworkflows/local/wisp_analysis/main.nf @@ -0,0 +1,88 @@ +// +// WISP estimates tumor purity in longitudinal samples using WGS data of the primary +// + +import Constants +import Utils + +include { WISP } from '../../../modules/local/wisp/main' + +workflow WISP_ANALYSIS { + take: + // Sample data + ch_inputs // channel: [mandatory] [ meta ] + ch_amber_out // channel: [mandatory] [ meta, amber_dir ] + ch_cobalt_out // channel: [mandatory] [ meta, cobalt_dir ] + ch_sage_somatic_append_out // channel: [mandatory] [ meta, sage_append_dir ] + + // Reference data + genome_fasta // channel: [mandatory] /path/to/genome_fasta + genome_fai // channel: [mandatory] /path/to/genome_fai + + // Params + targeted_mode // boolean: [mandatory] Running in targeted/panel mode? + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Select input sources and sort + // channel: runnable: [ meta, ... 
] + // channel: skip: [ meta ] + ch_inputs_sorted = WorkflowOncoanalyser.groupByMeta( + ch_amber_out, + ch_cobalt_out, + ch_sage_somatic_append_out, + ) + .branch { meta, amber_dir, cobalt_dir, sage_append_dir -> + + primary_purple_dir = Utils.getInput(meta, Constants.INPUT.PURPLE_DIR) + primary_amber_dir = Utils.getInput(meta, Constants.INPUT.AMBER_DIR) + + def purity_estimate_mode = Utils.getEnumFromString(params.purity_estimate_mode, Constants.RunMode) + + def runnable + if (purity_estimate_mode === Constants.RunMode.WGTS) { + runnable = primary_purple_dir && primary_amber_dir && sage_append_dir && amber_dir && cobalt_dir + } else { + runnable = primary_purple_dir && sage_append_dir + } + + runnable: runnable + return [meta, primary_purple_dir, primary_amber_dir, amber_dir, cobalt_dir, sage_append_dir] + skip: true + return meta + } + + // Create process input channel + // channel: [ meta_wisp, ... ] + ch_wisp_inputs = ch_inputs_sorted.runnable + + .map { meta, primary_purple_dir, primary_amber_dir, amber_dir, cobalt_dir, sage_append_dir -> + + def meta_wisp = [ + key: meta.group_id, + id: meta.group_id, + subject_id: meta.subject_id, + primary_id: Utils.getTumorDnaSampleName(meta, primary: true), + longitudinal_id: Utils.getTumorDnaSampleName(meta, primary: false), + ] + + return [meta_wisp, primary_purple_dir, primary_amber_dir, amber_dir, cobalt_dir, sage_append_dir] + } + + + // Run process + WISP( + ch_wisp_inputs, + genome_fasta, + genome_fai, + targeted_mode, + ) + + ch_versions = ch_versions.mix(WISP.out.versions) + + emit: + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config index 0907ac58..09ef842a 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config @@ -1,5 +1,5 @@ plugins { - id "nf-schema@2.1.0" + id 
"nf-schema@2.4.2" } validation { diff --git a/tests/.nftignore b/tests/.nftignore index c10bc1f1..158c83c5 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -1,5 +1,6 @@ .DS_Store multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt +multiqc/multiqc_data/BETA-multiqc.parquet multiqc/multiqc_data/multiqc.log multiqc/multiqc_data/multiqc_data.json multiqc/multiqc_data/multiqc_sources.txt diff --git a/tests/default.nf.test b/tests/default.nf.test index 21d50295..55d306d0 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -3,6 +3,7 @@ nextflow_pipeline { name "Test pipeline" script "../main.nf" tag "pipeline" + tag "cicd" profile "test_stub" options "-stub" diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index c805d46d..24db2af7 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -55,30 +55,12 @@ "subject_a/cuppa/subject_a.tumor.cuppa.vis_data.tsv", "subject_a/cuppa/subject_a.tumor.cuppa_data.tsv.gz", "subject_a/esvee", - "subject_a/esvee/assemble", - "subject_a/esvee/assemble/subject_a.tumor.esvee.alignment.tsv", - "subject_a/esvee/assemble/subject_a.tumor.esvee.assembly.tsv", - "subject_a/esvee/assemble/subject_a.tumor.esvee.breakend.tsv", - "subject_a/esvee/assemble/subject_a.tumor.esvee.phased_assembly.tsv", - "subject_a/esvee/assemble/subject_a.tumor.esvee.raw.vcf.gz", - "subject_a/esvee/assemble/subject_a.tumor.esvee.raw.vcf.gz.tbi", - "subject_a/esvee/caller", - "subject_a/esvee/caller/subject_a.tumor.esvee.germline.vcf.gz", - "subject_a/esvee/caller/subject_a.tumor.esvee.germline.vcf.gz.tbi", - "subject_a/esvee/caller/subject_a.tumor.esvee.somatic.vcf.gz", - "subject_a/esvee/caller/subject_a.tumor.esvee.somatic.vcf.gz.tbi", - "subject_a/esvee/caller/subject_a.tumor.esvee.unfiltered.vcf.gz", - "subject_a/esvee/caller/subject_a.tumor.esvee.unfiltered.vcf.gz.tbi", - "subject_a/esvee/depth_annotation", - "subject_a/esvee/depth_annotation/subject_a.tumor.esvee.ref_depth.vcf.gz", - 
"subject_a/esvee/depth_annotation/subject_a.tumor.esvee.ref_depth.vcf.gz.tbi", - "subject_a/esvee/prep", - "subject_a/esvee/prep/subject_a.normal.esvee.prep.bam", - "subject_a/esvee/prep/subject_a.normal.esvee.prep.bam.bai", - "subject_a/esvee/prep/subject_a.tumor.esvee.prep.bam", - "subject_a/esvee/prep/subject_a.tumor.esvee.prep.bam.bai", - "subject_a/esvee/prep/subject_a.tumor.esvee.prep.fragment_length.tsv", - "subject_a/esvee/prep/subject_a.tumor.esvee.prep.junction.tsv", + "subject_a/esvee/subject_a.tumor.esvee.germline.vcf.gz", + "subject_a/esvee/subject_a.tumor.esvee.germline.vcf.gz.tbi", + "subject_a/esvee/subject_a.tumor.esvee.somatic.vcf.gz", + "subject_a/esvee/subject_a.tumor.esvee.somatic.vcf.gz.tbi", + "subject_a/esvee/subject_a.tumor.esvee.unfiltered.vcf.gz", + "subject_a/esvee/subject_a.tumor.esvee.unfiltered.vcf.gz.tbi", "subject_a/isofox", "subject_a/isofox/placeholder", "subject_a/lilac", @@ -94,14 +76,190 @@ "subject_a/linx/somatic_plots/reportable", "subject_a/linx/somatic_plots/reportable/placeholder", "subject_a/linx/subject_a.tumor_linx.html", + "subject_a/logs", + "subject_a/logs/subject_a.amber.command.err", + "subject_a/logs/subject_a.amber.command.log", + "subject_a/logs/subject_a.amber.command.out", + "subject_a/logs/subject_a.amber.command.run", + "subject_a/logs/subject_a.amber.command.sh", + "subject_a/logs/subject_a.chord.command.err", + "subject_a/logs/subject_a.chord.command.log", + "subject_a/logs/subject_a.chord.command.out", + "subject_a/logs/subject_a.chord.command.run", + "subject_a/logs/subject_a.chord.command.sh", + "subject_a/logs/subject_a.cobalt.command.err", + "subject_a/logs/subject_a.cobalt.command.log", + "subject_a/logs/subject_a.cobalt.command.out", + "subject_a/logs/subject_a.cobalt.command.run", + "subject_a/logs/subject_a.cobalt.command.sh", + "subject_a/logs/subject_a.cuppa.command.err", + "subject_a/logs/subject_a.cuppa.command.log", + "subject_a/logs/subject_a.cuppa.command.out", + 
"subject_a/logs/subject_a.cuppa.command.run", + "subject_a/logs/subject_a.cuppa.command.sh", + "subject_a/logs/subject_a.esvee.command.err", + "subject_a/logs/subject_a.esvee.command.log", + "subject_a/logs/subject_a.esvee.command.out", + "subject_a/logs/subject_a.esvee.command.run", + "subject_a/logs/subject_a.esvee.command.sh", + "subject_a/logs/subject_a.isofox.command.err", + "subject_a/logs/subject_a.isofox.command.log", + "subject_a/logs/subject_a.isofox.command.out", + "subject_a/logs/subject_a.isofox.command.run", + "subject_a/logs/subject_a.isofox.command.sh", + "subject_a/logs/subject_a.lilac.command.err", + "subject_a/logs/subject_a.lilac.command.log", + "subject_a/logs/subject_a.lilac.command.out", + "subject_a/logs/subject_a.lilac.command.run", + "subject_a/logs/subject_a.lilac.command.sh", + "subject_a/logs/subject_a.linx_germline.command.err", + "subject_a/logs/subject_a.linx_germline.command.log", + "subject_a/logs/subject_a.linx_germline.command.out", + "subject_a/logs/subject_a.linx_germline.command.run", + "subject_a/logs/subject_a.linx_germline.command.sh", + "subject_a/logs/subject_a.linx_somatic.command.err", + "subject_a/logs/subject_a.linx_somatic.command.log", + "subject_a/logs/subject_a.linx_somatic.command.out", + "subject_a/logs/subject_a.linx_somatic.command.run", + "subject_a/logs/subject_a.linx_somatic.command.sh", + "subject_a/logs/subject_a.linx_visualiser.command.err", + "subject_a/logs/subject_a.linx_visualiser.command.log", + "subject_a/logs/subject_a.linx_visualiser.command.out", + "subject_a/logs/subject_a.linx_visualiser.command.run", + "subject_a/logs/subject_a.linx_visualiser.command.sh", + "subject_a/logs/subject_a.linxreport.command.err", + "subject_a/logs/subject_a.linxreport.command.log", + "subject_a/logs/subject_a.linxreport.command.out", + "subject_a/logs/subject_a.linxreport.command.run", + "subject_a/logs/subject_a.linxreport.command.sh", + "subject_a/logs/subject_a.neo_annotate_fusions.command.err", + 
"subject_a/logs/subject_a.neo_annotate_fusions.command.log", + "subject_a/logs/subject_a.neo_annotate_fusions.command.out", + "subject_a/logs/subject_a.neo_annotate_fusions.command.run", + "subject_a/logs/subject_a.neo_annotate_fusions.command.sh", + "subject_a/logs/subject_a.neo_finder.command.err", + "subject_a/logs/subject_a.neo_finder.command.log", + "subject_a/logs/subject_a.neo_finder.command.out", + "subject_a/logs/subject_a.neo_finder.command.run", + "subject_a/logs/subject_a.neo_finder.command.sh", + "subject_a/logs/subject_a.neo_scorer.command.err", + "subject_a/logs/subject_a.neo_scorer.command.log", + "subject_a/logs/subject_a.neo_scorer.command.out", + "subject_a/logs/subject_a.neo_scorer.command.run", + "subject_a/logs/subject_a.neo_scorer.command.sh", + "subject_a/logs/subject_a.orange.command.err", + "subject_a/logs/subject_a.orange.command.log", + "subject_a/logs/subject_a.orange.command.out", + "subject_a/logs/subject_a.orange.command.run", + "subject_a/logs/subject_a.orange.command.sh", + "subject_a/logs/subject_a.pave_germline.command.err", + "subject_a/logs/subject_a.pave_germline.command.log", + "subject_a/logs/subject_a.pave_germline.command.out", + "subject_a/logs/subject_a.pave_germline.command.run", + "subject_a/logs/subject_a.pave_germline.command.sh", + "subject_a/logs/subject_a.pave_somatic.command.err", + "subject_a/logs/subject_a.pave_somatic.command.log", + "subject_a/logs/subject_a.pave_somatic.command.out", + "subject_a/logs/subject_a.pave_somatic.command.run", + "subject_a/logs/subject_a.pave_somatic.command.sh", + "subject_a/logs/subject_a.peach.command.err", + "subject_a/logs/subject_a.peach.command.log", + "subject_a/logs/subject_a.peach.command.out", + "subject_a/logs/subject_a.peach.command.run", + "subject_a/logs/subject_a.peach.command.sh", + "subject_a/logs/subject_a.purple.command.err", + "subject_a/logs/subject_a.purple.command.log", + "subject_a/logs/subject_a.purple.command.out", + 
"subject_a/logs/subject_a.purple.command.run", + "subject_a/logs/subject_a.purple.command.sh", + "subject_a/logs/subject_a.sage_append_germline.command.err", + "subject_a/logs/subject_a.sage_append_germline.command.log", + "subject_a/logs/subject_a.sage_append_germline.command.out", + "subject_a/logs/subject_a.sage_append_germline.command.run", + "subject_a/logs/subject_a.sage_append_germline.command.sh", + "subject_a/logs/subject_a.sage_append_somatic.command.err", + "subject_a/logs/subject_a.sage_append_somatic.command.log", + "subject_a/logs/subject_a.sage_append_somatic.command.out", + "subject_a/logs/subject_a.sage_append_somatic.command.run", + "subject_a/logs/subject_a.sage_append_somatic.command.sh", + "subject_a/logs/subject_a.sage_germline.command.err", + "subject_a/logs/subject_a.sage_germline.command.log", + "subject_a/logs/subject_a.sage_germline.command.out", + "subject_a/logs/subject_a.sage_germline.command.run", + "subject_a/logs/subject_a.sage_germline.command.sh", + "subject_a/logs/subject_a.sage_somatic.command.err", + "subject_a/logs/subject_a.sage_somatic.command.log", + "subject_a/logs/subject_a.sage_somatic.command.out", + "subject_a/logs/subject_a.sage_somatic.command.run", + "subject_a/logs/subject_a.sage_somatic.command.sh", + "subject_a/logs/subject_a.sigs.command.err", + "subject_a/logs/subject_a.sigs.command.log", + "subject_a/logs/subject_a.sigs.command.out", + "subject_a/logs/subject_a.sigs.command.run", + "subject_a/logs/subject_a.sigs.command.sh", + "subject_a/logs/subject_a.teal_pipeline.command.err", + "subject_a/logs/subject_a.teal_pipeline.command.log", + "subject_a/logs/subject_a.teal_pipeline.command.out", + "subject_a/logs/subject_a.teal_pipeline.command.run", + "subject_a/logs/subject_a.teal_pipeline.command.sh", + "subject_a/logs/subject_a.teal_prep.command.err", + "subject_a/logs/subject_a.teal_prep.command.log", + "subject_a/logs/subject_a.teal_prep.command.out", + "subject_a/logs/subject_a.teal_prep.command.run", + 
"subject_a/logs/subject_a.teal_prep.command.sh", + "subject_a/logs/subject_a.virusbreakend.command.err", + "subject_a/logs/subject_a.virusbreakend.command.log", + "subject_a/logs/subject_a.virusbreakend.command.out", + "subject_a/logs/subject_a.virusbreakend.command.run", + "subject_a/logs/subject_a.virusbreakend.command.sh", + "subject_a/logs/subject_a.virusinterpreter.command.err", + "subject_a/logs/subject_a.virusinterpreter.command.log", + "subject_a/logs/subject_a.virusinterpreter.command.out", + "subject_a/logs/subject_a.virusinterpreter.command.run", + "subject_a/logs/subject_a.virusinterpreter.command.sh", + "subject_a/logs/subject_a_subject_a.normal.bamtools.command.err", + "subject_a/logs/subject_a_subject_a.normal.bamtools.command.log", + "subject_a/logs/subject_a_subject_a.normal.bamtools.command.out", + "subject_a/logs/subject_a_subject_a.normal.bamtools.command.run", + "subject_a/logs/subject_a_subject_a.normal.bamtools.command.sh", + "subject_a/logs/subject_a_subject_a.normal.redux.command.err", + "subject_a/logs/subject_a_subject_a.normal.redux.command.log", + "subject_a/logs/subject_a_subject_a.normal.redux.command.out", + "subject_a/logs/subject_a_subject_a.normal.redux.command.run", + "subject_a/logs/subject_a_subject_a.normal.redux.command.sh", + "subject_a/logs/subject_a_subject_a.tumor.bamtools.command.err", + "subject_a/logs/subject_a_subject_a.tumor.bamtools.command.log", + "subject_a/logs/subject_a_subject_a.tumor.bamtools.command.out", + "subject_a/logs/subject_a_subject_a.tumor.bamtools.command.run", + "subject_a/logs/subject_a_subject_a.tumor.bamtools.command.sh", + "subject_a/logs/subject_a_subject_a.tumor.cider.command.err", + "subject_a/logs/subject_a_subject_a.tumor.cider.command.log", + "subject_a/logs/subject_a_subject_a.tumor.cider.command.out", + "subject_a/logs/subject_a_subject_a.tumor.cider.command.run", + "subject_a/logs/subject_a_subject_a.tumor.cider.command.sh", + 
"subject_a/logs/subject_a_subject_a.tumor.redux.command.err", + "subject_a/logs/subject_a_subject_a.tumor.redux.command.log", + "subject_a/logs/subject_a_subject_a.tumor.redux.command.out", + "subject_a/logs/subject_a_subject_a.tumor.redux.command.run", + "subject_a/logs/subject_a_subject_a.tumor.redux.command.sh", + "subject_a/logs/subject_a_subject_a.tumor_rna.cider.command.err", + "subject_a/logs/subject_a_subject_a.tumor_rna.cider.command.log", + "subject_a/logs/subject_a_subject_a.tumor_rna.cider.command.out", + "subject_a/logs/subject_a_subject_a.tumor_rna.cider.command.run", + "subject_a/logs/subject_a_subject_a.tumor_rna.cider.command.sh", + "subject_a/neo", + "subject_a/neo/annotated_fusions", + "subject_a/neo/annotated_fusions/subject_a.tumor.isf.neoepitope.tsv", + "subject_a/neo/finder", + "subject_a/neo/scorer", "subject_a/orange", "subject_a/orange/subject_a.tumor.orange.json", "subject_a/orange/subject_a.tumor.orange.pdf", "subject_a/pave", - "subject_a/pave/subject_a.tumor.sage.pave_germline.vcf.gz", - "subject_a/pave/subject_a.tumor.sage.pave_germline.vcf.gz.tbi", - "subject_a/pave/subject_a.tumor.sage.pave_somatic.vcf.gz", - "subject_a/pave/subject_a.tumor.sage.pave_somatic.vcf.gz.tbi", + "subject_a/pave/subject_a.tumor.pave.germline.vcf.gz", + "subject_a/pave/subject_a.tumor.pave.germline.vcf.gz.tbi", + "subject_a/pave/subject_a.tumor.pave.somatic.vcf.gz", + "subject_a/pave/subject_a.tumor.pave.somatic.vcf.gz.tbi", "subject_a/peach", "subject_a/peach/subject_a.normal.peach.events.tsv", "subject_a/peach/subject_a.normal.peach.gene.events.tsv", @@ -119,26 +277,34 @@ "subject_a/purple/subject_a.tumor.purple.somatic.vcf.gz", "subject_a/purple/subject_a.tumor.purple.sv.germline.vcf.gz", "subject_a/purple/subject_a.tumor.purple.sv.vcf.gz", - "subject_a/sage", - "subject_a/sage/append", - "subject_a/sage/append/subject_a.normal.sage.append.vcf.gz", - "subject_a/sage/append/subject_a.tumor.sage.append.vcf.gz", - "subject_a/sage/germline", - 
"subject_a/sage/germline/subject_a.normal.gene.coverage.tsv", - "subject_a/sage/germline/subject_a.normal.sage.bqr.png", - "subject_a/sage/germline/subject_a.normal.sage.bqr.tsv", - "subject_a/sage/germline/subject_a.tumor.sage.bqr.png", - "subject_a/sage/germline/subject_a.tumor.sage.bqr.tsv", - "subject_a/sage/germline/subject_a.tumor.sage.germline.vcf.gz", - "subject_a/sage/germline/subject_a.tumor.sage.germline.vcf.gz.tbi", - "subject_a/sage/somatic", - "subject_a/sage/somatic/subject_a.normal.sage.bqr.png", - "subject_a/sage/somatic/subject_a.normal.sage.bqr.tsv", - "subject_a/sage/somatic/subject_a.tumor.gene.coverage.tsv", - "subject_a/sage/somatic/subject_a.tumor.sage.bqr.png", - "subject_a/sage/somatic/subject_a.tumor.sage.bqr.tsv", - "subject_a/sage/somatic/subject_a.tumor.sage.somatic.vcf.gz", - "subject_a/sage/somatic/subject_a.tumor.sage.somatic.vcf.gz.tbi", + "subject_a/sage_append", + "subject_a/sage_append/germline", + "subject_a/sage_append/germline/subject_a.normal.frag_lengths.tsv.gz", + "subject_a/sage_append/germline/subject_a.normal.sage.append.vcf.gz", + "subject_a/sage_append/germline/subject_a.normal.sage.append.vcf.gz.tbi", + "subject_a/sage_append/germline/subject_a.normal_query.sage.bqr.tsv", + "subject_a/sage_append/somatic", + "subject_a/sage_append/somatic/subject_a.tumor.frag_lengths.tsv.gz", + "subject_a/sage_append/somatic/subject_a.tumor.sage.append.vcf.gz", + "subject_a/sage_append/somatic/subject_a.tumor.sage.append.vcf.gz.tbi", + "subject_a/sage_append/somatic/subject_a.tumor_query.sage.bqr.tsv", + "subject_a/sage_calling", + "subject_a/sage_calling/germline", + "subject_a/sage_calling/germline/subject_a.normal.gene.coverage.tsv", + "subject_a/sage_calling/germline/subject_a.normal.sage.bqr.png", + "subject_a/sage_calling/germline/subject_a.normal.sage.bqr.tsv", + "subject_a/sage_calling/germline/subject_a.tumor.sage.bqr.png", + "subject_a/sage_calling/germline/subject_a.tumor.sage.bqr.tsv", + 
"subject_a/sage_calling/germline/subject_a.tumor.sage.germline.vcf.gz", + "subject_a/sage_calling/germline/subject_a.tumor.sage.germline.vcf.gz.tbi", + "subject_a/sage_calling/somatic", + "subject_a/sage_calling/somatic/subject_a.normal.sage.bqr.png", + "subject_a/sage_calling/somatic/subject_a.normal.sage.bqr.tsv", + "subject_a/sage_calling/somatic/subject_a.tumor.gene.coverage.tsv", + "subject_a/sage_calling/somatic/subject_a.tumor.sage.bqr.png", + "subject_a/sage_calling/somatic/subject_a.tumor.sage.bqr.tsv", + "subject_a/sage_calling/somatic/subject_a.tumor.sage.somatic.vcf.gz", + "subject_a/sage_calling/somatic/subject_a.tumor.sage.somatic.vcf.gz.tbi", "subject_a/sigs", "subject_a/sigs/placeholder", "subject_a/teal", @@ -158,8 +324,8 @@ ], "meta": { "nf-test": "0.9.2", - "nextflow": "24.10.0" + "nextflow": "25.04.6" }, - "timestamp": "2025-06-11T12:53:28.525304" + "timestamp": "2025-08-12T16:05:52.737409" } } \ No newline at end of file diff --git a/tests/profile_test.nf.test b/tests/profile_test.nf.test new file mode 100644 index 00000000..ae3f3333 --- /dev/null +++ b/tests/profile_test.nf.test @@ -0,0 +1,33 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + tag "pipeline" + + profile "test" + + test("-profile test") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // All stable path name, with a relative path + stable_name, + ).match() } + ) + } + } +} diff --git a/tests/profile_test.nf.test.snap 
b/tests/profile_test.nf.test.snap new file mode 100644 index 00000000..cacd6411 --- /dev/null +++ b/tests/profile_test.nf.test.snap @@ -0,0 +1,440 @@ +{ + "-profile test": { + "content": [ + 43, + [ + "pipeline_info", + "pipeline_info/software_versions.yml", + "subject_a", + "subject_a/alignments", + "subject_a/alignments/dna", + "subject_a/alignments/dna/subject_a.normal.jitter_params.tsv", + "subject_a/alignments/dna/subject_a.normal.ms_table.tsv.gz", + "subject_a/alignments/dna/subject_a.normal.redux.bam", + "subject_a/alignments/dna/subject_a.normal.redux.bam.bai", + "subject_a/alignments/dna/subject_a.normal.redux.duplicate_freq.tsv", + "subject_a/alignments/dna/subject_a.tumor.jitter_params.tsv", + "subject_a/alignments/dna/subject_a.tumor.ms_table.tsv.gz", + "subject_a/alignments/dna/subject_a.tumor.redux.bam", + "subject_a/alignments/dna/subject_a.tumor.redux.bam.bai", + "subject_a/alignments/dna/subject_a.tumor.redux.duplicate_freq.tsv", + "subject_a/alignments/rna", + "subject_a/alignments/rna/subject_a.tumor_rna.md.bam", + "subject_a/alignments/rna/subject_a.tumor_rna.md.bam.bai", + "subject_a/alignments/rna/subject_a.tumor_rna.md.metrics", + "subject_a/amber", + "subject_a/amber/amber.version", + "subject_a/amber/subject_a.normal.amber.homozygousregion.tsv", + "subject_a/amber/subject_a.normal.amber.snp.vcf.gz", + "subject_a/amber/subject_a.normal.amber.snp.vcf.gz.tbi", + "subject_a/amber/subject_a.tumor.amber.baf.pcf", + "subject_a/amber/subject_a.tumor.amber.baf.tsv.gz", + "subject_a/amber/subject_a.tumor.amber.contamination.tsv", + "subject_a/amber/subject_a.tumor.amber.contamination.vcf.gz", + "subject_a/amber/subject_a.tumor.amber.contamination.vcf.gz.tbi", + "subject_a/amber/subject_a.tumor.amber.qc", + "subject_a/bamtools", + "subject_a/bamtools/subject_a_subject_a.normal_bamtools", + "subject_a/bamtools/subject_a_subject_a.normal_bamtools/subject_a.normal.bam_metric.coverage.tsv", + 
"subject_a/bamtools/subject_a_subject_a.normal_bamtools/subject_a.normal.bam_metric.exon_medians.tsv", + "subject_a/bamtools/subject_a_subject_a.normal_bamtools/subject_a.normal.bam_metric.flag_counts.tsv", + "subject_a/bamtools/subject_a_subject_a.normal_bamtools/subject_a.normal.bam_metric.frag_length.tsv", + "subject_a/bamtools/subject_a_subject_a.normal_bamtools/subject_a.normal.bam_metric.gene_coverage.tsv", + "subject_a/bamtools/subject_a_subject_a.normal_bamtools/subject_a.normal.bam_metric.partition_stats.tsv", + "subject_a/bamtools/subject_a_subject_a.normal_bamtools/subject_a.normal.bam_metric.summary.tsv", + "subject_a/bamtools/subject_a_subject_a.tumor_bamtools", + "subject_a/bamtools/subject_a_subject_a.tumor_bamtools/subject_a.tumor.bam_metric.coverage.tsv", + "subject_a/bamtools/subject_a_subject_a.tumor_bamtools/subject_a.tumor.bam_metric.exon_medians.tsv", + "subject_a/bamtools/subject_a_subject_a.tumor_bamtools/subject_a.tumor.bam_metric.flag_counts.tsv", + "subject_a/bamtools/subject_a_subject_a.tumor_bamtools/subject_a.tumor.bam_metric.frag_length.tsv", + "subject_a/bamtools/subject_a_subject_a.tumor_bamtools/subject_a.tumor.bam_metric.gene_coverage.tsv", + "subject_a/bamtools/subject_a_subject_a.tumor_bamtools/subject_a.tumor.bam_metric.partition_stats.tsv", + "subject_a/bamtools/subject_a_subject_a.tumor_bamtools/subject_a.tumor.bam_metric.summary.tsv", + "subject_a/chord", + "subject_a/chord/subject_a.tumor.chord.mutation_contexts.tsv", + "subject_a/chord/subject_a.tumor.chord.prediction.tsv", + "subject_a/cider", + "subject_a/cider/subject_a.tumor.cider.bam", + "subject_a/cider/subject_a.tumor.cider.blastn_match.tsv.gz", + "subject_a/cider/subject_a.tumor.cider.layout.gz", + "subject_a/cider/subject_a.tumor.cider.locus_stats.tsv", + "subject_a/cider/subject_a.tumor.cider.vdj.tsv.gz", + "subject_a/cider/subject_a.tumor_rna.cider.bam", + "subject_a/cider/subject_a.tumor_rna.cider.blastn_match.tsv.gz", + 
"subject_a/cider/subject_a.tumor_rna.cider.layout.gz", + "subject_a/cider/subject_a.tumor_rna.cider.locus_stats.tsv", + "subject_a/cider/subject_a.tumor_rna.cider.vdj.tsv.gz", + "subject_a/cobalt", + "subject_a/cobalt/cobalt.version", + "subject_a/cobalt/subject_a.normal.cobalt.gc.median.tsv", + "subject_a/cobalt/subject_a.normal.cobalt.ratio.median.tsv", + "subject_a/cobalt/subject_a.normal.cobalt.ratio.pcf", + "subject_a/cobalt/subject_a.tumor.cobalt.gc.median.tsv", + "subject_a/cobalt/subject_a.tumor.cobalt.ratio.pcf", + "subject_a/cobalt/subject_a.tumor.cobalt.ratio.tsv.gz", + "subject_a/cuppa", + "subject_a/cuppa/subject_a.tumor.cuppa.pred_summ.tsv", + "subject_a/cuppa/subject_a.tumor.cuppa.vis.png", + "subject_a/cuppa/subject_a.tumor.cuppa.vis_data.tsv", + "subject_a/cuppa/subject_a.tumor.cuppa_data.tsv.gz", + "subject_a/esvee", + "subject_a/esvee/subject_a.normal.esvee.prep.bam", + "subject_a/esvee/subject_a.normal.esvee.prep.bam.bai", + "subject_a/esvee/subject_a.tumor.esvee.alignment.tsv", + "subject_a/esvee/subject_a.tumor.esvee.assembly.tsv", + "subject_a/esvee/subject_a.tumor.esvee.breakend.tsv", + "subject_a/esvee/subject_a.tumor.esvee.germline.vcf.gz", + "subject_a/esvee/subject_a.tumor.esvee.germline.vcf.gz.tbi", + "subject_a/esvee/subject_a.tumor.esvee.phased_assembly.tsv", + "subject_a/esvee/subject_a.tumor.esvee.prep.bam", + "subject_a/esvee/subject_a.tumor.esvee.prep.bam.bai", + "subject_a/esvee/subject_a.tumor.esvee.prep.disc_stats.tsv", + "subject_a/esvee/subject_a.tumor.esvee.prep.fragment_length.tsv", + "subject_a/esvee/subject_a.tumor.esvee.prep.junction.tsv", + "subject_a/esvee/subject_a.tumor.esvee.raw.vcf.gz", + "subject_a/esvee/subject_a.tumor.esvee.raw.vcf.gz.tbi", + "subject_a/esvee/subject_a.tumor.esvee.ref_depth.vcf.gz", + "subject_a/esvee/subject_a.tumor.esvee.ref_depth.vcf.gz.tbi", + "subject_a/esvee/subject_a.tumor.esvee.somatic.vcf.gz", + "subject_a/esvee/subject_a.tumor.esvee.somatic.vcf.gz.tbi", + 
"subject_a/esvee/subject_a.tumor.esvee.unfiltered.vcf.gz", + "subject_a/esvee/subject_a.tumor.esvee.unfiltered.vcf.gz.tbi", + "subject_a/isofox", + "subject_a/isofox/subject_a.tumor_rna.isf.alt_splice_junc.csv", + "subject_a/isofox/subject_a.tumor_rna.isf.fusions.csv", + "subject_a/isofox/subject_a.tumor_rna.isf.gene_collection.csv", + "subject_a/isofox/subject_a.tumor_rna.isf.gene_data.csv", + "subject_a/isofox/subject_a.tumor_rna.isf.pass_fusions.csv", + "subject_a/isofox/subject_a.tumor_rna.isf.retained_intron.csv", + "subject_a/isofox/subject_a.tumor_rna.isf.summary.csv", + "subject_a/isofox/subject_a.tumor_rna.isf.transcript_data.csv", + "subject_a/lilac", + "subject_a/lilac/subject_a.tumor.lilac.candidates.coverage.tsv", + "subject_a/lilac/subject_a.tumor.lilac.qc.tsv", + "subject_a/lilac/subject_a.tumor.lilac.tsv", + "subject_a/linx", + "subject_a/linx/germline_annotations", + "subject_a/linx/germline_annotations/linx.version", + "subject_a/linx/germline_annotations/subject_a.tumor.linx.germline.breakend.tsv", + "subject_a/linx/germline_annotations/subject_a.tumor.linx.germline.clusters.tsv", + "subject_a/linx/germline_annotations/subject_a.tumor.linx.germline.disruption.tsv", + "subject_a/linx/germline_annotations/subject_a.tumor.linx.germline.driver.catalog.tsv", + "subject_a/linx/germline_annotations/subject_a.tumor.linx.germline.links.tsv", + "subject_a/linx/germline_annotations/subject_a.tumor.linx.germline.svs.tsv", + "subject_a/linx/somatic_annotations", + "subject_a/linx/somatic_annotations/linx.version", + "subject_a/linx/somatic_annotations/subject_a.tumor.linx.breakend.tsv", + "subject_a/linx/somatic_annotations/subject_a.tumor.linx.clusters.tsv", + "subject_a/linx/somatic_annotations/subject_a.tumor.linx.driver.catalog.tsv", + "subject_a/linx/somatic_annotations/subject_a.tumor.linx.drivers.tsv", + "subject_a/linx/somatic_annotations/subject_a.tumor.linx.fusion.tsv", + "subject_a/linx/somatic_annotations/subject_a.tumor.linx.links.tsv", + 
"subject_a/linx/somatic_annotations/subject_a.tumor.linx.neoepitope.tsv", + "subject_a/linx/somatic_annotations/subject_a.tumor.linx.svs.tsv", + "subject_a/linx/somatic_annotations/subject_a.tumor.linx.vis_copy_number.tsv", + "subject_a/linx/somatic_annotations/subject_a.tumor.linx.vis_fusion.tsv", + "subject_a/linx/somatic_annotations/subject_a.tumor.linx.vis_gene_exon.tsv", + "subject_a/linx/somatic_annotations/subject_a.tumor.linx.vis_protein_domain.tsv", + "subject_a/linx/somatic_annotations/subject_a.tumor.linx.vis_segments.tsv", + "subject_a/linx/somatic_annotations/subject_a.tumor.linx.vis_sv_data.tsv", + "subject_a/linx/somatic_plots", + "subject_a/linx/somatic_plots/all", + "subject_a/linx/somatic_plots/all/subject_a.tumor.chr11.007.png", + "subject_a/linx/somatic_plots/all/subject_a.tumor.chr19.000.png", + "subject_a/linx/somatic_plots/all/subject_a.tumor.chr9.007.png", + "subject_a/linx/somatic_plots/all/subject_a.tumor.cluster-0.sv2.003.png", + "subject_a/linx/somatic_plots/all/subject_a.tumor.cluster-1.sv2.007.png", + "subject_a/linx/somatic_plots/reportable", + "subject_a/linx/somatic_plots/reportable/subject_a.tumor.cluster-1.sv2.007.png", + "subject_a/linx/subject_a.tumor_linx.html", + "subject_a/logs", + "subject_a/logs/subject_a.amber.command.err", + "subject_a/logs/subject_a.amber.command.log", + "subject_a/logs/subject_a.amber.command.out", + "subject_a/logs/subject_a.amber.command.run", + "subject_a/logs/subject_a.amber.command.sh", + "subject_a/logs/subject_a.chord.command.err", + "subject_a/logs/subject_a.chord.command.log", + "subject_a/logs/subject_a.chord.command.out", + "subject_a/logs/subject_a.chord.command.run", + "subject_a/logs/subject_a.chord.command.sh", + "subject_a/logs/subject_a.cobalt.command.err", + "subject_a/logs/subject_a.cobalt.command.log", + "subject_a/logs/subject_a.cobalt.command.out", + "subject_a/logs/subject_a.cobalt.command.run", + "subject_a/logs/subject_a.cobalt.command.sh", + 
"subject_a/logs/subject_a.cuppa.command.err", + "subject_a/logs/subject_a.cuppa.command.log", + "subject_a/logs/subject_a.cuppa.command.out", + "subject_a/logs/subject_a.cuppa.command.run", + "subject_a/logs/subject_a.cuppa.command.sh", + "subject_a/logs/subject_a.esvee.command.err", + "subject_a/logs/subject_a.esvee.command.log", + "subject_a/logs/subject_a.esvee.command.out", + "subject_a/logs/subject_a.esvee.command.run", + "subject_a/logs/subject_a.esvee.command.sh", + "subject_a/logs/subject_a.gatk4_markduplicates.command.log", + "subject_a/logs/subject_a.gatk4_markduplicates.command.sh", + "subject_a/logs/subject_a.isofox.command.err", + "subject_a/logs/subject_a.isofox.command.log", + "subject_a/logs/subject_a.isofox.command.out", + "subject_a/logs/subject_a.isofox.command.run", + "subject_a/logs/subject_a.isofox.command.sh", + "subject_a/logs/subject_a.lilac.command.err", + "subject_a/logs/subject_a.lilac.command.log", + "subject_a/logs/subject_a.lilac.command.out", + "subject_a/logs/subject_a.lilac.command.run", + "subject_a/logs/subject_a.lilac.command.sh", + "subject_a/logs/subject_a.linx_germline.command.err", + "subject_a/logs/subject_a.linx_germline.command.log", + "subject_a/logs/subject_a.linx_germline.command.out", + "subject_a/logs/subject_a.linx_germline.command.run", + "subject_a/logs/subject_a.linx_germline.command.sh", + "subject_a/logs/subject_a.linx_somatic.command.err", + "subject_a/logs/subject_a.linx_somatic.command.log", + "subject_a/logs/subject_a.linx_somatic.command.out", + "subject_a/logs/subject_a.linx_somatic.command.run", + "subject_a/logs/subject_a.linx_somatic.command.sh", + "subject_a/logs/subject_a.linx_visualiser.command.err", + "subject_a/logs/subject_a.linx_visualiser.command.log", + "subject_a/logs/subject_a.linx_visualiser.command.out", + "subject_a/logs/subject_a.linx_visualiser.command.run", + "subject_a/logs/subject_a.linx_visualiser.command.sh", + "subject_a/logs/subject_a.linxreport.command.err", + 
"subject_a/logs/subject_a.linxreport.command.log", + "subject_a/logs/subject_a.linxreport.command.out", + "subject_a/logs/subject_a.linxreport.command.run", + "subject_a/logs/subject_a.linxreport.command.sh", + "subject_a/logs/subject_a.neo_annotate_fusions.command.err", + "subject_a/logs/subject_a.neo_annotate_fusions.command.log", + "subject_a/logs/subject_a.neo_annotate_fusions.command.out", + "subject_a/logs/subject_a.neo_annotate_fusions.command.run", + "subject_a/logs/subject_a.neo_annotate_fusions.command.sh", + "subject_a/logs/subject_a.neo_finder.command.err", + "subject_a/logs/subject_a.neo_finder.command.log", + "subject_a/logs/subject_a.neo_finder.command.out", + "subject_a/logs/subject_a.neo_finder.command.run", + "subject_a/logs/subject_a.neo_finder.command.sh", + "subject_a/logs/subject_a.neo_scorer.command.err", + "subject_a/logs/subject_a.neo_scorer.command.log", + "subject_a/logs/subject_a.neo_scorer.command.out", + "subject_a/logs/subject_a.neo_scorer.command.run", + "subject_a/logs/subject_a.neo_scorer.command.sh", + "subject_a/logs/subject_a.orange.command.err", + "subject_a/logs/subject_a.orange.command.log", + "subject_a/logs/subject_a.orange.command.out", + "subject_a/logs/subject_a.orange.command.run", + "subject_a/logs/subject_a.orange.command.sh", + "subject_a/logs/subject_a.pave_germline.command.err", + "subject_a/logs/subject_a.pave_germline.command.log", + "subject_a/logs/subject_a.pave_germline.command.out", + "subject_a/logs/subject_a.pave_germline.command.run", + "subject_a/logs/subject_a.pave_germline.command.sh", + "subject_a/logs/subject_a.pave_somatic.command.err", + "subject_a/logs/subject_a.pave_somatic.command.log", + "subject_a/logs/subject_a.pave_somatic.command.out", + "subject_a/logs/subject_a.pave_somatic.command.run", + "subject_a/logs/subject_a.pave_somatic.command.sh", + "subject_a/logs/subject_a.peach.command.err", + "subject_a/logs/subject_a.peach.command.log", + "subject_a/logs/subject_a.peach.command.out", + 
"subject_a/logs/subject_a.peach.command.run", + "subject_a/logs/subject_a.peach.command.sh", + "subject_a/logs/subject_a.purple.command.err", + "subject_a/logs/subject_a.purple.command.log", + "subject_a/logs/subject_a.purple.command.out", + "subject_a/logs/subject_a.purple.command.run", + "subject_a/logs/subject_a.purple.command.sh", + "subject_a/logs/subject_a.sage_append_germline.command.err", + "subject_a/logs/subject_a.sage_append_germline.command.log", + "subject_a/logs/subject_a.sage_append_germline.command.out", + "subject_a/logs/subject_a.sage_append_germline.command.run", + "subject_a/logs/subject_a.sage_append_germline.command.sh", + "subject_a/logs/subject_a.sage_append_somatic.command.err", + "subject_a/logs/subject_a.sage_append_somatic.command.log", + "subject_a/logs/subject_a.sage_append_somatic.command.out", + "subject_a/logs/subject_a.sage_append_somatic.command.run", + "subject_a/logs/subject_a.sage_append_somatic.command.sh", + "subject_a/logs/subject_a.sage_germline.command.err", + "subject_a/logs/subject_a.sage_germline.command.log", + "subject_a/logs/subject_a.sage_germline.command.out", + "subject_a/logs/subject_a.sage_germline.command.run", + "subject_a/logs/subject_a.sage_germline.command.sh", + "subject_a/logs/subject_a.sage_somatic.command.err", + "subject_a/logs/subject_a.sage_somatic.command.log", + "subject_a/logs/subject_a.sage_somatic.command.out", + "subject_a/logs/subject_a.sage_somatic.command.run", + "subject_a/logs/subject_a.sage_somatic.command.sh", + "subject_a/logs/subject_a.sigs.command.err", + "subject_a/logs/subject_a.sigs.command.log", + "subject_a/logs/subject_a.sigs.command.out", + "subject_a/logs/subject_a.sigs.command.run", + "subject_a/logs/subject_a.sigs.command.sh", + "subject_a/logs/subject_a.teal_pipeline.command.err", + "subject_a/logs/subject_a.teal_pipeline.command.log", + "subject_a/logs/subject_a.teal_pipeline.command.out", + "subject_a/logs/subject_a.teal_pipeline.command.run", + 
"subject_a/logs/subject_a.teal_pipeline.command.sh", + "subject_a/logs/subject_a.teal_prep.command.err", + "subject_a/logs/subject_a.teal_prep.command.log", + "subject_a/logs/subject_a.teal_prep.command.out", + "subject_a/logs/subject_a.teal_prep.command.run", + "subject_a/logs/subject_a.teal_prep.command.sh", + "subject_a/logs/subject_a.virusbreakend.command.err", + "subject_a/logs/subject_a.virusbreakend.command.log", + "subject_a/logs/subject_a.virusbreakend.command.out", + "subject_a/logs/subject_a.virusbreakend.command.run", + "subject_a/logs/subject_a.virusbreakend.command.sh", + "subject_a/logs/subject_a.virusinterpreter.command.err", + "subject_a/logs/subject_a.virusinterpreter.command.log", + "subject_a/logs/subject_a.virusinterpreter.command.out", + "subject_a/logs/subject_a.virusinterpreter.command.run", + "subject_a/logs/subject_a.virusinterpreter.command.sh", + "subject_a/logs/subject_a_subject_a.normal.bamtools.command.err", + "subject_a/logs/subject_a_subject_a.normal.bamtools.command.log", + "subject_a/logs/subject_a_subject_a.normal.bamtools.command.out", + "subject_a/logs/subject_a_subject_a.normal.bamtools.command.run", + "subject_a/logs/subject_a_subject_a.normal.bamtools.command.sh", + "subject_a/logs/subject_a_subject_a.normal.redux.command.err", + "subject_a/logs/subject_a_subject_a.normal.redux.command.log", + "subject_a/logs/subject_a_subject_a.normal.redux.command.out", + "subject_a/logs/subject_a_subject_a.normal.redux.command.run", + "subject_a/logs/subject_a_subject_a.normal.redux.command.sh", + "subject_a/logs/subject_a_subject_a.tumor.bamtools.command.err", + "subject_a/logs/subject_a_subject_a.tumor.bamtools.command.log", + "subject_a/logs/subject_a_subject_a.tumor.bamtools.command.out", + "subject_a/logs/subject_a_subject_a.tumor.bamtools.command.run", + "subject_a/logs/subject_a_subject_a.tumor.bamtools.command.sh", + "subject_a/logs/subject_a_subject_a.tumor.cider.command.err", + 
"subject_a/logs/subject_a_subject_a.tumor.cider.command.log", + "subject_a/logs/subject_a_subject_a.tumor.cider.command.out", + "subject_a/logs/subject_a_subject_a.tumor.cider.command.run", + "subject_a/logs/subject_a_subject_a.tumor.cider.command.sh", + "subject_a/logs/subject_a_subject_a.tumor.redux.command.err", + "subject_a/logs/subject_a_subject_a.tumor.redux.command.log", + "subject_a/logs/subject_a_subject_a.tumor.redux.command.out", + "subject_a/logs/subject_a_subject_a.tumor.redux.command.run", + "subject_a/logs/subject_a_subject_a.tumor.redux.command.sh", + "subject_a/logs/subject_a_subject_a.tumor_rna.cider.command.err", + "subject_a/logs/subject_a_subject_a.tumor_rna.cider.command.log", + "subject_a/logs/subject_a_subject_a.tumor_rna.cider.command.out", + "subject_a/logs/subject_a_subject_a.tumor_rna.cider.command.run", + "subject_a/logs/subject_a_subject_a.tumor_rna.cider.command.sh", + "subject_a/neo", + "subject_a/neo/annotated_fusions", + "subject_a/neo/annotated_fusions/subject_a.tumor.isf.neoepitope.tsv", + "subject_a/neo/finder", + "subject_a/neo/finder/subject_a.tumor.neo.neo_data.tsv", + "subject_a/neo/scorer", + "subject_a/neo/scorer/subject_a.tumor.neo.neoepitope.tsv", + "subject_a/neo/scorer/subject_a.tumor.neo.peptide_scores.tsv", + "subject_a/orange", + "subject_a/orange/subject_a.tumor.orange.json", + "subject_a/orange/subject_a.tumor.orange.pdf", + "subject_a/pave", + "subject_a/pave/subject_a.tumor.pave.germline.vcf.gz", + "subject_a/pave/subject_a.tumor.pave.germline.vcf.gz.tbi", + "subject_a/pave/subject_a.tumor.pave.somatic.vcf.gz", + "subject_a/pave/subject_a.tumor.pave.somatic.vcf.gz.tbi", + "subject_a/peach", + "subject_a/peach/subject_a.normal.peach.events.tsv", + "subject_a/peach/subject_a.normal.peach.gene.events.tsv", + "subject_a/peach/subject_a.normal.peach.haplotypes.all.tsv", + "subject_a/peach/subject_a.normal.peach.haplotypes.best.tsv", + "subject_a/peach/subject_a.normal.peach.qc.tsv", + "subject_a/purple", + 
"subject_a/purple/circos", + "subject_a/purple/circos/gaps.txt", + "subject_a/purple/circos/subject_a.normal.ratio.circos", + "subject_a/purple/circos/subject_a.tumor.baf.circos", + "subject_a/purple/circos/subject_a.tumor.circos.conf", + "subject_a/purple/circos/subject_a.tumor.cnv.circos", + "subject_a/purple/circos/subject_a.tumor.indel.circos", + "subject_a/purple/circos/subject_a.tumor.input.conf", + "subject_a/purple/circos/subject_a.tumor.link.circos", + "subject_a/purple/circos/subject_a.tumor.map.circos", + "subject_a/purple/circos/subject_a.tumor.ratio.circos", + "subject_a/purple/circos/subject_a.tumor.snp.circos", + "subject_a/purple/plot", + "subject_a/purple/plot/subject_a.tumor.circos.png", + "subject_a/purple/plot/subject_a.tumor.copynumber.png", + "subject_a/purple/plot/subject_a.tumor.input.png", + "subject_a/purple/plot/subject_a.tumor.map.png", + "subject_a/purple/plot/subject_a.tumor.purity.range.png", + "subject_a/purple/plot/subject_a.tumor.segment.png", + "subject_a/purple/plot/subject_a.tumor.somatic.clonality.png", + "subject_a/purple/plot/subject_a.tumor.somatic.png", + "subject_a/purple/plot/subject_a.tumor.somatic.rainfall.png", + "subject_a/purple/plot/subject_a.tumor.somatic_data.tsv", + "subject_a/purple/purple.version", + "subject_a/purple/subject_a.tumor.purple.cnv.gene.tsv", + "subject_a/purple/subject_a.tumor.purple.cnv.somatic.tsv", + "subject_a/purple/subject_a.tumor.purple.driver.catalog.germline.tsv", + "subject_a/purple/subject_a.tumor.purple.driver.catalog.somatic.tsv", + "subject_a/purple/subject_a.tumor.purple.germline.deletion.tsv", + "subject_a/purple/subject_a.tumor.purple.germline.vcf.gz", + "subject_a/purple/subject_a.tumor.purple.germline.vcf.gz.tbi", + "subject_a/purple/subject_a.tumor.purple.purity.range.tsv", + "subject_a/purple/subject_a.tumor.purple.purity.tsv", + "subject_a/purple/subject_a.tumor.purple.qc", + "subject_a/purple/subject_a.tumor.purple.segment.tsv", + 
"subject_a/purple/subject_a.tumor.purple.somatic.clonality.tsv", + "subject_a/purple/subject_a.tumor.purple.somatic.hist.tsv", + "subject_a/purple/subject_a.tumor.purple.somatic.vcf.gz", + "subject_a/purple/subject_a.tumor.purple.somatic.vcf.gz.tbi", + "subject_a/purple/subject_a.tumor.purple.sv.germline.vcf.gz", + "subject_a/purple/subject_a.tumor.purple.sv.germline.vcf.gz.tbi", + "subject_a/purple/subject_a.tumor.purple.sv.vcf.gz", + "subject_a/purple/subject_a.tumor.purple.sv.vcf.gz.tbi", + "subject_a/sage_append", + "subject_a/sage_append/germline", + "subject_a/sage_append/germline/subject_a.normal.sage.append.frag_lengths.tsv.gz", + "subject_a/sage_append/germline/subject_a.normal.sage.append.vcf.gz", + "subject_a/sage_append/germline/subject_a.normal.sage.append.vcf.gz.tbi", + "subject_a/sage_append/germline/subject_a.tumor_rna.sage.bqr.tsv", + "subject_a/sage_append/somatic", + "subject_a/sage_append/somatic/subject_a.tumor.sage.append.frag_lengths.tsv.gz", + "subject_a/sage_append/somatic/subject_a.tumor.sage.append.vcf.gz", + "subject_a/sage_append/somatic/subject_a.tumor.sage.append.vcf.gz.tbi", + "subject_a/sage_append/somatic/subject_a.tumor_rna.sage.bqr.tsv", + "subject_a/sage_calling", + "subject_a/sage_calling/germline", + "subject_a/sage_calling/germline/subject_a.normal.sage.bqr.png", + "subject_a/sage_calling/germline/subject_a.normal.sage.bqr.tsv", + "subject_a/sage_calling/germline/subject_a.tumor.sage.bqr.png", + "subject_a/sage_calling/germline/subject_a.tumor.sage.bqr.tsv", + "subject_a/sage_calling/germline/subject_a.tumor.sage.germline.vcf.gz", + "subject_a/sage_calling/germline/subject_a.tumor.sage.germline.vcf.gz.tbi", + "subject_a/sage_calling/somatic", + "subject_a/sage_calling/somatic/subject_a.normal.sage.bqr.png", + "subject_a/sage_calling/somatic/subject_a.normal.sage.bqr.tsv", + "subject_a/sage_calling/somatic/subject_a.tumor.sage.bqr.png", + "subject_a/sage_calling/somatic/subject_a.tumor.sage.bqr.tsv", + 
"subject_a/sage_calling/somatic/subject_a.tumor.sage.somatic.fit_variant.tsv", + "subject_a/sage_calling/somatic/subject_a.tumor.sage.somatic.vcf.gz", + "subject_a/sage_calling/somatic/subject_a.tumor.sage.somatic.vcf.gz.tbi", + "subject_a/sigs", + "subject_a/sigs/subject_a.tumor.sig.allocation.tsv", + "subject_a/sigs/subject_a.tumor.sig.snv_counts.csv", + "subject_a/teal", + "subject_a/teal/subject_a.normal.teal.telbam.bam", + "subject_a/teal/subject_a.normal.teal.telbam.bam.bai", + "subject_a/teal/subject_a.normal.teal.tellength.tsv", + "subject_a/teal/subject_a.tumor.teal.breakend.tsv.gz", + "subject_a/teal/subject_a.tumor.teal.telbam.bam", + "subject_a/teal/subject_a.tumor.teal.telbam.bam.bai", + "subject_a/teal/subject_a.tumor.teal.tellength.tsv", + "subject_a/virusbreakend", + "subject_a/virusbreakend/subject_a.tumor.virusbreakend.vcf", + "subject_a/virusbreakend/subject_a.tumor.virusbreakend.vcf.summary.tsv", + "subject_a/virusinterpreter", + "subject_a/virusinterpreter/subject_a.tumor.virus.annotated.tsv" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-01T00:24:15.919488538" + } +} \ No newline at end of file diff --git a/tower.yml b/tower.yml index 787aedfe..346d6463 100644 --- a/tower.yml +++ b/tower.yml @@ -1,5 +1,7 @@ reports: - multiqc_report.html: - display: "MultiQC HTML report" samplesheet.csv: display: "Auto-created samplesheet with collated metadata and FASTQ paths" + "**/linx/*linx.html": + display: "LINX report" + "**/orange/*.orange.pdf": + display: "ORANGE report" diff --git a/workflows/panel_resource_creation.nf b/workflows/panel_resource_creation.nf new file mode 100644 index 00000000..00069890 --- /dev/null +++ b/workflows/panel_resource_creation.nf @@ -0,0 +1,285 @@ +import Constants +import Processes +import Utils + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { AMBER_PROFILING } from '../subworkflows/local/amber_profiling' +include { COBALT_NORMALISATION } from '../subworkflows/local/cobalt_normalisation' +include { COBALT_PROFILING } from '../subworkflows/local/cobalt_profiling' +include { ISOFOX_NORMALISATION } from '../subworkflows/local/isofox_normalisation' +include { ISOFOX_QUANTIFICATION } from '../subworkflows/local/isofox_quantification' +include { PAVE_PON_CREATION } from '../subworkflows/local/pave_pon_creation' +include { PREPARE_REFERENCE } from '../subworkflows/local/prepare_reference' +include { READ_ALIGNMENT_DNA } from '../subworkflows/local/read_alignment_dna' +include { READ_ALIGNMENT_RNA } from '../subworkflows/local/read_alignment_rna' +include { REDUX_PROCESSING } from '../subworkflows/local/redux_processing' +include { SAGE_CALLING } from '../subworkflows/local/sage_calling' + +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PANEL_RESOURCE_CREATION { + take: + inputs + run_config + + main: + // Check input path parameters to see if they exist + def checkPathParamList = [ + params.isofox_counts, + params.isofox_gc_ratios, + params.isofox_gene_ids, + params.isofox_tpm_norm, + params.driver_gene_panel, + params.target_regions_bed, + ] + + if (run_config.stages.lilac) { + if (params.genome_version.toString() == '38' && params.genome_type == 'alt' && params.containsKey('ref_data_hla_slice_bed')) { + checkPathParamList.add(params.ref_data_hla_slice_bed) + } + } + + for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + + // Set input paths + target_regions_bed = params.target_regions_bed ? 
file(params.target_regions_bed) : [] + driver_gene_panel = params.driver_gene_panel ? file(params.driver_gene_panel) : [] + isofox_gene_ids = params.isofox_gene_ids ? file(params.isofox_gene_ids) : [] + + // Create channel for versions + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Create input channel from parsed CSV + // channel: [ meta ] + ch_inputs = Channel.fromList(inputs) + + // Set up reference data, assign more human readable variables + prep_config = WorkflowMain.getPrepConfigFromSamplesheet(run_config) + PREPARE_REFERENCE( + prep_config, + run_config, + ) + ref_data = PREPARE_REFERENCE.out + hmf_data = PREPARE_REFERENCE.out.hmf_data + + ch_versions = ch_versions.mix(PREPARE_REFERENCE.out.versions) + + // + // SUBWORKFLOW: Run read alignment to generate BAMs + // + READ_ALIGNMENT_DNA( + ch_inputs, + ref_data.genome_fasta, + ref_data.genome_bwamem2_index, + params.max_fastq_records, + params.fastp_umi_enabled, + params.fastp_umi_location, + params.fastp_umi_length, + params.fastp_umi_skip, + ) + + READ_ALIGNMENT_RNA( + ch_inputs, + ref_data.genome_star_index, + ) + + // channel: [ meta, [bam, ...], [bai, ...] ] + ch_versions = ch_versions.mix( + READ_ALIGNMENT_DNA.out.versions, + READ_ALIGNMENT_RNA.out.versions, + ) + + // channel: [ meta, [bam, ...], [bai, ...] 
] + ch_align_dna_tumor_out = READ_ALIGNMENT_DNA.out.dna_tumor + ch_align_dna_normal_out = READ_ALIGNMENT_DNA.out.dna_normal + ch_align_rna_tumor_out = READ_ALIGNMENT_RNA.out.rna_tumor + + // + // SUBWORKFLOW: Run REDUX for DNA BAMs + // + REDUX_PROCESSING( + ch_inputs, + ch_align_dna_tumor_out, + ch_align_dna_normal_out, + ch_inputs.map { meta -> [meta, [], []] }, // ch_dna_donor + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + ref_data.genome_dict, + hmf_data.unmap_regions, + hmf_data.msi_jitter_sites, + params.redux_umi_enabled, + params.redux_umi_duplex_delim, + ) + + ch_versions = ch_versions.mix(REDUX_PROCESSING.out.versions) + + // channel: [ meta, bam, bai ] + ch_redux_dna_tumor_out = REDUX_PROCESSING.out.dna_tumor + ch_redux_dna_normal_out = REDUX_PROCESSING.out.dna_normal + + // channel: [ meta, dup_freq_tsv, jitter_tsv, ms_tsv, repeat_tsv ] + ch_redux_dna_tumor_tsv_out = REDUX_PROCESSING.out.dna_tumor_tsv + ch_redux_dna_normal_tsv_out = REDUX_PROCESSING.out.dna_normal_tsv + + // + // MODULE: Run Isofox to analyse RNA data + // + isofox_counts = params.isofox_counts ? file(params.isofox_counts) : hmf_data.isofox_counts + isofox_gc_ratios = params.isofox_gc_ratios ? file(params.isofox_gc_ratios) : hmf_data.isofox_gc_ratios + isofox_read_length = params.isofox_read_length !== null ? 
params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_TARGETED + + ISOFOX_QUANTIFICATION( + ch_inputs, + ch_align_rna_tumor_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + hmf_data.ensembl_data_resources, + hmf_data.known_fusion_data, + isofox_counts, + isofox_gc_ratios, + [], // isofox_gene_ids + [], // isofox_tpm_norm + 'TRANSCRIPT_COUNTS', + isofox_read_length, + ) + + ch_versions = ch_versions.mix(ISOFOX_QUANTIFICATION.out.versions) + + // channel: [ meta, isofox_dir ] + ch_isofox_out = ISOFOX_QUANTIFICATION.out.isofox_dir + + // + // SUBWORKFLOW: Run AMBER to obtain b-allele frequencies + // + AMBER_PROFILING( + ch_inputs, + ch_redux_dna_tumor_out, + ch_redux_dna_normal_out, + ch_inputs.map { meta -> [meta, [], []] }, // ch_donor_bam + ref_data.genome_version, + hmf_data.heterozygous_sites, + target_regions_bed, + 2, // tumor_min_depth + ) + + ch_versions = ch_versions.mix(AMBER_PROFILING.out.versions) + + // channel: [ meta, amber_dir ] + ch_amber_out = AMBER_PROFILING.out.amber_dir + + // + // SUBWORKFLOW: Run COBALT to obtain read ratios + // + COBALT_PROFILING( + ch_inputs, + ch_redux_dna_tumor_out, + ch_redux_dna_normal_out, + hmf_data.gc_profile, + hmf_data.diploid_bed, + [], // panel_target_region_normalisation + true, // targeted_mode + ) + + ch_versions = ch_versions.mix(COBALT_PROFILING.out.versions) + + // channel: [ meta, cobalt_dir ] + ch_cobalt_out = COBALT_PROFILING.out.cobalt_dir + + + // SUBWORKFLOW: call SNV, MNV, and small INDELS with SAGE + // + SAGE_CALLING( + ch_inputs, + ch_redux_dna_tumor_out, + ch_redux_dna_normal_out, + ch_inputs.map { meta -> [meta, [], []] }, // ch_donor_bam + ch_redux_dna_tumor_tsv_out, + ch_redux_dna_normal_tsv_out, + ch_inputs.map { meta -> [meta, [], [], []] }, // ch_donor_tsv + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + ref_data.genome_dict, + hmf_data.sage_pon, + hmf_data.sage_known_hotspots_somatic, + 
hmf_data.sage_known_hotspots_germline, + hmf_data.sage_highconf_regions, + hmf_data.segment_mappability, + driver_gene_panel, + hmf_data.ensembl_data_resources, + hmf_data.gnomad_resource, + true, // enable_germline + true, // targeted_mode + ) + + ch_versions = ch_versions.mix(SAGE_CALLING.out.versions) + + // channel: [ meta, sage_vcf, sage_tbi ] + ch_sage_somatic_vcf_out = SAGE_CALLING.out.somatic_vcf + + // + // SUBWORKFLOW: Run COBALT normalisation + // + COBALT_NORMALISATION( + ch_amber_out, + ch_cobalt_out, + ref_data.genome_version, + hmf_data.gc_profile, + target_regions_bed, + ) + + ch_versions = ch_versions.mix(COBALT_NORMALISATION.out.versions) + + // + // SUBWORKFLOW: Run PAVE panel of normals creation + // + PAVE_PON_CREATION( + ch_sage_somatic_vcf_out, + ref_data.genome_version, + ) + + ch_versions = ch_versions.mix(PAVE_PON_CREATION.out.versions) + + // + // SUBWORKFLOW: Run Isofox TPM normalisation + // + ISOFOX_NORMALISATION( + ch_isofox_out, + ref_data.genome_version, + isofox_gene_ids, + hmf_data.gene_exp_distribution, + ) + + ch_versions = ch_versions.mix(ISOFOX_NORMALISATION.out.versions) + + // + // TASK: Aggregate software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'software_versions.yml', + sort: true, + newLine: true, + ) +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/workflows/prepare_reference.nf b/workflows/prepare_reference.nf new file mode 100644 index 00000000..23fab2b6 --- /dev/null +++ b/workflows/prepare_reference.nf @@ -0,0 +1,51 @@ +import Constants +import Processes +import Utils + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { PREPARE_REFERENCE as STAGE_REFERENCE } from '../subworkflows/local/prepare_reference' + +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PREPARE_REFERENCE { + // Create channel for versions + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Stage in reference data as requested + prep_config = WorkflowMain.getPrepConfigFromCli(params, log) + STAGE_REFERENCE( + prep_config, + [:], + ) + + ch_versions = ch_versions.mix(STAGE_REFERENCE.out.versions) + + // + // TASK: Aggregate software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'software_versions.yml', + sort: true, + newLine: true, + ) +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/workflows/purity_estimate.nf b/workflows/purity_estimate.nf new file mode 100644 index 00000000..467313ae --- /dev/null +++ b/workflows/purity_estimate.nf @@ -0,0 +1,264 @@ +import Constants +import Processes +import Utils + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { AMBER_PROFILING } from '../subworkflows/local/amber_profiling' +include { COBALT_PROFILING } from '../subworkflows/local/cobalt_profiling' +include { PREPARE_REFERENCE } from '../subworkflows/local/prepare_reference' +include { READ_ALIGNMENT_DNA } from 
'../subworkflows/local/read_alignment_dna' +include { REDUX_PROCESSING } from '../subworkflows/local/redux_processing' +include { SAGE_APPEND } from '../subworkflows/local/sage_append' +include { WISP_ANALYSIS } from '../subworkflows/local/wisp_analysis' + +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PURITY_ESTIMATE { + take: + inputs + run_config + + main: + // Create channel for versions + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Create input channel from parsed CSV + // channel: [ meta ] + ch_inputs = Channel.fromList(inputs) + + // Get run mode of purity estimate mode + purity_estimate_run_mode = Utils.getEnumFromString(params.purity_estimate_mode, Constants.RunMode) + + // Set up reference data, assign more human readable variables + prep_config = WorkflowMain.getPrepConfigFromSamplesheet(run_config) + PREPARE_REFERENCE( + prep_config, + run_config, + ) + ref_data = PREPARE_REFERENCE.out + hmf_data = PREPARE_REFERENCE.out.hmf_data + + ch_versions = ch_versions.mix(PREPARE_REFERENCE.out.versions) + + // + // SUBWORKFLOW: Run read alignment to generate BAMs + // + // channel: [ meta, [bam, ...], [bai, ...] 
] + ch_align_dna_tumor_out = Channel.empty() + ch_align_dna_normal_out = Channel.empty() + ch_align_dna_donor_out = Channel.empty() + ch_align_rna_tumor_out = Channel.empty() + if (run_config.stages.alignment) { + + READ_ALIGNMENT_DNA( + ch_inputs, + ref_data.genome_fasta, + ref_data.genome_bwamem2_index, + params.max_fastq_records, + params.fastp_umi_enabled, + params.fastp_umi_location, + params.fastp_umi_length, + params.fastp_umi_skip, + ) + + ch_versions = ch_versions.mix(READ_ALIGNMENT_DNA.out.versions) + + ch_align_dna_tumor_out = ch_align_dna_tumor_out.mix(READ_ALIGNMENT_DNA.out.dna_tumor) + ch_align_dna_normal_out = ch_align_dna_normal_out.mix(READ_ALIGNMENT_DNA.out.dna_normal) + ch_align_dna_donor_out = ch_align_dna_donor_out.mix(READ_ALIGNMENT_DNA.out.dna_donor) + + } else { + + ch_align_dna_tumor_out = ch_inputs.map { meta -> [meta, [], []] } + ch_align_dna_normal_out = ch_inputs.map { meta -> [meta, [], []] } + ch_align_dna_donor_out = ch_inputs.map { meta -> [meta, [], []] } + + } + + // + // SUBWORKFLOW: Run REDUX for DNA BAMs + // + // channel: [ meta, bam, bai ] + ch_redux_dna_tumor_out = Channel.empty() + ch_redux_dna_normal_out = Channel.empty() + ch_redux_dna_donor_out = Channel.empty() + + // channel: [ meta, dup_freq_tsv, jitter_tsv, ms_tsv, repeat_tsv ] + ch_redux_dna_tumor_tsv_out = Channel.empty() + ch_redux_dna_normal_tsv_out = Channel.empty() + ch_redux_dna_donor_tsv_out = Channel.empty() + + if (run_config.stages.redux) { + + REDUX_PROCESSING( + ch_inputs, + ch_align_dna_tumor_out, + ch_align_dna_normal_out, + ch_align_dna_donor_out, + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + ref_data.genome_dict, + hmf_data.unmap_regions, + hmf_data.msi_jitter_sites, + params.redux_umi_enabled, + params.redux_umi_duplex_delim, + ) + + ch_versions = ch_versions.mix(REDUX_PROCESSING.out.versions) + + ch_redux_dna_tumor_out = ch_redux_dna_tumor_out.mix(REDUX_PROCESSING.out.dna_tumor) + ch_redux_dna_normal_out = 
ch_redux_dna_normal_out.mix(REDUX_PROCESSING.out.dna_normal) + ch_redux_dna_donor_out = ch_redux_dna_donor_out.mix(REDUX_PROCESSING.out.dna_donor) + + ch_redux_dna_tumor_tsv_out = ch_redux_dna_tumor_tsv_out.mix(REDUX_PROCESSING.out.dna_tumor_tsv) + ch_redux_dna_normal_tsv_out = ch_redux_dna_normal_tsv_out.mix(REDUX_PROCESSING.out.dna_normal_tsv) + ch_redux_dna_donor_tsv_out = ch_redux_dna_donor_tsv_out.mix(REDUX_PROCESSING.out.dna_donor_tsv) + + } else { + + ch_redux_dna_tumor_out = ch_inputs.map { meta -> [meta, [], []] } + ch_redux_dna_normal_out = ch_inputs.map { meta -> [meta, [], []] } + ch_redux_dna_donor_out = ch_inputs.map { meta -> [meta, [], []] } + + ch_redux_dna_tumor_tsv_out = ch_inputs.map { meta -> [meta, [], [], []] } + ch_redux_dna_normal_tsv_out = ch_inputs.map { meta -> [meta, [], [], []] } + ch_redux_dna_donor_tsv_out = ch_inputs.map { meta -> [meta, [], [], []] } + + } + + // + // SUBWORKFLOW: Run AMBER to obtain b-allele frequencies + // + // channel: [ meta, amber_dir ] + ch_amber_out = Channel.empty() + if (run_config.stages.amber && purity_estimate_run_mode === Constants.RunMode.WGTS) { + + tumor_min_depth = purity_estimate_run_mode === Constants.RunMode.WGTS ? 
1 : [] + + AMBER_PROFILING( + ch_inputs, + ch_redux_dna_tumor_out, + ch_redux_dna_normal_out, + ch_redux_dna_donor_out, + ref_data.genome_version, + hmf_data.heterozygous_sites, + [], // target_region_bed + tumor_min_depth, + ) + + ch_versions = ch_versions.mix(AMBER_PROFILING.out.versions) + + ch_amber_out = ch_amber_out.mix(AMBER_PROFILING.out.amber_dir) + + } else { + + ch_amber_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Run COBALT to obtain read ratios + // + // channel: [ meta, cobalt_dir ] + ch_cobalt_out = Channel.empty() + if (run_config.stages.cobalt && purity_estimate_run_mode === Constants.RunMode.WGTS) { + + COBALT_PROFILING( + ch_inputs, + ch_redux_dna_tumor_out, + ch_redux_dna_normal_out, + hmf_data.gc_profile, + hmf_data.diploid_bed, + [], // panel_target_region_normalisation + purity_estimate_run_mode === Constants.RunMode.TARGETED, // targeted_mode + ) + + ch_versions = ch_versions.mix(COBALT_PROFILING.out.versions) + + ch_cobalt_out = ch_cobalt_out.mix(COBALT_PROFILING.out.cobalt_dir) + + } else { + + ch_cobalt_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: Append new sample data to primary SAGE WGS VCF + // + // channel: [ meta, sage_append_dir ] + ch_sage_somatic_append_out = Channel.empty() + if (run_config.stages.orange) { + + SAGE_APPEND( + ch_inputs, + ch_inputs.map { meta -> [meta, []] }, // ch_purple_dir + ch_redux_dna_tumor_out, + ch_redux_dna_tumor_tsv_out, + ch_inputs.map { meta -> [meta, [], []] }, // ch_tumor_rna_bam + ref_data.genome_fasta, + ref_data.genome_version, + ref_data.genome_fai, + ref_data.genome_dict, + false, // run_germline + purity_estimate_run_mode === Constants.RunMode.TARGETED, // targeted_mode + ) + + ch_versions = ch_versions.mix(SAGE_APPEND.out.versions) + ch_sage_somatic_append_out = ch_sage_somatic_append_out.mix(SAGE_APPEND.out.somatic_dir) + + } else { + + ch_sage_somatic_append_out = ch_inputs.map { meta -> [meta, []] } + + } + + // + // SUBWORKFLOW: 
Run WISP to estimate tumor purity + // + if (run_config.stages.wisp) { + + WISP_ANALYSIS( + ch_inputs, + ch_amber_out, + ch_cobalt_out, + ch_sage_somatic_append_out, + ref_data.genome_fasta, + ref_data.genome_fai, + purity_estimate_run_mode === Constants.RunMode.TARGETED, // targeted_mode + ) + + ch_versions = ch_versions.mix(WISP_ANALYSIS.out.versions) + + } + + // + // TASK: Aggregate software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'software_versions.yml', + sort: true, + newLine: true, + ) +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/workflows/targeted.nf b/workflows/targeted.nf index 34dfff2f..637884e3 100644 --- a/workflows/targeted.nf +++ b/workflows/targeted.nf @@ -2,53 +2,12 @@ import Constants import Processes import Utils - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -// Parse input samplesheet -// NOTE(SW): this is done early and outside of gpars so that we can access synchronously and prior to pipeline execution -inputs = Utils.parseInput(params.input, workflow.stubRun, log) - -// Get run config -run_config = WorkflowMain.getRunConfig(params, inputs, log) - -// Validate inputs -Utils.validateInput(inputs, run_config, params, log) - -// Check input path parameters to see if they exist -def checkPathParamList = [ - params.isofox_counts, - params.isofox_gc_ratios, - params.isofox_gene_ids, - params.isofox_tpm_norm, -] - -if (run_config.stages.lilac) { - if (params.genome_version.toString() == '38' && params.genome_type == 'alt' && params.containsKey('ref_data_hla_slice_bed')) { - checkPathParamList.add(params.ref_data_hla_slice_bed) - } -} - -// 
TODO(SW): consider whether we should check for null entries here for errors to be more informative -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } - -// Used in Isofox subworkflow only -isofox_read_length = params.isofox_read_length !== null ? params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_TARGETED - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' - include { AMBER_PROFILING } from '../subworkflows/local/amber_profiling' include { BAMTOOLS_METRICS } from '../subworkflows/local/bamtools_metrics' include { CIDER_CALLING } from '../subworkflows/local/cider_calling' @@ -69,16 +28,36 @@ include { REDUX_PROCESSING } from '../subworkflows/local/redux_processing' include { SAGE_APPEND } from '../subworkflows/local/sage_append' include { SAGE_CALLING } from '../subworkflows/local/sage_calling' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Get absolute file paths -samplesheet = Utils.getFileObject(params.input) - workflow TARGETED { + take: + inputs + run_config + + main: + // Check input path parameters to see if they exist + def checkPathParamList = [ + params.isofox_counts, + params.isofox_gc_ratios, + params.isofox_gene_ids, + params.isofox_tpm_norm, + ] + + if (run_config.stages.lilac) { + if (params.genome_version.toString() == '38' && params.genome_type == 'alt' && 
params.containsKey('ref_data_hla_slice_bed')) { + checkPathParamList.add(params.ref_data_hla_slice_bed) + } + } + + for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + // Create channel for versions // channel: [ versions.yml ] ch_versions = Channel.empty() @@ -88,7 +67,9 @@ workflow TARGETED { ch_inputs = Channel.fromList(inputs) // Set up reference data, assign more human readable variables + prep_config = WorkflowMain.getPrepConfigFromSamplesheet(run_config) PREPARE_REFERENCE( + prep_config, run_config, ) ref_data = PREPARE_REFERENCE.out @@ -112,7 +93,7 @@ workflow TARGETED { ref_data.genome_fasta, ref_data.genome_bwamem2_index, params.max_fastq_records, - params.fastp_umi, + params.fastp_umi_enabled, params.fastp_umi_location, params.fastp_umi_length, params.fastp_umi_skip, @@ -168,7 +149,7 @@ workflow TARGETED { ref_data.genome_dict, hmf_data.unmap_regions, hmf_data.msi_jitter_sites, - params.redux_umi, + params.redux_umi_enabled, params.redux_umi_duplex_delim, ) @@ -188,25 +169,28 @@ workflow TARGETED { ch_redux_dna_normal_out = ch_inputs.map { meta -> [meta, [], []] } ch_redux_dna_donor_out = ch_inputs.map { meta -> [meta, [], []] } - ch_redux_dna_tumor_tsv_out = ch_inputs.map { meta -> [meta, [], [], [], []] } - ch_redux_dna_normal_tsv_out = ch_inputs.map { meta -> [meta, [], [], [], []] } - ch_redux_dna_donor_tsv_out = ch_inputs.map { meta -> [meta, [], [], [], []] } + ch_redux_dna_tumor_tsv_out = ch_inputs.map { meta -> [meta, [], [], []] } + ch_redux_dna_normal_tsv_out = ch_inputs.map { meta -> [meta, [], [], []] } + ch_redux_dna_donor_tsv_out = ch_inputs.map { meta -> [meta, [], [], []] } } // // MODULE: Run Isofox to analyse RNA data // + + isofox_counts = params.isofox_counts ? file(params.isofox_counts) : panel_data.isofox_counts + isofox_gc_ratios = params.isofox_gc_ratios ? file(params.isofox_gc_ratios) : panel_data.isofox_gc_ratios + + isofox_gene_ids = params.isofox_gene_ids ? 
file(params.isofox_gene_ids) : panel_data.isofox_gene_ids + isofox_tpm_norm = params.isofox_tpm_norm ? file(params.isofox_tpm_norm) : panel_data.isofox_tpm_norm + + isofox_read_length = params.isofox_read_length !== null ? params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_TARGETED + // channel: [ meta, isofox_dir ] ch_isofox_out = Channel.empty() if (run_config.stages.isofox) { - isofox_counts = params.isofox_counts ? file(params.isofox_counts) : panel_data.isofox_counts - isofox_gc_ratios = params.isofox_gc_ratios ? file(params.isofox_gc_ratios) : panel_data.isofox_gc_ratios - - isofox_gene_ids = params.isofox_gene_ids ? file(params.isofox_gene_ids) : panel_data.isofox_gene_ids - isofox_tpm_norm = params.isofox_tpm_norm ? file(params.isofox_tpm_norm) : panel_data.isofox_tpm_norm - ISOFOX_QUANTIFICATION( ch_inputs, ch_align_rna_tumor_out, @@ -248,6 +232,7 @@ workflow TARGETED { ref_data.genome_version, hmf_data.heterozygous_sites, panel_data.target_region_bed, + [], // tumor_min_depth ) ch_versions = ch_versions.mix(AMBER_PROFILING.out.versions) @@ -274,6 +259,7 @@ workflow TARGETED { hmf_data.gc_profile, hmf_data.diploid_bed, panel_data.target_region_normalisation, + true, // targeted_mode ) ch_versions = ch_versions.mix(COBALT_PROFILING.out.versions) @@ -303,12 +289,11 @@ workflow TARGETED { ref_data.genome_fai, ref_data.genome_dict, ref_data.genome_img, - hmf_data.sv_prep_blocklist, hmf_data.known_fusions, hmf_data.gridss_pon_breakends, hmf_data.gridss_pon_breakpoints, - hmf_data.repeatmasker_annotations, hmf_data.decoy_sequences_image, + hmf_data.repeatmasker_annotations, hmf_data.unmap_regions, ) @@ -347,14 +332,16 @@ workflow TARGETED { ref_data.genome_version, ref_data.genome_fai, ref_data.genome_dict, + hmf_data.sage_pon, hmf_data.sage_known_hotspots_somatic, hmf_data.sage_known_hotspots_germline, - panel_data.sage_actionable_panel, - panel_data.sage_coverage_panel, hmf_data.sage_highconf_regions, hmf_data.segment_mappability, 
panel_data.driver_gene_panel, hmf_data.ensembl_data_resources, + hmf_data.gnomad_resource, + true, // enable_germline + true, // targeted_mode ) ch_versions = ch_versions.mix(SAGE_CALLING.out.versions) @@ -388,8 +375,8 @@ workflow TARGETED { ref_data.genome_fasta, ref_data.genome_version, ref_data.genome_fai, - hmf_data.sage_pon, panel_data.pon_artefacts, + hmf_data.sage_pon, hmf_data.sage_blocklist_regions, hmf_data.sage_blocklist_sites, hmf_data.clinvar_annotations, @@ -452,30 +439,31 @@ workflow TARGETED { } // - // SUBWORKFLOW: Append RNA data to SAGE VCF + // SUBWORKFLOW: Append read data to SAGE VCF // // channel: [ meta, sage_append_vcf ] ch_sage_somatic_append_out = Channel.empty() ch_sage_germline_append_out = Channel.empty() if (run_config.stages.orange) { - // NOTE(SW): currently used only for ORANGE but will also be used for Neo once implemented - SAGE_APPEND( ch_inputs, - ch_align_rna_tumor_out, ch_purple_out, + ch_inputs.map { meta -> [meta, [], []] }, // ch_tumor_redux_bam + ch_inputs.map { meta -> [meta, [], [], []] }, // ch_tumor_redux_tsv + ch_align_rna_tumor_out, ref_data.genome_fasta, ref_data.genome_version, ref_data.genome_fai, ref_data.genome_dict, - true, // run_germline + true, // enable_germline + true, // targeted_mode ) ch_versions = ch_versions.mix(SAGE_APPEND.out.versions) - ch_sage_somatic_append_out = ch_sage_somatic_append_out.mix(SAGE_APPEND.out.somatic_vcf) - ch_sage_germline_append_out = ch_sage_germline_append_out.mix(SAGE_APPEND.out.germline_vcf) + ch_sage_somatic_append_out = ch_sage_somatic_append_out.mix(SAGE_APPEND.out.somatic_dir) + ch_sage_germline_append_out = ch_sage_germline_append_out.mix(SAGE_APPEND.out.germline_dir) } else { @@ -551,6 +539,8 @@ workflow TARGETED { ch_redux_dna_normal_out, ref_data.genome_fasta, ref_data.genome_version, + panel_data.driver_gene_panel, + hmf_data.ensembl_data_resources, ) ch_versions = ch_versions.mix(BAMTOOLS_METRICS.out.versions) @@ -603,6 +593,7 @@ workflow TARGETED { 
ref_data.genome_fai, hmf_data.lilac_resources, ref_data_hla_slice_bed, + true, // targeted_mode ) ch_versions = ch_versions.mix(LILAC_CALLING.out.versions) diff --git a/workflows/wgts.nf b/workflows/wgts.nf index 4d4beaf4..73511b72 100644 --- a/workflows/wgts.nf +++ b/workflows/wgts.nf @@ -2,50 +2,12 @@ import Constants import Processes import Utils -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE INPUTS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -// Parse input samplesheet -// NOTE(SW): this is done early and outside of gpars so that we can access synchronously and prior to pipeline execution -inputs = Utils.parseInput(params.input, workflow.stubRun, log) - -// Get run config -run_config = WorkflowMain.getRunConfig(params, inputs, log) - -// Validate inputs -Utils.validateInput(inputs, run_config, params, log) - -// Check input path parameters to see if they exist -def checkPathParamList = [ - params.isofox_counts, - params.isofox_gc_ratios, -] - -if (run_config.stages.lilac) { - if (params.genome_version.toString() == '38' && params.genome_type == 'alt' && params.containsKey('ref_data_hla_slice_bed')) { - checkPathParamList.add(params.ref_data_hla_slice_bed) - } -} - -// TODO(SW): consider whether we should check for null entries here for errors to be more informative -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } - -// Used in Isofox and Neo subworkflows -isofox_read_length = params.isofox_read_length !== null ? 
params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_WTS - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' - include { AMBER_PROFILING } from '../subworkflows/local/amber_profiling' include { BAMTOOLS_METRICS } from '../subworkflows/local/bamtools_metrics' include { CHORD_PREDICTION } from '../subworkflows/local/chord_prediction' @@ -72,16 +34,34 @@ include { SIGS_FITTING } from '../subworkflows/local/sigs_fitting' include { TEAL_CHARACTERISATION } from '../subworkflows/local/teal_characterisation' include { VIRUSBREAKEND_CALLING } from '../subworkflows/local/virusbreakend_calling' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Get absolute file paths -samplesheet = Utils.getFileObject(params.input) - workflow WGTS { + take: + inputs + run_config + + main: + // Check input path parameters to see if they exist + def checkPathParamList = [ + params.isofox_counts, + params.isofox_gc_ratios, + ] + + if (run_config.stages.lilac) { + if (params.genome_version.toString() == '38' && params.genome_type == 'alt' && params.containsKey('ref_data_hla_slice_bed')) { + checkPathParamList.add(params.ref_data_hla_slice_bed) + } + } + + for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + // Create channel for versions // channel: [ versions.yml ] ch_versions = Channel.empty() @@ -91,7 +71,9 @@ workflow WGTS { ch_inputs = Channel.fromList(inputs) // Set up reference data, assign more human readable variables + prep_config = 
WorkflowMain.getPrepConfigFromSamplesheet(run_config) PREPARE_REFERENCE( + prep_config, run_config, ) ref_data = PREPARE_REFERENCE.out @@ -193,22 +175,24 @@ workflow WGTS { ch_redux_dna_normal_out = ch_inputs.map { meta -> [meta, [], []] } ch_redux_dna_donor_out = ch_inputs.map { meta -> [meta, [], []] } - ch_redux_dna_tumor_tsv_out = ch_inputs.map { meta -> [meta, [], [], [], []] } - ch_redux_dna_normal_tsv_out = ch_inputs.map { meta -> [meta, [], [], [], []] } - ch_redux_dna_donor_tsv_out = ch_inputs.map { meta -> [meta, [], [], [], []] } + ch_redux_dna_tumor_tsv_out = ch_inputs.map { meta -> [meta, [], [], []] } + ch_redux_dna_normal_tsv_out = ch_inputs.map { meta -> [meta, [], [], []] } + ch_redux_dna_donor_tsv_out = ch_inputs.map { meta -> [meta, [], [], []] } } // // MODULE: Run Isofox to analyse RNA data // + + isofox_counts = params.isofox_counts ? file(params.isofox_counts) : hmf_data.isofox_counts + isofox_gc_ratios = params.isofox_gc_ratios ? file(params.isofox_gc_ratios) : hmf_data.isofox_gc_ratios + isofox_read_length = params.isofox_read_length !== null ? params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_WTS + // channel: [ meta, isofox_dir ] ch_isofox_out = Channel.empty() if (run_config.stages.isofox) { - isofox_counts = params.isofox_counts ? file(params.isofox_counts) : hmf_data.isofox_counts - isofox_gc_ratios = params.isofox_gc_ratios ? 
file(params.isofox_gc_ratios) : hmf_data.isofox_gc_ratios - ISOFOX_QUANTIFICATION( ch_inputs, ch_align_rna_tumor_out, @@ -250,6 +234,7 @@ workflow WGTS { ref_data.genome_version, hmf_data.heterozygous_sites, [], // target_region_bed + [], // tumor_min_depth ) ch_versions = ch_versions.mix(AMBER_PROFILING.out.versions) @@ -276,6 +261,7 @@ workflow WGTS { hmf_data.gc_profile, hmf_data.diploid_bed, [], // panel_target_region_normalisation + false, // targeted_mode ) ch_versions = ch_versions.mix(COBALT_PROFILING.out.versions) @@ -305,13 +291,12 @@ workflow WGTS { ref_data.genome_fai, ref_data.genome_dict, ref_data.genome_img, - hmf_data.sv_prep_blocklist, hmf_data.known_fusions, hmf_data.gridss_pon_breakends, hmf_data.gridss_pon_breakpoints, - hmf_data.repeatmasker_annotations, hmf_data.decoy_sequences_image, - hmf_data.unmap_regions, + hmf_data.repeatmasker_annotations, + hmf_data.unmap_regions ) ch_versions = ch_versions.mix(ESVEE_CALLING.out.versions) @@ -349,14 +334,16 @@ workflow WGTS { ref_data.genome_version, ref_data.genome_fai, ref_data.genome_dict, + hmf_data.sage_pon, hmf_data.sage_known_hotspots_somatic, hmf_data.sage_known_hotspots_germline, - hmf_data.sage_actionable_panel, - hmf_data.sage_coverage_panel, hmf_data.sage_highconf_regions, hmf_data.segment_mappability, hmf_data.driver_gene_panel, hmf_data.ensembl_data_resources, + hmf_data.gnomad_resource, + true, // enable_germline + false, // targeted_mode ) ch_versions = ch_versions.mix(SAGE_CALLING.out.versions) @@ -390,8 +377,8 @@ workflow WGTS { ref_data.genome_fasta, ref_data.genome_version, ref_data.genome_fai, - hmf_data.sage_pon, [], // sage_pon_artefacts + hmf_data.sage_pon, hmf_data.sage_blocklist_regions, hmf_data.sage_blocklist_sites, hmf_data.clinvar_annotations, @@ -454,28 +441,31 @@ workflow WGTS { } // - // SUBWORKFLOW: Append RNA data to SAGE VCF + // SUBWORKFLOW: Append read data to SAGE VCF // - // channel: [ meta, sage_append_vcf ] + // channel: [ meta, sage_append_dir ] 
ch_sage_somatic_append_out = Channel.empty() ch_sage_germline_append_out = Channel.empty() if (run_config.stages.orange || run_config.stages.neo) { SAGE_APPEND( ch_inputs, - ch_align_rna_tumor_out, ch_purple_out, + ch_inputs.map { meta -> [meta, [], []] }, // ch_tumor_redux_bam + ch_inputs.map { meta -> [meta, [], [], []] }, // ch_tumor_redux_tsv + ch_align_rna_tumor_out, ref_data.genome_fasta, ref_data.genome_version, ref_data.genome_fai, ref_data.genome_dict, - run_config.stages.orange, // run_germline [run for ORANGE but not Neo] + run_config.stages.orange, // enable_germline [run for ORANGE but not Neo] + false, // targeted_mode ) ch_versions = ch_versions.mix(SAGE_APPEND.out.versions) - ch_sage_somatic_append_out = ch_sage_somatic_append_out.mix(SAGE_APPEND.out.somatic_vcf) - ch_sage_germline_append_out = ch_sage_germline_append_out.mix(SAGE_APPEND.out.germline_vcf) + ch_sage_somatic_append_out = ch_sage_somatic_append_out.mix(SAGE_APPEND.out.somatic_dir) + ch_sage_germline_append_out = ch_sage_germline_append_out.mix(SAGE_APPEND.out.germline_dir) } else { @@ -551,6 +541,8 @@ workflow WGTS { ch_redux_dna_normal_out, ref_data.genome_fasta, ref_data.genome_version, + hmf_data.driver_gene_panel, + hmf_data.ensembl_data_resources, ) ch_versions = ch_versions.mix(BAMTOOLS_METRICS.out.versions) @@ -651,6 +643,7 @@ workflow WGTS { ref_data.genome_fai, hmf_data.lilac_resources, ref_data_hla_slice_bed, + false, // targeted_mode ) ch_versions = ch_versions.mix(LILAC_CALLING.out.versions)